Loading...
Loading...
Run Megatron-LM (MLM) and Megatron Bridge training with mock or real data. Covers correlation testing, available recipes, and multi-GPU examples.
npx skill4agent add nvidia/skills mlm-bridge-training

`vanilla_gpt_pretrain_config` `GPTModelProvider` `vocab_size` `pretrain_gpt.py`

PYTHONPATH=3rdparty/Megatron-LM:$PYTHONPATH \
uv run python -m torch.distributed.run --nproc_per_node=1 \
3rdparty/Megatron-LM/pretrain_gpt.py \
--num-layers 2 --hidden-size 256 --num-attention-heads 4 \
--ffn-hidden-size 1024 --seq-length 512 --max-position-embeddings 512 \
--micro-batch-size 4 --global-batch-size 32 \
--train-iters 10 --eval-iters 2 --eval-interval 10 \
--mock-data --bf16 --use-mcore-models \
--tokenizer-type NullTokenizer --vocab-size 32000 \
--lr 3e-4 --min-lr 3e-5 --seed 1234 --log-interval 1

rm -rf nemo_experiments && \
uv run python -m torch.distributed.run --nproc_per_node=1 \
scripts/training/run_recipe.py \
--recipe vanilla_gpt_pretrain_config \
model.num_layers=2 model.hidden_size=256 \
model.num_attention_heads=4 model.ffn_hidden_size=1024 \
model.seq_length=512 dataset.sequence_length=512 \
train.train_iters=10 train.global_batch_size=32 train.micro_batch_size=4 \
validation.eval_interval=10 validation.eval_iters=2 \
optimizer.lr=3e-4 optimizer.min_lr=3e-5 \
scheduler.lr_warmup_iters=1 scheduler.lr_decay_iters=10 \
rng.seed=1234 logger.log_interval=1

`lm loss`

PYTHONPATH=3rdparty/Megatron-LM:$PYTHONPATH \
uv run python -m torch.distributed.run --nproc_per_node=2 \
3rdparty/Megatron-LM/pretrain_gpt.py \
--tensor-model-parallel-size 2 --sequence-parallel \
--num-layers 4 --hidden-size 256 --num-attention-heads 4 \
--seq-length 1024 --max-position-embeddings 1024 \
--micro-batch-size 2 --global-batch-size 16 \
--train-iters 10 --eval-iters 2 --eval-interval 10 \
--mock-data --bf16 --use-mcore-models \
--tokenizer-type NullTokenizer --vocab-size 1024 \
--lr 1e-4 --log-interval 1

rm -rf nemo_experiments && \
uv run python -m torch.distributed.run --nproc_per_node=2 \
scripts/training/run_recipe.py \
--recipe vanilla_gpt_pretrain_config \
model.tensor_model_parallel_size=2 model.sequence_parallel=true \
model.num_layers=4 model.hidden_size=256 \
model.num_attention_heads=4 model.ffn_hidden_size=1024 \
model.seq_length=1024 dataset.sequence_length=1024 \
train.train_iters=10 train.global_batch_size=16 train.micro_batch_size=2 \
validation.eval_interval=10 validation.eval_iters=2 \
scheduler.lr_warmup_iters=2 scheduler.lr_decay_iters=10 \
logger.log_interval=1

`--recipe`
`vanilla_gpt_pretrain_config`
`llama32_1b_pretrain_config`
`llama3_8b_pretrain_config`
`qwen3_8b_pretrain_config`
`deepseek_v2_lite_pretrain_config`
`_sft_config`
`_peft_config`

./scripts/switch_mcore.sh status

./scripts/switch_mcore.sh dev
# uv sync (without --locked) since lockfile is for main
uv sync

./scripts/switch_mcore.sh main

git submodule update --init 3rdparty/Megatron-LM

`rm -rf nemo_experiments`
`uv run`
`uv run python -m torch.distributed.run`
`torchrun`
`python`
`3rdparty/Megatron-LM`
`gpt_builders.py`
`train.train_iters`
`scheduler.lr_warmup_iters`
`scheduler.lr_decay_iters`
`dataset.sequence_length`
`dataset.seq_length`
`uv sync --locked`
`uv sync`
`--locked`