6 changes: 4 additions & 2 deletions examples/config_tiny_llama.yaml
@@ -1,6 +1,6 @@
 checkpoints:
   checkpoint_interval: 10
-  checkpoints_path: /fsx/thomwolf/github/nanotron/checkpoints
+  checkpoints_path: /fsx/nouamane/projects/nanotron/checkpoints
   checkpoints_path_is_shared_file_system: false
   resume_checkpoint_path: null
   save_initial_state: false
@@ -19,9 +19,10 @@ general:
   consumed_train_samples: null
   ignore_sanity_checks: false
   project: debug
-  run: tiny_llama
+  run: tiny_llama_%date_%jobid
   seed: 42
   step: null
+lighteval: null
 logging:
   iteration_step_info_interval: 1
   log_level: info
@@ -59,6 +60,7 @@ optimizer:
   clip_grad: 1.0
   learning_rate_scheduler:
     learning_rate: 0.0003
+    lr_decay_starting_step: null
     lr_decay_steps: 8
     lr_decay_style: cosine
     lr_warmup_steps: 2
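The new lr_decay_starting_step key suggests the decay start can be decoupled from warmup. A rough sketch of the schedule these keys describe, assuming null means decay begins right after warmup (illustrative only, not nanotron's actual scheduler code):

import math


def lr_at_step(step, learning_rate=3e-4, lr_warmup_steps=2, lr_decay_steps=8,
               lr_decay_starting_step=None, min_decay_lr=0.0):
    # Linear warmup, flat until the decay start, then cosine decay to min_decay_lr.
    if step < lr_warmup_steps:
        return learning_rate * (step + 1) / lr_warmup_steps
    start = lr_decay_starting_step if lr_decay_starting_step is not None else lr_warmup_steps
    if step < start:
        return learning_rate
    progress = min(1.0, (step - start) / lr_decay_steps)
    return min_decay_lr + (learning_rate - min_decay_lr) * 0.5 * (1.0 + math.cos(math.pi * progress))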
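Also worth noting from the second hunk: the %date and %jobid placeholders in the run name imply the value is templated at startup. A minimal sketch of that substitution, assuming a SLURM-style job id in the environment (the helper below is hypothetical, not nanotron's actual code):

import datetime
import os


def expand_run_name(run: str) -> str:
    # Hypothetical helper: expand %date and %jobid in a templated run name.
    date = datetime.datetime.now().strftime("%Y%m%d_%H%M%S")
    jobid = os.environ.get("SLURM_JOB_ID", "local")  # assumption: SLURM provides the job id
    return run.replace("%date", date).replace("%jobid", jobid)


print(expand_run_name("tiny_llama_%date_%jobid"))  # e.g. tiny_llama_20240101_120000_12345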
86 changes: 0 additions & 86 deletions examples/debug_run_train.yaml

This file was deleted.

22 changes: 11 additions & 11 deletions src/nanotron/config/config.py
@@ -288,17 +288,17 @@ def __post_init__(self):
 class Config:
     """Main configuration class"""
 
-    general: Optional[GeneralArgs]
-    checkpoints: Optional[CheckpointsArgs]
-    parallelism: Optional[ParallelismArgs]
-    model: Optional[ModelArgs]
-    tokenizer: Optional[TokenizerArgs]
-    logging: Optional[LoggingArgs]
-    tokens: Optional[TokensArgs]
-    optimizer: Optional[OptimizerArgs]
-    data: Optional[DataArgs]
-    profiler: Optional[ProfilerArgs]
-    lighteval: Optional[LightEvalConfig]
+    general: GeneralArgs
+    parallelism: ParallelismArgs
+    model: ModelArgs
+    tokenizer: TokenizerArgs
+    checkpoints: Optional[CheckpointsArgs] = None
+    logging: Optional[LoggingArgs] = None
+    tokens: Optional[TokensArgs] = None
+    optimizer: Optional[OptimizerArgs] = None
+    data: Optional[DataArgs] = None
+    profiler: Optional[ProfilerArgs] = None
+    lighteval: Optional[LightEvalConfig] = None
 
     @classmethod
     def create_empty(cls):
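Why the fields are reordered as well as retyped: Python dataclasses require fields without defaults to precede fields with defaults, so the four now-mandatory sections must come first. A self-contained sketch of the rule with simplified stand-in types (not nanotron's real classes):

from dataclasses import dataclass
from typing import Optional


@dataclass
class Config:  # simplified stand-in for nanotron's Config
    general: str                       # required: no default, must come first
    parallelism: str                   # required
    checkpoints: Optional[str] = None  # optional: defaulted fields come last


cfg = Config(general="debug", parallelism="tp=2")
assert cfg.checkpoints is None  # omitted optional sections default to None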
2 changes: 1 addition & 1 deletion src/nanotron/serialize/optimizer.py
@@ -17,9 +17,9 @@
 )
 from nanotron.parallel import ParallelContext
 from nanotron.parallel.parameters import NanotronParameter
-from nanotron.sanity_checks import check_optim_state_in_sync
 from nanotron.serialize.metadata import TensorMetadata
 from nanotron.serialize.utils import ObjectType, merge_and_shard_tp_tensors
+from nanotron.sanity_checks import check_optim_state_in_sync
 
 
 # TODO(xrsrke): take rank instead of parallel_context
2 changes: 0 additions & 2 deletions src/nanotron/serialize/utils.py
@@ -4,11 +4,9 @@
 from typing import List, Optional, Tuple
 
 import torch
-import torch.distributed as dist
 
 from nanotron.parallel import ParallelContext
 from nanotron.parallel.parameters import SlicesPair
-from nanotron.sanity_checks import assert_tensor_synced_across_pg
 from nanotron.serialize.metadata import TensorMetadata
 
 