Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
10 changes: 6 additions & 4 deletions projects/Llama/configs/llama_config.py
Original file line number Diff line number Diff line change
@@ -1,7 +1,9 @@
from omegaconf import DictConfig, OmegaConf

from libai.config import LazyCall
from projects.Llama.llama import LlamaForCausalLM
from projects.Llama.llama_gpt import LlamaForCausalLM

# from projects.Llama.llama import LlamaForCausalLM
from projects.Llama.tokenizer import LlamaTokenizer
from configs.common.train import train

Expand All @@ -21,7 +23,7 @@
tie_word_embeddings=False,
vocab_size=32000,
use_scaled_init_for_output_weights=False,
scale_mask_softmax_fusion=False,
scale_mask_softmax_fusion=True,
amp_enabled=True,
# Inference
is_encoder_decoder=False,
Expand All @@ -48,7 +50,7 @@
eos_token_id=2,
pad_token_id=0,
# train
pretrained_model_path="meta-llama/Llama-2-7b-hf",
pretrained_model_path="Llama-2-7b-hf",
)

cfg = DictConfig(cfg)
Expand All @@ -57,5 +59,5 @@
tokenization = OmegaConf.create()
tokenization.make_vocab_size_divisible_by = 1
tokenization.tokenizer = LazyCall(LlamaTokenizer)(
pretrained_model_path="meta-llama/Llama-2-7b-hf/tokenizer.model"
pretrained_model_path="Llama-2-7b-hf/tokenizer.model"
)
10 changes: 5 additions & 5 deletions projects/Llama/configs/llama_sft.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,14 +13,14 @@
from projects.Llama.configs.llama_config import cfg
from projects.Llama.dataset import AlpacaDataset
from projects.Llama.tokenizer import LlamaTokenizer
from projects.Llama.llama import LlamaForCausalLM
from projects.Llama.llama_gpt import LlamaForCausalLM


# Hyperparameters
weight_decay = 0.1
learning_rate = 5e-5
dataset_path = "alpaca_data"
pretrained_model_path = "meta-llama/Llama-2-7b-hf"
dataset_path = "/home/lixin/Data/alpaca"
pretrained_model_path = "/data/hf_models/Llama-2-7b-hf"

# graph & optim
graph["enabled"] = False
Expand Down Expand Up @@ -68,7 +68,7 @@
train_iter=1,
log_period=10,
warmup_ratio=1 / 3,
num_accumulation_steps=8,
num_accumulation_steps=1,
rdma_enabled=False,
amp=dict(enabled=True),
activation_checkpoint=dict(enabled=True),
Expand All @@ -79,7 +79,7 @@
dist=dict(
data_parallel_size=1,
tensor_parallel_size=1,
pipeline_parallel_size=8,
pipeline_parallel_size=4,
pipeline_num_layers=cfg.hidden_layers,
),
evaluation=dict(
Expand Down
1 change: 1 addition & 0 deletions projects/Llama/images/LLamaLayer.drawio
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
<mxfile host="Electron" modified="2024-01-22T04:36:07.699Z" agent="5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) draw.io/19.0.0 Chrome/100.0.4896.160 Electron/18.3.2 Safari/537.36" etag="yhvSpiKs_DXS7XiJI6U_" version="19.0.0" type="device"><diagram id="acZnmB1dkhpwk139XDq4" name="第 1 页">7V1bc5s4FP41nn2qxwIE+LG5tH1IZzubndntvmTUoNg0YHmx3MT76xdiCWMkYtEgkHDHk6mRAZvznfOdi3TUiXuZPn/M0Hr5mUQ4mTiz6HniXk0cB8zcMP+nGNntR3xvvh9YZHHETjoM3Mb/YX4lG93GEd4cnUgJSWi8Ph68J6sVvqdHYyjLyNPxaQ8kOf7WNVpgYeD2HiXi6F9xRJf70dAJDuOfcLxY8m8GPnu+FPGT2ZNsligiT5Uh93riXmaE0P279PkSJ4XwuFz2131o+LT8YRleUZUL/nl8ysL1jQcpvfwdzf6Y/Y3cdz67zQ+UbNkTs19Ld1wEGdmuIlzcBUzci6dlTPHtGt0Xnz7loOdjS5om7OMIbZbluRuakcdSbvkTX4g/mv8AnFH8XBliD/ERkxTTbJefwj+FTKA7rilwf/x0wCeYsXOWFWxcPoiYTizKex/Elr9hkmsjxdNCXORSXKs/fanQ6Bu/w+xVqfg1oYQzQSi+RCaONpl4gkyWcRTh1cTxk/zLL75l+bsFfdESRHMTb1a72Wm1EyX7KlDKyiaqFuxVilDULHiRP+3ln/nf1QReCWLLH4wey2ZvhZckIVk+siKr/MyLhzhJakMoiRer/PA+lyHOxy8KMcU5Fb5nH6Q5gMXXSME4huuBrOgt+1ESpnojOFzJnWDqBJVXqKTz0JtCr/KCnibwguFpofQ+XGSuGi8EuhQ6FGTy7xaz++q2/qBBqm+0fm2ymjcZ/ydOAL+NiwHUAWLq7NVNWUm5wXw6m1dfriYAeRBrEgOUx0MxABBDzkfcDwGUeNjCAMA5NwpoAZElHCAGwoNzgAuG5gAxrt2/7YUFvAa5GssCkvxy5CygDpElLBAI6OBogblcSEaXZEFWKLk+jNbkeDjnhpA1A/Q7pnTHqmZoS8kx3LmQs93f7PqXg6/FwRTyw6vn6odXO3bUSEUbss3u8SvPyXCjKFvg1wBmaUAhg1fRzXCCaPzjuBgnQ+fl0vdZhnaVE9YkXtFN5c5fioGDXYN5LTZiavNB8Xzgw5pm7H/BQU/KR3mD6oQ2qQ5+jmlxGZjOIDvcX+gGITs+XFkcVC/8grM4F1fBOD3pIQ9Gu1PEt9HE3Case4AHjoInXK8HnnDEZDOJVxhlhsYF6vFu81SAV8NFsbrl6orUJLH9KO03VLRfR4/9Cgbn1ac/mGMub7H/peyqGshd2J4r2F5G8u/c3a3J5g6n33J4R2uFrlsTvje0FRqQcnv1WcqhC++OmHL3V3l3bEu5nbNLuVtAZEfK7Rgw/VanxsGL7444/9ZX8d2xbfrNObv5txYQ2cEBLhDQGWU8zssYpwPyzgtvb8PHMY+jB58cccVkor/JkRIRW1jalQT7B5a+HSFLt4DIEpaGZ8LSUJGlOQOYwtJiMrRGG3onixzNMCt1d9HsFkL/yC0MXlt0xXzmBQW5bxgNDk7gHbtnxRRKHw5iCtV7zFKvLg2eV7pirtRXXlniYUvEwsujZxSxqENkR8TinUle6ahO9HD7NyRi8SR55TWczGeT8HJy7U0uwkn43lAD68RnQrNiF09MaXv3mbA+I6PYIqXPD4iZK6J01YvTLAGxxmk2dkLxYuztuHxmC4Qs8Zn+ID6Tr0ebVBej8aVpDUvR+vezqpUBzhmm+Nlh1iZagSkPiWzDFErSk759tW+ar4Zi01JvvroExBZfDSXR96h9dQuE7PDV0P3F66fo+iSvc84whdcNWA8D6w0UkrrlXKL4UBtVicXce7TdoOLCFG0e+yH41itj3sGpyBgh7FNwjYtjwEg5vjVI4ZGqu2AagsrLUWN8d+q71VeoCU/+5WOvaELViqZh7O0bUEELvJ+LyvVtj+OLJTSypest7YW3fU1FNI3yOllFG9vUUwuMmnYVUovNhV2FdEEoRiwb8kBT9CzurJUimm4TQ9FUJ6rmMkGt7cfx1XaA0jbPwTf/+5U3NXP1Sc8bOF17XnZprWksrHd1ht7Un0MfOiBw58Ct3VBzCxmvxlRd2d06I9/Ha75BOD8GAPrDmm+gsBum7iArDH4yyALapCKW83oMsoLWajV0kBVIQvXRbtrYAp6yE7Ke06rFV/K9HrtHzxvEhfefCKu2jPidT0+9XNp2D4agvo8deH0PBuF8xqNa92AIhlnJbkX8xxe126pwtU08DFE4cWn+521C4yVG0XtKc5BiIs5Smuhu2PFex4HfTYQJasGUL1ZdQSApm+uLMMVJGBRFhgLUAQD+rM7aA69EDBRW7xu34f3xak5PFjHNpVqsTYqS2Z++E6Va7QcCtThSX9wfihm8AVvel1BZkz+Fkix8vPmTOjyW7HoPJL1ANzcoRTdol0vYBug0xSI1GpesfQBAQuPaUl3gGVDvqu9NPDyPA0n3iAFEfkDLGiYHsr6P0VJ5G4Bs4XLPqmqYMmuf3uCQSf30DqWcLAxZJwIkjTLJ3vHOViRLTbY3hpymtAmGEuPqM/cHngErMN0wMM7diim5Ee5W035lOiXZuChzlO629W5lxrtbPikz3vmD7lx06XkVfLRZu54BSYNMyqv2xRfKOosMMtRu/bRTW4YD56JHklU3HW00CocJepkVHizva+WTU1Y4dWDVEEELM3wVx1GboTh5Zu5kjA7L44s3mOUFkliwZ8szIEKGvPnNnAhZ0qVkQoTcviVm8Ai5uW1pjBFyC4BsiZD9YSLkDqLdt7pZ1calki1McbN80cXZV6Q8P5iaVpMyoq/MvCkgSWOZCR7Xvo4z0NxyNkaP20G7mXEe16r/urJDj6vcKgy6X7H6RsjEfCFN1ibbWbeO1q81KhvgZgdtDJxNg5+qKQ1WUrLW8iRdfGdVUvJ8UAtxhy8qmdjVZ0CIK2nrMyHEta/fD5xVw18bgGwJccffttWhc1bu4wfdN/I3wJwfZoTQamtVru7LzyTCxRn/Aw==</diagram></mxfile>
4 changes: 4 additions & 0 deletions projects/Llama/images/LLamaLayer.svg
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading