LLM Fine-Tuning Guide
Master fine-tuning of large language models for specific domains and tasks. Covers data preparation, training techniques, optimization strategies, and evaluation methods. Use when adapting models for specialized applications, reducing inference costs, or improving domain-specific performance.
npx skill4agent add qodex-ai/ai-agent-skills llm-fine-tuning-guide
python examples/full_fine_tuning.py
python examples/lora_fine_tuning.py
python examples/qlora_fine_tuning.py
python scripts/data_preparation.py
from transformers import AutoModelForCausalLM, AutoTokenizer, TrainingArguments, Trainer
# Full fine-tuning: update every weight of the base model.
# NOTE(review): the transformers checkpoint is usually published as
# "meta-llama/Llama-2-7b-hf" (gated repo) — verify this model id resolves.
model_id = "meta-llama/Llama-2-7b"
model = AutoModelForCausalLM.from_pretrained(model_id)
tokenizer = AutoTokenizer.from_pretrained(model_id)
training_args = TrainingArguments(
output_dir="./fine-tuned-llama",
num_train_epochs=3,
per_device_train_batch_size=4,
gradient_accumulation_steps=4, # effective batch = 4 * 4 = 16 per device
learning_rate=2e-5,
weight_decay=0.01,
logging_steps=10,
save_steps=100, # kept a round multiple of eval_steps; load_best_model_at_end expects aligned save/eval points
eval_strategy="steps",
eval_steps=50,
load_best_model_at_end=True, # restore the best (lowest eval loss) checkpoint after training
)
# train_dataset / eval_dataset are assumed to be prepared earlier in the guide.
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
)
trainer.train()from peft import get_peft_model, LoraConfig, TaskType
from transformers import AutoModelForCausalLM, AutoTokenizer
# LoRA: freeze the base model and train small low-rank adapter matrices instead.
base_model_id = "meta-llama/Llama-2-7b"
model = AutoModelForCausalLM.from_pretrained(base_model_id)
tokenizer = AutoTokenizer.from_pretrained(base_model_id)
# Configure LoRA
lora_config = LoraConfig(
r=8, # Rank of low-rank matrices
lora_alpha=16, # Scaling factor
target_modules=["q_proj", "v_proj"], # Which layers to adapt
lora_dropout=0.05,
bias="none", # do not train bias terms
task_type=TaskType.CAUSAL_LM
)
# Wrap model with LoRA
model = get_peft_model(model, lora_config)
model.print_trainable_parameters()
# Output: trainable params: 4,194,304 || all params: 6,738,415,616 || trainable%: 0.06
# Train as normal
# (training_args / train_dataset come from the earlier full fine-tuning example.)
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
)
trainer.train()
# Save only LoRA weights
model.save_pretrained("./llama-lora-adapter")from peft import prepare_model_for_kbit_training, get_peft_model, LoraConfig
# QLoRA: fine-tune LoRA adapters on top of a 4-bit-quantized base model,
# making a 7B model trainable on a single consumer GPU.
# Fixed: TaskType, TrainingArguments and Trainer were used below but never
# imported by this snippet.
from transformers import AutoModelForCausalLM, BitsAndBytesConfig, TrainingArguments, Trainer
from peft import TaskType
# Quantization config
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",          # NormalFloat4 quantization
    bnb_4bit_compute_dtype="float16",   # compute in fp16 while weights stay 4-bit
    bnb_4bit_use_double_quant=True,     # also quantize the quantization constants
)
# Load quantized model
model = AutoModelForCausalLM.from_pretrained(
    "meta-llama/Llama-2-7b",
    quantization_config=bnb_config,
    device_map="auto",
)
# Prepare for k-bit training (casts norm layers, enables input gradients)
model = prepare_model_for_kbit_training(model)
# Apply LoRA to all four attention projections
lora_config = LoraConfig(
    r=8,
    lora_alpha=32,
    target_modules=["q_proj", "v_proj", "k_proj", "o_proj"],
    lora_dropout=0.05,
    bias="none",
    task_type=TaskType.CAUSAL_LM,
)
model = get_peft_model(model, lora_config)
# Train on single GPU
trainer = Trainer(
    model=model,
    args=TrainingArguments(
        output_dir="./qlora-output",
        per_device_train_batch_size=1,
        gradient_accumulation_steps=4,  # effective batch size 4
        learning_rate=5e-4,
        num_train_epochs=3,
    ),
    train_dataset=train_dataset,
)
trainer.train()from peft import get_peft_model, PrefixTuningConfig
# Prefix tuning: learn `num_virtual_tokens` virtual prompt embeddings while the
# base model's weights stay frozen.
# Fixed: TaskType was used but not imported by this snippet.
from peft import TaskType
config = PrefixTuningConfig(
    num_virtual_tokens=20,
    task_type=TaskType.CAUSAL_LM,
)
model = get_peft_model(model, config)
# Only 20 * embedding_dim parameters trained# Training data format
# Instruction-tuning examples: each record pairs an instruction (plus optional
# input context) with the desired model output.
training_data = [
{
"instruction": "Translate to French",
"input": "Hello, how are you?",
"output": "Bonjour, comment allez-vous?"
},
{
"instruction": "Summarize this text",
"input": "Long document...",
"output": "Summary..."
}
]
# Template for training
# (Alpaca-style prompt format; the {output} section is what the model learns to produce.)
template = """Below is an instruction that describes a task, paired with an input that provides further context.
### Instruction:
{instruction}
### Input:
{input}
### Response:
{output}"""
# Create formatted dataset
# Fill the template with each record; training consumes these flat strings.
formatted_data = [
template.format(**example) for example in training_data
]legal_training_data = [
# Domain adaptation example: prompt/completion pairs for the legal domain.
{
"prompt": "What are the key clauses in an NDA?",
"completion": """Key clauses typically include:
1. Definition of Confidential Information
2. Non-Disclosure Obligations
3. Permitted Disclosures
4. Term and Termination
5. Return of Information
6. Remedies"""
},
# ... more legal examples
]
# Train on legal domain
# NOTE(review): fine_tune_on_domain is not defined anywhere in this guide —
# it is illustrative pseudo-API. Also, "gpt-3.5-turbo" is fine-tuned through
# the OpenAI API (see below), not locally — confirm intended workflow.
model = fine_tune_on_domain(
base_model="gpt-3.5-turbo",
training_data=legal_training_data,
epochs=3,
learning_rate=0.0002,
)class DatasetValidator:
def validate_dataset(self, data):
    """Scan a list of {"text": ...} samples for common data-quality issues.

    Returns a dict with counts of empty samples, exact-duplicate texts, and
    length outliers (> 3x the mean word count). "imbalance" is reserved for
    label-balance checks and is left empty here.

    Fixes over the original: samples with missing/None "text" no longer crash
    the length check (None.split()), empty texts are excluded from the
    duplicate count (they are already reported as empty_samples), and an
    empty dataset no longer raises ZeroDivisionError.
    """
    issues = {
        "empty_samples": 0,
        "duplicates": 0,
        "outliers": 0,
        "imbalance": {}
    }
    # Collect non-empty texts while counting empty/missing ones.
    texts = []
    for sample in data:
        text = sample.get("text")
        if not text:
            issues["empty_samples"] += 1
        else:
            texts.append(text)
    # Exact duplicates among the usable texts.
    issues["duplicates"] = len(texts) - len(set(texts))
    # Length outliers: more than 3x the mean whitespace-token count.
    lengths = [len(t.split()) for t in texts]
    if lengths:
        mean_length = sum(lengths) / len(lengths)
        issues["outliers"] = sum(1 for n in lengths if n > mean_length * 3)
    return issues
# Validate before training
# NOTE(review): validate_dataset() inspects sample["text"], but the
# instruction-format records defined earlier use instruction/input/output
# keys, so they would all be counted as empty_samples — confirm the expected
# schema before relying on this check.
validator = DatasetValidator()
issues = validator.validate_dataset(training_data)
print(f"Dataset Issues: {issues}")from nlpaug.augmenter.word import SynonymAug, RandomWordAug
import nlpaug.flow as naf
# Create augmentation pipeline
text = "The quick brown fox jumps over the lazy dog"
# Synonym replacement (aug_p = probability of augmenting each word)
aug_syn = SynonymAug(aug_p=0.3)
augmented_syn = aug_syn.augment(text)
# Random word insertion
# NOTE(review): RandomWordAug documents swap/substitute/delete/crop actions;
# confirm "insert" is supported by the installed nlpaug version.
aug_insert = RandomWordAug(action="insert", aug_p=0.3)
augmented_insert = aug_insert.augment(text)
# Combine augmentations: applied in sequence to the same text
flow = naf.Sequential([
SynonymAug(aug_p=0.2),
RandomWordAug(action="swap", aug_p=0.2)
])
augmented = flow.augment(text)from sklearn.model_selection import train_test_split
# Create splits
# First carve 20% off for evaluation (fixed seed for reproducibility)...
train_data, eval_data = train_test_split(
data,
test_size=0.2,
random_state=42
)
# ...then split that 20% in half, yielding 80/10/10 train/eval/test overall.
eval_data, test_data = train_test_split(
eval_data,
test_size=0.5,
random_state=42
)
print(f"Train: {len(train_data)}, Eval: {len(eval_data)}, Test: {len(test_data)}")from torch.optim.lr_scheduler import CosineAnnealingLR, LinearLR
# Linear warmup + linear decay. (The original comment said "cosine annealing",
# but get_linear_schedule_with_warmup decays linearly after warmup.)
def get_scheduler(optimizer, num_steps, num_warmup_steps=500):
    """Return a linear warmup + linear decay LR scheduler.

    Args:
        optimizer: the torch optimizer whose learning rate is scheduled.
        num_steps: total number of training steps.
        num_warmup_steps: steps spent ramping the LR from 0 to its base value
            (default 500, matching the original hard-coded value).
    """
    # Imported locally: the surrounding snippet never imported this helper,
    # which lives in transformers, not torch.optim.lr_scheduler.
    from transformers import get_linear_schedule_with_warmup
    return get_linear_schedule_with_warmup(
        optimizer,
        num_warmup_steps=num_warmup_steps,
        num_training_steps=num_steps,
    )
training_args = TrainingArguments(
learning_rate=1e-4,
lr_scheduler_type="cosine", # cosine decay after warmup
# NOTE(review): both warmup_steps and warmup_ratio are set; in transformers a
# non-zero warmup_steps takes precedence over warmup_ratio — confirm intent.
warmup_steps=500,
warmup_ratio=0.1,
)training_args = TrainingArguments(
# Gradient accumulation: run 4 forward/backward passes per optimizer step,
# simulating a batch of 4 on hardware that only fits batch size 1.
gradient_accumulation_steps=4, # Accumulate gradients over 4 steps
per_device_train_batch_size=1, # Effective batch size: 1 * 4 = 4
)
# Simulates larger batch on limited GPU memorytraining_args = TrainingArguments(
fp16=True, # Use 16-bit floats
# NOTE(review): on Ampere+ GPUs bf16=True is generally more numerically stable
# than fp16 — confirm target hardware before keeping fp16 here.
bf16=False,
)
# Reduces memory usage by 50%, speeds up trainingtraining_args = TrainingArguments(
output_dir="./results",
num_train_epochs=3,
per_device_train_batch_size=8, # 8 per GPU; x4 accumulation = 32 per GPU per step
per_device_eval_batch_size=8,
gradient_accumulation_steps=4,
dataloader_pin_memory=True, # pinned host memory speeds host-to-GPU copies
dataloader_num_workers=4, # parallel data loading workers
)
# Automatically uses all available GPUs
# NOTE(review): Trainer falls back to DataParallel when launched as a plain
# script; for DDP launch via torchrun/accelerate — confirm launch method.
trainer = Trainer(
model=model,
args=training_args,
train_dataset=train_dataset,
eval_dataset=eval_dataset,
)from transformers import AutoModelForCausalLM, AutoTokenizer
model = AutoModelForCausalLM.from_pretrained("meta-llama/Llama-3.2-7b")
tokenizer = AutoTokenizer.from_pretrained("meta-llama/Llama-3.2-7b")
# Fine-tune on custom data
# ... training codemodel = AutoModelForCausalLM.from_pretrained("google/gemma-3-2b")
tokenizer = AutoTokenizer.from_pretrained("google/gemma-3-2b")
# Gemma 3 sizes: 2B, 7B, 27B
# Very efficient, great for fine-tuningmodel = AutoModelForCausalLM.from_pretrained("mistralai/Mistral-7B-v0.1")
tokenizer = AutoTokenizer.from_pretrained("mistralai/Mistral-7B-v0.1")
# Strong performance, efficient architectureimport openai
# Prepare training data
# Migrated to the openai>=1.0 client API: the module-level openai.File /
# openai.FineTuningJob / openai.ChatCompletion interfaces were removed in v1
# and raise errors on current SDK versions.
from openai import OpenAI

client = OpenAI()  # reads OPENAI_API_KEY from the environment
# Upload the JSONL training file (with-block fixes the original's leaked handle).
with open("training_data.jsonl", "rb") as fh:
    training_file = client.files.create(file=fh, purpose="fine-tune")
# Create fine-tuning job
fine_tune_job = client.fine_tuning.jobs.create(
    training_file=training_file.id,
    model="gpt-3.5-turbo",
    hyperparameters={
        "n_epochs": 3,
        "learning_rate_multiplier": 0.1,
    },
)
# Check job status. NOTE: a single retrieve() only snapshots the state;
# real code should poll until status == "succeeded".
fine_tuned_model = client.fine_tuning.jobs.retrieve(fine_tune_job.id)
print(f"Status: {fine_tuned_model.status}")
# Use fine-tuned model
response = client.chat.completions.create(
    model=fine_tuned_model.fine_tuned_model,
    messages=[{"role": "user", "content": "Hello"}]
)
)import torch
from math import exp
import torch  # needed for torch.no_grad(); the snippet relied on an earlier import


def calculate_perplexity(model, eval_dataset):
    """Compute corpus perplexity: exp(total NLL / total token count).

    The original divided by the number of *sequences* (input_ids.shape[0]),
    which yields exp(mean per-sequence loss), not perplexity. Here each
    batch's mean loss is weighted by its token count and the sum is divided
    by the total number of tokens.

    Assumes outputs.loss is the mean token-level cross-entropy for the batch
    (true for HF causal-LM models given labels) and that input_ids.numel()
    approximates the number of scored tokens — padding/label shifting would
    need masking for an exact count; TODO confirm against the data collator.
    """
    model.eval()
    total_loss = 0.0
    total_tokens = 0
    with torch.no_grad():
        for batch in eval_dataset:
            outputs = model(**batch)
            num_tokens = batch["input_ids"].numel()
            total_loss += outputs.loss.item() * num_tokens
            total_tokens += num_tokens
    return exp(total_loss / total_tokens)
perplexity = calculate_perplexity(model, eval_dataset)
print(f"Perplexity: {perplexity:.2f}")from sklearn.metrics import accuracy_score, f1_score, precision_score, recall_score
def evaluate_task(predictions, ground_truth):
    """Score predictions against ground truth with standard classification metrics.

    Precision, recall and F1 use weighted averaging so class imbalance is
    reflected in the aggregate scores. Returns a metric-name -> float dict.
    """
    metrics = {"accuracy": accuracy_score(ground_truth, predictions)}
    scorers = (
        ("precision", precision_score),
        ("recall", recall_score),
        ("f1", f1_score),
    )
    for name, scorer in scorers:
        metrics[name] = scorer(ground_truth, predictions, average='weighted')
    return metrics
# Evaluate on task
# (model.predict / test_data / test_labels come from surrounding tutorial context.)
predictions = [model.predict(x) for x in test_data]
metrics = evaluate_task(predictions, test_labels)
print(f"Metrics: {metrics}")class HumanEvaluator:
# Rubric-based human evaluation: evaluate_response averages four 1-5
# criterion scores into a single number.
# NOTE(review): the scorer methods below are stubs (`pass` returns None), so
# sum(criteria.values()) would raise TypeError until they are implemented.
def evaluate_response(self, prompt, response):
criteria = {
"relevance": self._score_relevance(prompt, response),
"coherence": self._score_coherence(response),
"factuality": self._score_factuality(response),
"helpfulness": self._score_helpfulness(response),
}
return sum(criteria.values()) / len(criteria)
def _score_relevance(self, prompt, response):
# Score 1-5
pass
def _score_coherence(self, response):
# Score 1-5
pass# Conservative training settings
# Conservative settings to reduce overfitting / catastrophic forgetting.
training_args = TrainingArguments(
learning_rate=2e-5, # Lower learning rate
num_train_epochs=2, # Few epochs
weight_decay=0.01, # L2 regularization
warmup_steps=500,
save_total_limit=3, # keep only the 3 most recent checkpoints on disk
# NOTE(review): load_best_model_at_end also requires eval and save strategies
# to be configured and aligned — confirm against the full setup.
load_best_model_at_end=True,
)training_args = TrainingArguments(
eval_strategy="steps",
eval_steps=50,
load_best_model_at_end=True,
# Fixed: TrainingArguments has no `early_stopping_patience` parameter — passing
# it raises TypeError. Early stopping is configured on the Trainer instead:
#   trainer = Trainer(..., callbacks=[EarlyStoppingCallback(early_stopping_patience=3)])
metric_for_best_model="eval_loss",
)# Use LoRA when data is limited
# With limited data, LoRA's small trainable-parameter count acts as a
# regularizer compared to full fine-tuning.
lora_config = LoraConfig(
r=8, # low rank keeps the number of trainable parameters small
lora_alpha=16, # scaling factor (effective scale alpha / r = 2)
target_modules=["q_proj", "v_proj"], # adapt only attention q/v projections
lora_dropout=0.05,
)