组件
Twinkle 提供模块化的组件生态,各组件可独立使用,也可自由组合。
Dataset
数据加载与预处理,支持 ModelScope 和 HuggingFace 数据集。
from twinkle.dataset import Dataset, DatasetMeta
# 从 ModelScope 加载
dataset = Dataset(dataset_meta=DatasetMeta(
'ms://swift/self-cognition',
data_slice=range(1000)
))
# 从 HuggingFace 加载
dataset = Dataset(dataset_meta=DatasetMeta('hf://dataset-name'))
# 设置编码模板
dataset.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-4B')
# 应用预处理
dataset.map(SelfCognitionProcessor('Model Name', 'Author'))
# 编码
dataset.encode()
PackingDataset
Bin-packing 数据打包,最大化 GPU 利用率:
from twinkle.dataset import PackingDataset
dataset = PackingDataset(dataset_meta)
dataset.pack_dataset()
DataLoader
支持 device mesh 感知的分布式数据加载:
from twinkle.dataloader import DataLoader
dataloader = DataLoader(
dataset=dataset,
batch_size=8,
min_batch_size=8,
device_mesh=device_mesh,
remote_group='model'
)
for batch in dataloader:
model.forward_backward(inputs=batch)
Model
支持多框架的大模型封装:
TransformersModel
from twinkle.model import TransformersModel
model = TransformersModel(
model_id='ms://Qwen/Qwen3.5-4B',
remote_group='default',
device_mesh=device_mesh
)
MegatronModel
from twinkle.model.megatron import MegatronModel
model = MegatronModel(
model_id='ms://Qwen/Qwen3.5-4B',
device_mesh=model_mesh,
remote_group='model',
mixed_precision='bf16'
)
添加 LoRA 适配器
from peft import LoraConfig
lora_config = LoraConfig(
r=8,
lora_alpha=32,
target_modules='all-linear'
)
model.add_adapter_to_model(
'default',
lora_config,
gradient_accumulation_steps=2
)
设置优化器与调度器
model.set_optimizer(optimizer_cls='AdamW', lr=1e-4)
model.set_lr_scheduler(
scheduler_cls='CosineWarmupScheduler',
num_warmup_steps=5,
num_training_steps=100
)
设置损失函数
# SFT 训练
model.set_loss('CrossEntropyLoss')
# GRPO 训练
model.set_loss('GRPOLoss', epsilon=0.2, beta=0.0)
Sampler
用于推理和 RL 训练的采样组件:
from twinkle.sampler import vLLMSampler
from twinkle.data_format import SamplingParams
sampler = vLLMSampler(
model_id='ms://Qwen/Qwen3.5-4B',
engine_args={
'gpu_memory_utilization': 0.8,
'max_model_len': 4096,
'enable_lora': True,
},
device_mesh=sampler_mesh,
remote_group='sampler'
)
sampling_params = SamplingParams(
max_tokens=1024,
num_samples=4,
logprobs=1
)
responses = sampler.sample(prompts, sampling_params)
Template
针对不同模型架构的分词模板:
from twinkle.template import Template
dataset.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-4B', max_length=2048)
sampler.set_template('Qwen3_5Template', model_id='ms://Qwen/Qwen3.5-4B')
Preprocessor
数据预处理与过滤:
from twinkle.preprocessor import Preprocessor, SelfCognitionProcessor
# 内置预处理器
dataset.map(SelfCognitionProcessor('Model Name', 'Author'))
# 自定义预处理器
class MyProcessor(Preprocessor):
def __call__(self, example):
# Transform example
return transformed_example
Loss
内置及可定制的损失函数:
from twinkle.loss import Loss
class CustomLoss(Loss):
def forward(self, logits, labels, **kwargs):
# Compute loss
return loss
Reward & Advantage
用于强化学习训练:
from twinkle.reward import GSM8KAccuracyReward
from twinkle.advantage import GRPOAdvantage
# 计算奖励
accuracy_reward = GSM8KAccuracyReward()
rewards = accuracy_reward(trajectories)
# 计算优势
advantage_fn = GRPOAdvantage()
advantages = advantage_fn(rewards, num_generations=8, scale='group')
Metric
训练指标收集:
from twinkle.metric import CompletionRewardMetric
metrics = CompletionRewardMetric()
metrics.accumulate(completion_lengths=lengths, rewards=rewards)
log_dict = metrics.calculate()
CheckpointEngine
RL 训练的权重同步:
from twinkle.checkpoint_engine import CheckpointEngineManager
ckpt_manager = CheckpointEngineManager(model=model, sampler=sampler)
# 同步权重到 sampler
ckpt_manager.sync_weights(merge_and_sync=False)