Install MLX LM and openai:
pip install mlx-lm openai
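Once installed, mlx_lm.server exposes a local OpenAI-compatible endpoint that the openai client can query. A minimal sketch, assuming the server's default localhost:8080 address and an example 4-bit Llama checkpoint from the mlx-community organization:

# In one terminal, start the server (the model choice is illustrative):
#   mlx_lm.server --model mlx-community/Meta-Llama-3.1-8B-Instruct-4bit
from openai import OpenAI

# The server speaks the OpenAI chat-completions protocol; no real API key is needed locally.
client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")
response = client.chat.completions.create(
    model="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit",
    messages=[{"role": "user", "content": "How tall is K2?"}],
    max_tokens=128,
)
print(response.choices[0].message.content)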
import os
import torch
import psutil
import datasets
import glob
from transformers import (
    AutoTokenizer, LlamaConfig, LlamaForCausalLM, Trainer, TrainingArguments,
    DataCollatorForLanguageModeling
)
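These imports point to a small from-scratch Llama pretraining setup built on the Trainer API. A minimal sketch of how they typically fit together; the tokenizer choice, model dimensions, and training arguments below are illustrative assumptions, not values from the original script:

tokenizer = AutoTokenizer.from_pretrained("gpt2")  # assumed tokenizer; any fast tokenizer works
tokenizer.pad_token = tokenizer.eos_token  # causal-LM collation needs a pad token

config = LlamaConfig(
    vocab_size=len(tokenizer), hidden_size=512, intermediate_size=2048,
    num_hidden_layers=8, num_attention_heads=8,
)
model = LlamaForCausalLM(config)  # randomly initialized, trained from scratch

collator = DataCollatorForLanguageModeling(tokenizer=tokenizer, mlm=False)  # plain next-token objective
args = TrainingArguments(output_dir="llama-tiny", per_device_train_batch_size=8, max_steps=1_000)
# trainer = Trainer(model=model, args=args, data_collator=collator, train_dataset=tokenized_dataset)
# where tokenized_dataset is a pre-tokenized datasets.Dataset (hypothetical name)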
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "distilabel[mlx]",
# ]
# ///
from distilabel.models import MlxLLM
from distilabel.pipeline import InstructionResponsePipeline

llm = MlxLLM(
    # Model repo is an example value; the argument name follows the MLX-LM loader convention.
    path_or_hf_repo="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit",
)
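From there, the pre-built pipeline can be assembled and run. Passing the MLX-backed LLM to InstructionResponsePipeline and the run() call below are a sketch of the usual distilabel flow, not verbatim from the original:

pipeline = InstructionResponsePipeline(llm=llm)
distiset = pipeline.run()  # returns a Distiset that can be saved locally or pushed to the Hub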
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "distilabel[hf-transformers, hf-inference-endpoints]",
# ]
# ///
from distilabel.models import InferenceEndpointsLLM
from distilabel.pipeline import InstructionResponsePipeline

repo_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
# requires sentence_transformers>=3.2.0
from sentence_transformers import SentenceTransformer, export_optimized_onnx_model, export_dynamic_quantized_onnx_model

# The model to export to ONNX (+ optimize, quantize), OpenVINO
model_id = "mixedbread-ai/mxbai-embed-large-v1"
# Where to save the exported models locally
output_dir = model_id.replace("/", "-")

onnx_model = SentenceTransformer(model_id, backend="onnx", model_kwargs={"export": True})
onnx_model.save_pretrained(output_dir)
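The two export helpers imported above can then write optimized and dynamically quantized variants next to the plain ONNX export, and OpenVINO follows the same backend pattern. The "O3" optimization level and "avx512_vnni" quantization config used below are documented options in sentence-transformers 3.2+, shown here as an example:

# Save an O3 graph-optimized copy of the ONNX model into the same directory
export_optimized_onnx_model(onnx_model, "O3", output_dir)

# Save an int8 dynamically quantized copy
export_dynamic_quantized_onnx_model(onnx_model, "avx512_vnni", output_dir)

# OpenVINO export works the same way through its dedicated backend
openvino_model = SentenceTransformer(model_id, backend="openvino")
openvino_model.save_pretrained(output_dir)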
Begin by enclosing all thoughts within <thinking> tags, exploring multiple angles and approaches.
Break down the solution into clear steps within <step> tags. Start with a 20-step budget, requesting more for complex problems if needed.
Use <count> tags after each step to show the remaining budget. Stop when reaching 0.
Continuously adjust your reasoning based on intermediate results and reflections, adapting your strategy as you progress.
Regularly evaluate progress using <reflection> tags. Be critical and honest about your reasoning process.
Assign a quality score between 0.0 and 1.0 using <reward> tags after each reflection. Use this to guide your approach:
    0.8+: Continue current approach
    0.5-0.7: Consider minor adjustments
    Below 0.5: Seriously consider backtracking and trying a different approach
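This template is intended to be supplied as a system prompt. A minimal sketch of doing so against the local MLX LM server from earlier in this section; the server address, model id, and the reasoning_prompt.txt file holding the template are assumptions:

from openai import OpenAI

with open("reasoning_prompt.txt") as f:  # hypothetical file containing the template above
    reasoning_prompt = f.read()

client = OpenAI(base_url="http://localhost:8080/v1", api_key="not-needed")
response = client.chat.completions.create(
    model="mlx-community/Meta-Llama-3.1-8B-Instruct-4bit",
    messages=[
        {"role": "system", "content": reasoning_prompt},
        {"role": "user", "content": "How many prime numbers are there between 1 and 20?"},
    ],
)
print(response.choices[0].message.content)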
#VERBOSE=0 torchrun --nproc_per_node 3 self_contained_pp_LOC.py
import os, random, numpy as np, torch, torch.nn as nn, torch.distributed as dist, torch.nn.functional as F
from torch.optim import AdamW
from torch.utils.data import DataLoader, DistributedSampler
from datasets import load_dataset
from transformers import AutoConfig, AutoModelForCausalLM, AutoTokenizer

STEP, local_rank, world_size, verbose = 0, int(os.environ["LOCAL_RANK"]), int(os.environ["WORLD_SIZE"]), os.environ.get("VERBOSE", "0") == "1"

def set_all_seed(seed):
    # Seed every RNG that affects data order and weight init (assumed completion; the snippet is truncated here).
    random.seed(seed)
    np.random.seed(seed)
    torch.manual_seed(seed)
    torch.cuda.manual_seed_all(seed)
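The rest of the pipeline-parallel script is not shown. For orientation, a typical initialization for such a torchrun launch looks like the following sketch; the NCCL backend and the call order are assumptions, not the original code:

def init_distributed():
    # torchrun sets RANK, LOCAL_RANK, and WORLD_SIZE; NCCL is the usual backend for multi-GPU runs
    dist.init_process_group(backend="nccl")
    torch.cuda.set_device(local_rank)

# Typical call order before building the model and dataloaders:
# set_all_seed(42); init_distributed()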
""" | |
A minimal, fast example generating text with Llama 3.1 in MLX. | |
To run, install the requirements: | |
pip install -U mlx transformers fire | |
Then generate text with: | |
python l3min.py "How tall is K2?" |
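The l3min.py implementation itself is not reproduced here. As a quick sanity check with the same model family, the mlx-lm package installed at the top of this section offers a two-call API; this is an alternative to the from-scratch script, not its contents:

from mlx_lm import load, generate

# Example 4-bit Llama 3.1 checkpoint converted for MLX (assumed choice)
model, tokenizer = load("mlx-community/Meta-Llama-3.1-8B-Instruct-4bit")
print(generate(model, tokenizer, prompt="How tall is K2?", max_tokens=128))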