@chaddy81
Last active November 26, 2025 17:36
Fine-tune Qwen2.5-Coder for UI design generation with Unsloth
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"source": "# Design Generator Fine-tuning with Unsloth\n\nThis notebook fine-tunes Qwen2.5-Coder for better HTML/CSS design generation.\n\n**Requirements:**\n- Google Colab with GPU (T4 free tier works, A100 recommended for faster training)\n- Training data in ShareGPT format\n\n**Steps:**\n1. Install dependencies\n2. Upload training data\n3. Load model and add LoRA\n4. Train\n5. Export to GGUF for Ollama",
"metadata": {
"id": "intro"
}
},
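{
"cell_type": "markdown",
"source": "## Sample training data record\n\nThe uploaded `training_sharegpt.json` is expected to be a JSON array of ShareGPT-style records using the same `from`/`value` keys that `format_example` reads in Step 5. A minimal sketch (the values here are illustrative, not from a real dataset):\n\n```json\n[\n  {\n    \"conversations\": [\n      {\"from\": \"system\", \"value\": \"You are an expert UI designer...\"},\n      {\"from\": \"human\", \"value\": \"Generate a pricing page with three tiers...\"},\n      {\"from\": \"gpt\", \"value\": \"<!DOCTYPE html>...\"}\n    ]\n  }\n]\n```",
"metadata": {
"id": "data_format"
}
},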
{
"cell_type": "code",
"source": [
"# Step 1: Install Unsloth (optimized for Colab)\n",
"%%capture\n",
"!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
"!pip install --no-deps trl peft accelerate bitsandbytes"
],
"metadata": {
"id": "install"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 2: Upload training data\n",
"from google.colab import files\n",
"import json\n",
"\n",
"print(\"Upload your training_sharegpt.json file:\")\n",
"uploaded = files.upload()\n",
"\n",
"# Load the data\n",
"filename = list(uploaded.keys())[0]\n",
"with open(filename) as f:\n",
" training_data = json.load(f)\n",
"\n",
"print(f\"Loaded {len(training_data)} training examples\")"
],
"metadata": {
"id": "upload"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "# Step 3: Load model with 4-bit quantization\nfrom unsloth import FastLanguageModel\nimport torch\n\n# Configuration\nmax_seq_length = 8192\ndtype = None # Auto-detect (Float16 for T4, BFloat16 for A100)\nload_in_4bit = True\n\n# Load Qwen2.5-Coder (7B fits on T4, can try 14B on A100)\nmodel, tokenizer = FastLanguageModel.from_pretrained(\n model_name=\"unsloth/Qwen2.5-Coder-7B-Instruct\",\n max_seq_length=max_seq_length,\n dtype=dtype,\n load_in_4bit=load_in_4bit,\n)\n\nprint(f\"Model loaded! GPU memory used: {torch.cuda.memory_allocated() / 1e9:.2f} GB\")",
"metadata": {
"id": "load_model"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 4: Add LoRA adapters\n",
"model = FastLanguageModel.get_peft_model(\n",
" model,\n",
" r=64, # LoRA rank (higher = more capacity but slower)\n",
" target_modules=[\n",
" \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
" \"gate_proj\", \"up_proj\", \"down_proj\",\n",
" ],\n",
" lora_alpha=16,\n",
" lora_dropout=0.05,\n",
" bias=\"none\",\n",
" use_gradient_checkpointing=\"unsloth\",\n",
" random_state=42,\n",
")\n",
"\n",
"print(\"LoRA adapters added!\")\n",
"print(f\"Trainable parameters: {model.print_trainable_parameters()}\")"
],
"metadata": {
"id": "add_lora"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 5: Prepare dataset\n",
"from unsloth.chat_templates import get_chat_template\n",
"from datasets import Dataset\n",
"\n",
"# Set chat template (Qwen uses ChatML)\n",
"tokenizer = get_chat_template(\n",
" tokenizer,\n",
" chat_template=\"chatml\",\n",
")\n",
"\n",
"def format_example(example):\n",
" messages = []\n",
" for conv in example[\"conversations\"]:\n",
" role = conv[\"from\"]\n",
" if role == \"system\":\n",
" messages.append({\"role\": \"system\", \"content\": conv[\"value\"]})\n",
" elif role == \"human\":\n",
" messages.append({\"role\": \"user\", \"content\": conv[\"value\"]})\n",
" elif role == \"gpt\":\n",
" messages.append({\"role\": \"assistant\", \"content\": conv[\"value\"]})\n",
" return {\"text\": tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)}\n",
"\n",
"dataset = Dataset.from_list(training_data)\n",
"dataset = dataset.map(format_example, remove_columns=dataset.column_names)\n",
"\n",
"print(f\"Dataset prepared with {len(dataset)} examples\")\n",
"print(f\"\\nSample (truncated):\\n{dataset[0]['text'][:500]}...\")"
],
"metadata": {
"id": "prepare_data"
},
"execution_count": null,
"outputs": []
},
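{
"cell_type": "markdown",
"source": "With the `chatml` template applied above, each formatted `text` field should come out roughly in Qwen's ChatML layout. This is only a sketch of the expected shape; the exact special tokens are whatever the tokenizer template emits, so trust the printed sample above:\n\n```\n<|im_start|>system\nYou are an expert UI designer...<|im_end|>\n<|im_start|>user\nGenerate a dashboard UI...<|im_end|>\n<|im_start|>assistant\n<!DOCTYPE html>...<|im_end|>\n```",
"metadata": {
"id": "chatml_sample"
}
},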
{
"cell_type": "code",
"source": [
"# Step 6: Train!\n",
"from trl import SFTTrainer\n",
"from transformers import TrainingArguments\n",
"\n",
"trainer = SFTTrainer(\n",
" model=model,\n",
" tokenizer=tokenizer,\n",
" train_dataset=dataset,\n",
" dataset_text_field=\"text\",\n",
" max_seq_length=max_seq_length,\n",
" args=TrainingArguments(\n",
" output_dir=\"./design-lora\",\n",
" per_device_train_batch_size=2,\n",
" gradient_accumulation_steps=4,\n",
" warmup_steps=10,\n",
" num_train_epochs=3,\n",
" learning_rate=2e-4,\n",
" fp16=not torch.cuda.is_bf16_supported(),\n",
" bf16=torch.cuda.is_bf16_supported(),\n",
" logging_steps=1,\n",
" save_strategy=\"epoch\",\n",
" optim=\"adamw_8bit\",\n",
" weight_decay=0.01,\n",
" lr_scheduler_type=\"linear\",\n",
" seed=42,\n",
" ),\n",
")\n",
"\n",
"print(\"Starting training...\")\n",
"trainer_stats = trainer.train()\n",
"\n",
"print(f\"\\nTraining complete!\")\n",
"print(f\"Training time: {trainer_stats.metrics['train_runtime'] / 60:.2f} minutes\")"
],
"metadata": {
"id": "train"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 7: Test the fine-tuned model\n",
"FastLanguageModel.for_inference(model)\n",
"\n",
"test_prompt = \"\"\"\n",
"Generate a dashboard UI with sidebar navigation, 4 metrics cards, and a data table.\n",
"\n",
"Apply these design tokens:\n",
"- Primary Color: #E07C4C\n",
"- Background: #FAF8F5\n",
"- Corners: rounded-2xl\n",
"\n",
"UI_PATTERN: dashboard\n",
"\"\"\"\n",
"\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are an expert UI designer. Generate complete, self-contained HTML using ONLY Tailwind CSS utility classes.\"},\n",
" {\"role\": \"user\", \"content\": test_prompt},\n",
"]\n",
"\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" tokenize=True,\n",
" add_generation_prompt=True,\n",
" return_tensors=\"pt\",\n",
").to(\"cuda\")\n",
"\n",
"outputs = model.generate(\n",
" input_ids=inputs,\n",
" max_new_tokens=4096,\n",
" temperature=0.7,\n",
" top_p=0.9,\n",
")\n",
"\n",
"response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
"print(\"Generated design (first 2000 chars):\")\n",
"print(response[-2000:])"
],
"metadata": {
"id": "test"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 8: Save and export\n",
"\n",
"# Save LoRA adapters\n",
"model.save_pretrained(\"design-lora\")\n",
"tokenizer.save_pretrained(\"design-lora\")\n",
"print(\"LoRA adapters saved!\")\n",
"\n",
"# Save GGUF for Ollama (Q4_K_M quantization)\n",
"print(\"\\nExporting to GGUF (this takes a few minutes)...\")\n",
"model.save_pretrained_gguf(\n",
" \"design-lora-gguf\",\n",
" tokenizer,\n",
" quantization_method=\"q4_k_m\"\n",
")\n",
"print(\"GGUF export complete!\")"
],
"metadata": {
"id": "save"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 9: Download the model\n",
"from google.colab import files\n",
"import shutil\n",
"\n",
"# Zip the GGUF model for download\n",
"shutil.make_archive(\"design-lora-gguf\", \"zip\", \"design-lora-gguf\")\n",
"\n",
"print(\"Downloading GGUF model...\")\n",
"files.download(\"design-lora-gguf.zip\")\n",
"\n",
"print(\"\\n\" + \"=\"*50)\n",
"print(\"NEXT STEPS:\")\n",
"print(\"=\"*50)\n",
"print(\"1. Extract the downloaded zip file\")\n",
"print(\"2. Copy the .gguf file to your Ollama models directory\")\n",
"print(\"3. Create a Modelfile with:\")\n",
"print(\" FROM ./design-lora-q4_k_m.gguf\")\n",
"print(\"4. Run: ollama create design-coder -f Modelfile\")\n",
"print(\"5. Update config.json to use 'design-coder'\")"
],
"metadata": {
"id": "download"
},
"execution_count": null,
"outputs": []
}
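,
{
"cell_type": "markdown",
"source": "## Example Modelfile for Ollama\n\nA minimal sketch of the Modelfile mentioned in step 3 of the next steps above. The `FROM` path mirrors the printed instructions; the actual `.gguf` filename inside the extracted folder may differ, so adjust the path to whatever file the export produced. The ChatML template and parameters are assumptions based on this notebook's setup:\n\n```\nFROM ./design-lora-q4_k_m.gguf\n\nTEMPLATE \"\"\"<|im_start|>system\n{{ .System }}<|im_end|>\n<|im_start|>user\n{{ .Prompt }}<|im_end|>\n<|im_start|>assistant\n\"\"\"\n\nSYSTEM \"\"\"You are an expert UI designer. Generate complete, self-contained HTML using ONLY Tailwind CSS utility classes.\"\"\"\n\nPARAMETER temperature 0.7\nPARAMETER stop \"<|im_end|>\"\n```\n\nThen run `ollama create design-coder -f Modelfile` as in step 4.",
"metadata": {
"id": "modelfile_example"
}
}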
]
}