Fine-tune Qwen2.5-Coder for UI design generation with Unsloth
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"source": "# Design Generator Fine-tuning with Unsloth\n\nThis notebook fine-tunes Qwen2.5-Coder for better HTML/CSS design generation.\n\n**Requirements:**\n- Google Colab with GPU (T4 free tier works, A100 recommended for faster training)\n- Training data in ShareGPT format\n\n**Steps:**\n1. Install dependencies\n2. Upload training data\n3. Load model and add LoRA\n4. Train\n5. Export to GGUF for Ollama",
"metadata": {
"id": "intro"
}
},
{
"cell_type": "code",
"source": [
"%%capture\n",
"# Step 1: Install Unsloth (optimized for Colab)\n",
"# Note: %%capture must be the first line of the cell, or IPython rejects it.\n",
"!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
"!pip install --no-deps trl peft accelerate bitsandbytes"
],
"metadata": {
"id": "install"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 2: Upload training data\n",
"from google.colab import files\n",
"import json\n",
"\n",
"print(\"Upload your training_sharegpt.json file:\")\n",
"uploaded = files.upload()\n",
"\n",
"# Load the data\n",
"filename = list(uploaded.keys())[0]\n",
"with open(filename) as f:\n",
"    training_data = json.load(f)\n",
"\n",
"print(f\"Loaded {len(training_data)} training examples\")"
],
"metadata": {
"id": "upload"
},
"execution_count": null,
"outputs": []
},
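{
"cell_type": "markdown",
"source": "Optional sanity check: the formatting step later assumes each example is a dict with a `conversations` list whose turns carry `from`/`value` keys (ShareGPT format). The sketch below just counts malformed entries under that assumption; adjust the keys if your export differs.",
"metadata": {
"id": "validate_note"
}
},
{
"cell_type": "code",
"source": [
"# Optional: validate ShareGPT structure before training.\n",
"# Assumes examples look like {\"conversations\": [{\"from\": ..., \"value\": ...}, ...]}.\n",
"bad = []\n",
"roles = set()\n",
"for i, ex in enumerate(training_data):\n",
"    convs = ex.get(\"conversations\")\n",
"    if not isinstance(convs, list) or not convs:\n",
"        bad.append(i)\n",
"        continue\n",
"    if any(\"from\" not in c or \"value\" not in c for c in convs):\n",
"        bad.append(i)\n",
"        continue\n",
"    roles.update(c[\"from\"] for c in convs)\n",
"\n",
"print(f\"Roles found: {roles}\")\n",
"print(f\"Malformed examples: {len(bad)}\" + (f\" (first few: {bad[:5]})\" if bad else \"\"))"
],
"metadata": {
"id": "validate_data"
},
"execution_count": null,
"outputs": []
},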
{
"cell_type": "code",
"source": "# Step 3: Load model with 4-bit quantization\nfrom unsloth import FastLanguageModel\nimport torch\n\n# Configuration\nmax_seq_length = 8192\ndtype = None  # Auto-detect (float16 for T4, bfloat16 for A100)\nload_in_4bit = True\n\n# Load Qwen2.5-Coder (7B fits on T4; try 14B on A100)\nmodel, tokenizer = FastLanguageModel.from_pretrained(\n    model_name=\"unsloth/Qwen2.5-Coder-7B-Instruct\",\n    max_seq_length=max_seq_length,\n    dtype=dtype,\n    load_in_4bit=load_in_4bit,\n)\n\nprint(f\"Model loaded! GPU memory used: {torch.cuda.memory_allocated() / 1e9:.2f} GB\")",
"metadata": {
"id": "load_model"
},
"execution_count": null,
"outputs": []
},
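{
"cell_type": "markdown",
"source": "`dtype=None` above auto-detects the precision (float16 on T4, bfloat16 on A100-class GPUs). A one-liner to confirm which GPU Colab assigned:",
"metadata": {
"id": "gpu_note"
}
},
{
"cell_type": "code",
"source": [
"# Confirm the assigned GPU; this determines the auto-detected dtype above.\n",
"print(torch.cuda.get_device_name(0), \"| bf16 supported:\", torch.cuda.is_bf16_supported())"
],
"metadata": {
"id": "gpu_check"
},
"execution_count": null,
"outputs": []
},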
{
"cell_type": "code",
"source": [
"# Step 4: Add LoRA adapters\n",
"model = FastLanguageModel.get_peft_model(\n",
"    model,\n",
"    r=64,  # LoRA rank (higher = more capacity but slower)\n",
"    target_modules=[\n",
"        \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
"        \"gate_proj\", \"up_proj\", \"down_proj\",\n",
"    ],\n",
"    lora_alpha=16,\n",
"    lora_dropout=0.05,\n",
"    bias=\"none\",\n",
"    use_gradient_checkpointing=\"unsloth\",\n",
"    random_state=42,\n",
")\n",
"\n",
"print(\"LoRA adapters added!\")\n",
"model.print_trainable_parameters()  # prints the counts itself; returns None"
],
"metadata": {
"id": "add_lora"
},
"execution_count": null,
"outputs": []
},
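{
"cell_type": "markdown",
"source": "Note on the hyperparameters above: LoRA scales its update by `lora_alpha / r`, i.e. the effective weight is roughly `W + (alpha/r) * B @ A`. With `r=64` and `lora_alpha=16` that factor is 0.25, so the adapters start out fairly conservative; if the model underfits, raising `lora_alpha` (common heuristics are `alpha = r` or `alpha = 2r`) is worth trying before raising `r`.",
"metadata": {
"id": "lora_note"
}
},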
{
"cell_type": "code",
"source": [
"# Step 5: Prepare dataset\n",
"from unsloth.chat_templates import get_chat_template\n",
"from datasets import Dataset\n",
"\n",
"# Set chat template (Qwen uses ChatML)\n",
"tokenizer = get_chat_template(\n",
"    tokenizer,\n",
"    chat_template=\"chatml\",\n",
")\n",
"\n",
"def format_example(example):\n",
"    messages = []\n",
"    for conv in example[\"conversations\"]:\n",
"        role = conv[\"from\"]\n",
"        if role == \"system\":\n",
"            messages.append({\"role\": \"system\", \"content\": conv[\"value\"]})\n",
"        elif role == \"human\":\n",
"            messages.append({\"role\": \"user\", \"content\": conv[\"value\"]})\n",
"        elif role == \"gpt\":\n",
"            messages.append({\"role\": \"assistant\", \"content\": conv[\"value\"]})\n",
"    return {\"text\": tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)}\n",
"\n",
"dataset = Dataset.from_list(training_data)\n",
"dataset = dataset.map(format_example, remove_columns=dataset.column_names)\n",
"\n",
"print(f\"Dataset prepared with {len(dataset)} examples\")\n",
"print(f\"\\nSample (truncated):\\n{dataset[0]['text'][:500]}...\")"
],
"metadata": {
"id": "prepare_data"
},
"execution_count": null,
"outputs": []
},
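{
"cell_type": "markdown",
"source": "Before training, it's worth confirming the formatted examples fit in `max_seq_length` (8192 here); anything longer is truncated during training. A minimal sketch using the already-loaded tokenizer:",
"metadata": {
"id": "length_note"
}
},
{
"cell_type": "code",
"source": [
"# Optional: inspect token lengths so overlong examples aren't silently truncated.\n",
"lengths = [len(tokenizer(ex[\"text\"]).input_ids) for ex in dataset]\n",
"print(f\"min/mean/max tokens: {min(lengths)}/{sum(lengths)/len(lengths):.0f}/{max(lengths)}\")\n",
"print(f\"Examples over max_seq_length ({max_seq_length}): {sum(l > max_seq_length for l in lengths)}\")"
],
"metadata": {
"id": "token_lengths"
},
"execution_count": null,
"outputs": []
},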
{
"cell_type": "code",
"source": [
"# Step 6: Train!\n",
"from trl import SFTTrainer\n",
"from transformers import TrainingArguments\n",
"\n",
"trainer = SFTTrainer(\n",
"    model=model,\n",
"    tokenizer=tokenizer,\n",
"    train_dataset=dataset,\n",
"    dataset_text_field=\"text\",\n",
"    max_seq_length=max_seq_length,\n",
"    args=TrainingArguments(\n",
"        output_dir=\"./design-lora\",\n",
"        per_device_train_batch_size=2,\n",
"        gradient_accumulation_steps=4,\n",
"        warmup_steps=10,\n",
"        num_train_epochs=3,\n",
"        learning_rate=2e-4,\n",
"        fp16=not torch.cuda.is_bf16_supported(),\n",
"        bf16=torch.cuda.is_bf16_supported(),\n",
"        logging_steps=1,\n",
"        save_strategy=\"epoch\",\n",
"        optim=\"adamw_8bit\",\n",
"        weight_decay=0.01,\n",
"        lr_scheduler_type=\"linear\",\n",
"        seed=42,\n",
"    ),\n",
")\n",
"\n",
"print(\"Starting training...\")\n",
"trainer_stats = trainer.train()\n",
"\n",
"print(\"\\nTraining complete!\")\n",
"print(f\"Training time: {trainer_stats.metrics['train_runtime'] / 60:.2f} minutes\")"
],
"metadata": {
"id": "train"
},
"execution_count": null,
"outputs": []
},
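{
"cell_type": "markdown",
"source": "For reference: with `per_device_train_batch_size=2` and `gradient_accumulation_steps=4`, the effective batch size is 2 * 4 = 8 sequences per optimizer step, so one epoch is roughly `ceil(len(dataset) / 8)` steps and the full run is three times that. `logging_steps=1` prints the loss at every step, which makes divergence easy to spot early. A quick arithmetic check:",
"metadata": {
"id": "schedule_note"
}
},
{
"cell_type": "code",
"source": [
"# Rough schedule check (pure arithmetic on the settings above).\n",
"import math\n",
"effective_bs = 2 * 4  # per_device_train_batch_size * gradient_accumulation_steps\n",
"steps_per_epoch = math.ceil(len(dataset) / effective_bs)\n",
"print(f\"~{steps_per_epoch} optimizer steps/epoch, ~{3 * steps_per_epoch} total over 3 epochs\")"
],
"metadata": {
"id": "schedule_check"
},
"execution_count": null,
"outputs": []
},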
{
"cell_type": "code",
"source": [
"# Step 7: Test the fine-tuned model\n",
"FastLanguageModel.for_inference(model)\n",
"\n",
"test_prompt = \"\"\"\n",
"Generate a dashboard UI with sidebar navigation, 4 metrics cards, and a data table.\n",
"\n",
"Apply these design tokens:\n",
"- Primary Color: #E07C4C\n",
"- Background: #FAF8F5\n",
"- Corners: rounded-2xl\n",
"\n",
"UI_PATTERN: dashboard\n",
"\"\"\"\n",
"\n",
"messages = [\n",
"    {\"role\": \"system\", \"content\": \"You are an expert UI designer. Generate complete, self-contained HTML using ONLY Tailwind CSS utility classes.\"},\n",
"    {\"role\": \"user\", \"content\": test_prompt},\n",
"]\n",
"\n",
"inputs = tokenizer.apply_chat_template(\n",
"    messages,\n",
"    tokenize=True,\n",
"    add_generation_prompt=True,\n",
"    return_tensors=\"pt\",\n",
").to(\"cuda\")\n",
"\n",
"outputs = model.generate(\n",
"    input_ids=inputs,\n",
"    max_new_tokens=4096,\n",
"    do_sample=True,  # required for temperature/top_p to take effect\n",
"    temperature=0.7,\n",
"    top_p=0.9,\n",
")\n",
"\n",
"response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
"print(\"Generated design (last 2000 chars; the decode starts with the echoed prompt):\")\n",
"print(response[-2000:])"
],
"metadata": {
"id": "test"
},
"execution_count": null,
"outputs": []
},
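{
"cell_type": "markdown",
"source": "The decode above includes the echoed prompt. To eyeball the result, one option is to slice out the HTML and save/render it; the extraction below is a heuristic that assumes the document starts at the first `<!DOCTYPE` or `<html` tag in the response.",
"metadata": {
"id": "preview_note"
}
},
{
"cell_type": "code",
"source": [
"# Optional: save and preview the generated HTML (heuristic extraction).\n",
"from IPython.display import HTML, display\n",
"\n",
"starts = [i for i in (response.find(\"<!DOCTYPE\"), response.find(\"<html\")) if i != -1]\n",
"html = response[min(starts):] if starts else response\n",
"\n",
"with open(\"generated_design.html\", \"w\") as f:\n",
"    f.write(html)\n",
"print(\"Saved to generated_design.html\")\n",
"\n",
"display(HTML(html))  # rough inline preview in Colab, not pixel-accurate"
],
"metadata": {
"id": "preview"
},
"execution_count": null,
"outputs": []
},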
{
"cell_type": "code",
"source": [
"# Step 8: Save and export\n",
"\n",
"# Save LoRA adapters\n",
"model.save_pretrained(\"design-lora\")\n",
"tokenizer.save_pretrained(\"design-lora\")\n",
"print(\"LoRA adapters saved!\")\n",
"\n",
"# Save GGUF for Ollama (Q4_K_M quantization)\n",
"print(\"\\nExporting to GGUF (this takes a few minutes)...\")\n",
"model.save_pretrained_gguf(\n",
"    \"design-lora-gguf\",\n",
"    tokenizer,\n",
"    quantization_method=\"q4_k_m\"\n",
")\n",
"print(\"GGUF export complete!\")"
],
"metadata": {
"id": "save"
},
"execution_count": null,
"outputs": []
},
| }, | |
| { | |
| "cell_type": "code", | |
| "source": [ | |
| "# Step 9: Download the model\n", | |
| "from google.colab import files\n", | |
| "import shutil\n", | |
| "\n", | |
| "# Zip the GGUF model for download\n", | |
| "shutil.make_archive(\"design-lora-gguf\", \"zip\", \"design-lora-gguf\")\n", | |
| "\n", | |
| "print(\"Downloading GGUF model...\")\n", | |
| "files.download(\"design-lora-gguf.zip\")\n", | |
| "\n", | |
| "print(\"\\n\" + \"=\"*50)\n", | |
| "print(\"NEXT STEPS:\")\n", | |
| "print(\"=\"*50)\n", | |
| "print(\"1. Extract the downloaded zip file\")\n", | |
| "print(\"2. Copy the .gguf file to your Ollama models directory\")\n", | |
| "print(\"3. Create a Modelfile with:\")\n", | |
| "print(\" FROM ./design-lora-q4_k_m.gguf\")\n", | |
| "print(\"4. Run: ollama create design-coder -f Modelfile\")\n", | |
| "print(\"5. Update config.json to use 'design-coder'\")" | |
| ], | |
| "metadata": { | |
| "id": "download" | |
| }, | |
| "execution_count": null, | |
| "outputs": [] | |
| } | |
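{
"cell_type": "markdown",
"source": "For step 3 of the printed instructions, here is a minimal Modelfile sketch. The `FROM` path assumes the `q4_k_m` suffix from the export above; check the actual filename of the extracted `.gguf`. The template is ChatML, matching the chat format used during training.\n\n```\nFROM ./design-lora-q4_k_m.gguf\nTEMPLATE \"\"\"{{ if .System }}<|im_start|>system\n{{ .System }}<|im_end|>\n{{ end }}<|im_start|>user\n{{ .Prompt }}<|im_end|>\n<|im_start|>assistant\n\"\"\"\nPARAMETER stop \"<|im_end|>\"\n```",
"metadata": {
"id": "modelfile_note"
}
}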
]
}