@chaddy81
Last active November 26, 2025 17:36
Fine-tune Qwen2.5-Coder for UI design generation with Unsloth
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"source": "# Design Generator Fine-tuning with Unsloth\n\nThis notebook fine-tunes Qwen2.5-Coder for better HTML/CSS design generation.\n\n**Requirements:**\n- Google Colab with GPU (T4 free tier works, A100 recommended for faster training)\n- Training data in ShareGPT format\n\n**Steps:**\n1. Install dependencies\n2. Upload training data\n3. Load model and add LoRA\n4. Train\n5. Export to GGUF for Ollama",
"metadata": {
"id": "intro"
}
},
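{
"cell_type": "markdown",
"source": "## Sample training data record\n\nThe uploaded `training_sharegpt.json` is expected to be a JSON array of ShareGPT-style records using the same `from`/`value` keys that `format_example` reads in Step 5. A minimal sketch (the values here are illustrative, not from a real dataset):\n\n```json\n[\n  {\n    \"conversations\": [\n      {\"from\": \"system\", \"value\": \"You are an expert UI designer...\"},\n      {\"from\": \"human\", \"value\": \"Generate a pricing page with three tiers...\"},\n      {\"from\": \"gpt\", \"value\": \"<!DOCTYPE html>...\"}\n    ]\n  }\n]\n```",
"metadata": {
"id": "data_format"
}
},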
{
"cell_type": "code",
"source": [
"# Step 1: Install Unsloth (optimized for Colab)\n",
"%%capture\n",
"!pip install \"unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git\"\n",
"!pip install --no-deps trl peft accelerate bitsandbytes"
],
"metadata": {
"id": "install"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 2: Upload training data\n",
"from google.colab import files\n",
"import json\n",
"\n",
"print(\"Upload your training_sharegpt.json file:\")\n",
"uploaded = files.upload()\n",
"\n",
"# Load the data\n",
"filename = list(uploaded.keys())[0]\n",
"with open(filename) as f:\n",
" training_data = json.load(f)\n",
"\n",
"print(f\"Loaded {len(training_data)} training examples\")"
],
"metadata": {
"id": "upload"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": "# Step 3: Load model with 4-bit quantization\nfrom unsloth import FastLanguageModel\nimport torch\n\n# Configuration\nmax_seq_length = 8192\ndtype = None # Auto-detect (Float16 for T4, BFloat16 for A100)\nload_in_4bit = True\n\n# Load Qwen2.5-Coder (7B fits on T4, can try 14B on A100)\nmodel, tokenizer = FastLanguageModel.from_pretrained(\n model_name=\"unsloth/Qwen2.5-Coder-7B-Instruct\",\n max_seq_length=max_seq_length,\n dtype=dtype,\n load_in_4bit=load_in_4bit,\n)\n\nprint(f\"Model loaded! GPU memory used: {torch.cuda.memory_allocated() / 1e9:.2f} GB\")",
"metadata": {
"id": "load_model"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 4: Add LoRA adapters\n",
"model = FastLanguageModel.get_peft_model(\n",
" model,\n",
" r=64, # LoRA rank (higher = more capacity but slower)\n",
" target_modules=[\n",
" \"q_proj\", \"k_proj\", \"v_proj\", \"o_proj\",\n",
" \"gate_proj\", \"up_proj\", \"down_proj\",\n",
" ],\n",
" lora_alpha=16,\n",
" lora_dropout=0.05,\n",
" bias=\"none\",\n",
" use_gradient_checkpointing=\"unsloth\",\n",
" random_state=42,\n",
")\n",
"\n",
"print(\"LoRA adapters added!\")\n",
"print(f\"Trainable parameters: {model.print_trainable_parameters()}\")"
],
"metadata": {
"id": "add_lora"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 5: Prepare dataset\n",
"from unsloth.chat_templates import get_chat_template\n",
"from datasets import Dataset\n",
"\n",
"# Set chat template (Qwen uses ChatML)\n",
"tokenizer = get_chat_template(\n",
" tokenizer,\n",
" chat_template=\"chatml\",\n",
")\n",
"\n",
"def format_example(example):\n",
" messages = []\n",
" for conv in example[\"conversations\"]:\n",
" role = conv[\"from\"]\n",
" if role == \"system\":\n",
" messages.append({\"role\": \"system\", \"content\": conv[\"value\"]})\n",
" elif role == \"human\":\n",
" messages.append({\"role\": \"user\", \"content\": conv[\"value\"]})\n",
" elif role == \"gpt\":\n",
" messages.append({\"role\": \"assistant\", \"content\": conv[\"value\"]})\n",
" return {\"text\": tokenizer.apply_chat_template(messages, tokenize=False, add_generation_prompt=False)}\n",
"\n",
"dataset = Dataset.from_list(training_data)\n",
"dataset = dataset.map(format_example, remove_columns=dataset.column_names)\n",
"\n",
"print(f\"Dataset prepared with {len(dataset)} examples\")\n",
"print(f\"\\nSample (truncated):\\n{dataset[0]['text'][:500]}...\")"
],
"metadata": {
"id": "prepare_data"
},
"execution_count": null,
"outputs": []
},
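{
"cell_type": "markdown",
"source": "With the `chatml` template applied above, each formatted `text` field should come out roughly in Qwen's ChatML layout. This is only a sketch of the expected shape; the exact special tokens are whatever the tokenizer template emits, so trust the printed sample above:\n\n```\n<|im_start|>system\nYou are an expert UI designer...<|im_end|>\n<|im_start|>user\nGenerate a dashboard UI...<|im_end|>\n<|im_start|>assistant\n<!DOCTYPE html>...<|im_end|>\n```",
"metadata": {
"id": "chatml_sample"
}
},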
{
"cell_type": "code",
"source": [
"# Step 6: Train!\n",
"from trl import SFTTrainer\n",
"from transformers import TrainingArguments\n",
"\n",
"trainer = SFTTrainer(\n",
" model=model,\n",
" tokenizer=tokenizer,\n",
" train_dataset=dataset,\n",
" dataset_text_field=\"text\",\n",
" max_seq_length=max_seq_length,\n",
" args=TrainingArguments(\n",
" output_dir=\"./design-lora\",\n",
" per_device_train_batch_size=2,\n",
" gradient_accumulation_steps=4,\n",
" warmup_steps=10,\n",
" num_train_epochs=3,\n",
" learning_rate=2e-4,\n",
" fp16=not torch.cuda.is_bf16_supported(),\n",
" bf16=torch.cuda.is_bf16_supported(),\n",
" logging_steps=1,\n",
" save_strategy=\"epoch\",\n",
" optim=\"adamw_8bit\",\n",
" weight_decay=0.01,\n",
" lr_scheduler_type=\"linear\",\n",
" seed=42,\n",
" ),\n",
")\n",
"\n",
"print(\"Starting training...\")\n",
"trainer_stats = trainer.train()\n",
"\n",
"print(f\"\\nTraining complete!\")\n",
"print(f\"Training time: {trainer_stats.metrics['train_runtime'] / 60:.2f} minutes\")"
],
"metadata": {
"id": "train"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 7: Test the fine-tuned model\n",
"FastLanguageModel.for_inference(model)\n",
"\n",
"test_prompt = \"\"\"\n",
"Generate a dashboard UI with sidebar navigation, 4 metrics cards, and a data table.\n",
"\n",
"Apply these design tokens:\n",
"- Primary Color: #E07C4C\n",
"- Background: #FAF8F5\n",
"- Corners: rounded-2xl\n",
"\n",
"UI_PATTERN: dashboard\n",
"\"\"\"\n",
"\n",
"messages = [\n",
" {\"role\": \"system\", \"content\": \"You are an expert UI designer. Generate complete, self-contained HTML using ONLY Tailwind CSS utility classes.\"},\n",
" {\"role\": \"user\", \"content\": test_prompt},\n",
"]\n",
"\n",
"inputs = tokenizer.apply_chat_template(\n",
" messages,\n",
" tokenize=True,\n",
" add_generation_prompt=True,\n",
" return_tensors=\"pt\",\n",
").to(\"cuda\")\n",
"\n",
"outputs = model.generate(\n",
" input_ids=inputs,\n",
" max_new_tokens=4096,\n",
" temperature=0.7,\n",
" top_p=0.9,\n",
")\n",
"\n",
"response = tokenizer.decode(outputs[0], skip_special_tokens=True)\n",
"print(\"Generated design (first 2000 chars):\")\n",
"print(response[-2000:])"
],
"metadata": {
"id": "test"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 8: Save and export\n",
"\n",
"# Save LoRA adapters\n",
"model.save_pretrained(\"design-lora\")\n",
"tokenizer.save_pretrained(\"design-lora\")\n",
"print(\"LoRA adapters saved!\")\n",
"\n",
"# Save GGUF for Ollama (Q4_K_M quantization)\n",
"print(\"\\nExporting to GGUF (this takes a few minutes)...\")\n",
"model.save_pretrained_gguf(\n",
" \"design-lora-gguf\",\n",
" tokenizer,\n",
" quantization_method=\"q4_k_m\"\n",
")\n",
"print(\"GGUF export complete!\")"
],
"metadata": {
"id": "save"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"# Step 9: Download the model\n",
"from google.colab import files\n",
"import shutil\n",
"\n",
"# Zip the GGUF model for download\n",
"shutil.make_archive(\"design-lora-gguf\", \"zip\", \"design-lora-gguf\")\n",
"\n",
"print(\"Downloading GGUF model...\")\n",
"files.download(\"design-lora-gguf.zip\")\n",
"\n",
"print(\"\\n\" + \"=\"*50)\n",
"print(\"NEXT STEPS:\")\n",
"print(\"=\"*50)\n",
"print(\"1. Extract the downloaded zip file\")\n",
"print(\"2. Copy the .gguf file to your Ollama models directory\")\n",
"print(\"3. Create a Modelfile with:\")\n",
"print(\" FROM ./design-lora-q4_k_m.gguf\")\n",
"print(\"4. Run: ollama create design-coder -f Modelfile\")\n",
"print(\"5. Update config.json to use 'design-coder'\")"
],
"metadata": {
"id": "download"
},
"execution_count": null,
"outputs": []
}
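,
{
"cell_type": "markdown",
"source": "## Example Modelfile for Ollama\n\nA minimal sketch of the Modelfile mentioned in step 3 of the next steps above. The `FROM` path mirrors the printed instructions; the actual `.gguf` filename inside the extracted folder may differ, so adjust the path to whatever file the export produced. The ChatML template and parameters are assumptions based on this notebook's setup:\n\n```\nFROM ./design-lora-q4_k_m.gguf\n\nTEMPLATE \"\"\"<|im_start|>system\n{{ .System }}<|im_end|>\n<|im_start|>user\n{{ .Prompt }}<|im_end|>\n<|im_start|>assistant\n\"\"\"\n\nSYSTEM \"\"\"You are an expert UI designer. Generate complete, self-contained HTML using ONLY Tailwind CSS utility classes.\"\"\"\n\nPARAMETER temperature 0.7\nPARAMETER stop \"<|im_end|>\"\n```\n\nThen run `ollama create design-coder -f Modelfile` as in step 4.",
"metadata": {
"id": "modelfile_example"
}
}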
]
}