@tin2tin
Created February 12, 2026 22:35
LTX2 + single-file transformer + distilled LoRA + 2-stage upscale/decode
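# Two-stage LTX-2 text-to-video flow (sketched for a ~24 GB VRAM GPU):
#   Stage 1: generate base video/audio latents at 768x512 with a Q4_K_M GGUF transformer.
#   Stage 2: 2x latent upsample, then a 3-step distilled-LoRA refinement decoded at
#            1536x1024 with VAE tiling.
# Assumes a diffusers build with LTX2 support and the `gguf` package installed.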
import os
import torch
import gc
import numpy as np
from diffusers import (
    LTX2Pipeline,
    LTX2VideoTransformer3DModel,
    GGUFQuantizationConfig,
    LTX2LatentUpsamplePipeline,
    FlowMatchEulerDiscreteScheduler,
)
from diffusers.pipelines.ltx2.latent_upsampler import LTX2LatentUpsamplerModel
from diffusers.pipelines.ltx2.utils import STAGE_2_DISTILLED_SIGMA_VALUES
from diffusers.utils import export_to_video
from diffusers.pipelines.ltx2.export_utils import encode_video
# -----------------------------
# CONFIGURATION
# -----------------------------
# Unsloth's Q4_K_M version (~11.5GB)
# Fits comfortably in 32 GB system RAM and 24 GB VRAM
GGUF_URL = "https://huggingface.co/unsloth/LTX-2-GGUF/blob/main/ltx-2-19b-dev-Q4_K_M.gguf"
OFFICIAL_REPO = "Lightricks/LTX-2"
DEVICE = "cuda"
DTYPE = torch.bfloat16
PROMPT = "A beautiful sunset over the ocean, cinematic, 4k, highly detailed, photorealistic"
NEGATIVE_PROMPT = "shaky, glitchy, low quality, worst quality, deformed, distorted, disfigured, motion smear, motion artifacts, fused fingers, bad anatomy, weird hand, ugly, transition, static."
# Base Resolution
WIDTH = 768
HEIGHT = 512
FRAME_RATE = 24.0
NUM_FRAMES = 121
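# 121 frames ≈ 5 s at 24 fps. LTX-family models typically expect frame counts of the
# form 8*k + 1 (121 = 8*15 + 1) to line up with the VAE's temporal compression.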
# -----------------------------
# 1. LOAD GGUF TRANSFORMER
# -----------------------------
print("--- Step 1: Loading Unsloth GGUF Transformer ---")
# Configure GGUF to load in bfloat16 to match the pipeline
quantization_config = GGUFQuantizationConfig(compute_dtype=DTYPE)
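# The weights stay Q4_K_M-quantized in memory and are dequantized to bfloat16
# layer by layer at compute time, trading some speed for a much smaller footprint.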
# Load the single-file transformer (auto-downloads if needed)
transformer = LTX2VideoTransformer3DModel.from_single_file(
    GGUF_URL,
    config=OFFICIAL_REPO,
    subfolder="transformer",
    quantization_config=quantization_config,
    torch_dtype=DTYPE,
)
# -----------------------------
# 2. SETUP PIPELINE
# -----------------------------
print("--- Step 2: Setting up Base Pipeline ---")
pipe = LTX2Pipeline.from_pretrained(
    OFFICIAL_REPO,
    transformer=transformer,  # Inject the GGUF transformer
    torch_dtype=DTYPE,
)
pipe.enable_model_cpu_offload(device=DEVICE)
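# CPU offload keeps each component in system RAM and moves it to the GPU only while
# it runs, so peak VRAM stays near the size of the largest single model.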
# -----------------------------
# 3. STAGE 1: BASE LATENTS
# -----------------------------
print("--- Step 3: Generating Base Latents (Stage 1) ---")
video_latent, audio_latent = pipe(
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    width=WIDTH,
    height=HEIGHT,
    num_frames=NUM_FRAMES,
    frame_rate=FRAME_RATE,
    num_inference_steps=40,  # Standard step count for the base pass
    guidance_scale=4.0,
    output_type="latent",
    return_dict=False,
)
# Clear VRAM for Upscaler
torch.cuda.empty_cache()
# -----------------------------
# 4. UPSCALING
# -----------------------------
print("--- Step 4: Upscaling Latents ---")
# Load Upscaler
latent_upsampler = LTX2LatentUpsamplerModel.from_pretrained(
    OFFICIAL_REPO,
    subfolder="latent_upsampler",
    torch_dtype=DTYPE,
)
upsample_pipe = LTX2LatentUpsamplePipeline(
    vae=pipe.vae,
    latent_upsampler=latent_upsampler,
)
upsample_pipe.enable_model_cpu_offload(device=DEVICE)
# Run Upscale
upscaled_video_latent = upsample_pipe(
    latents=video_latent,
    output_type="latent",
    return_dict=False,
)[0]
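# The latent upsampler doubles the spatial size of the latents, matching the
# WIDTH * 2 / HEIGHT * 2 target of the Stage 2 refinement below.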
# Cleanup Upscaler to free memory for Refinement
del upsample_pipe
del latent_upsampler
gc.collect()
torch.cuda.empty_cache()
# -----------------------------
# 5. STAGE 2: DISTILLED REFINEMENT
# -----------------------------
print("--- Step 5: Loading Distilled LoRA (Stage 2) ---")
# Load Distilled LoRA weights onto the main pipe
pipe.load_lora_weights(
    OFFICIAL_REPO,
    adapter_name="stage_2_distilled",
    weight_name="ltx-2-19b-distilled-lora-384.safetensors",
)
pipe.set_adapters("stage_2_distilled", 1.0)
# ⚠️ CRITICAL FOR 24GB VRAM: Enable Tiling
# Without this, decoding the high-res video will OOM your GPU
pipe.vae.enable_tiling()
# Swap Scheduler for Distilled Sigmas
new_scheduler = FlowMatchEulerDiscreteScheduler.from_config(
    pipe.scheduler.config,
    use_dynamic_shifting=False,
    shift_terminal=None,
)
pipe.scheduler = new_scheduler
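# Disabling dynamic shifting and terminal shift should keep the scheduler from
# re-warping the fixed distilled sigma schedule passed to the pipeline below.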
print("--- Step 6: Running Refinement ---")
video, audio = pipe(
    latents=upscaled_video_latent,
    audio_latents=audio_latent,
    prompt=PROMPT,
    negative_prompt=NEGATIVE_PROMPT,
    width=WIDTH * 2,    # 1536
    height=HEIGHT * 2,  # 1024
    num_inference_steps=3,  # The distilled LoRA needs very few steps
    sigmas=STAGE_2_DISTILLED_SIGMA_VALUES,
    guidance_scale=1.0,  # Guidance 1.0 is mandatory for the distilled model
    output_type="np",
    return_dict=False,
)
# -----------------------------
# 6. EXPORT
# -----------------------------
print("--- Saving Video ---")
video_uint8 = (video * 255).round().astype("uint8")
video_tensor = torch.from_numpy(video_uint8)[0]
# Write to C:\tmp for guaranteed write permissions (instead of the current .blend directory)
output_filename = r"C:\tmp\ltx2_unsloth_distilled.mp4"
os.makedirs(r"C:\tmp", exist_ok=True)
# Handle audio if present
if audio is not None and len(audio) > 0:
    encode_video(
        video_tensor,
        fps=FRAME_RATE,
        audio=audio[0].float().cpu(),
        audio_sample_rate=pipe.vocoder.config.output_sampling_rate,
        output_path=output_filename,
    )
else:
    # export_to_video expects float frames in [0, 1], so pass the pre-uint8 array
    export_to_video(video[0], output_filename, fps=FRAME_RATE)
print(f"DONE! Saved to {os.path.abspath(output_filename)}")