David Berenstein (davidberenstein1957)

davidberenstein1957 / script.py
Last active August 5, 2025 07:04
Compress and optimize FLUX to make it run faster
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "pruna",
# "pyarrow<20"
# ]
# ///
import torch
import gc
import shutil
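The preview above stops after the imports. Below is a minimal sketch of the idea, assuming the FLUX.1-schnell checkpoint and Pruna's SmashConfig/smash interface; the algorithm names ("torch_compile", "hqq_diffusers") are illustrative and depend on the installed Pruna version.

import torch
from diffusers import FluxPipeline
from pruna import SmashConfig, smash

# Load the base FLUX pipeline (FLUX.1-schnell is used here as an example checkpoint).
pipe = FluxPipeline.from_pretrained(
    "black-forest-labs/FLUX.1-schnell", torch_dtype=torch.bfloat16
).to("cuda")

# Describe the optimizations to apply; algorithm names vary by Pruna version.
smash_config = SmashConfig()
smash_config["compiler"] = "torch_compile"   # compile the denoiser for faster inference
smash_config["quantizer"] = "hqq_diffusers"  # optional weight quantization (assumed name)

# smash() returns an optimized pipeline with the same call signature as the original.
smashed_pipe = smash(model=pipe, smash_config=smash_config)
image = smashed_pipe("a photo of an otter reading a book", num_inference_steps=4).images[0]
image.save("otter.png")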
davidberenstein1957 / optimize_qwen_with_pruna.py
Last active May 16, 2025 09:04
Optimize Qwen3 model inference with Pruna
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "pruna"
# ]
# ///
from transformers import pipeline
from pruna import SmashConfig, smash
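Only the imports survive in this preview. The sketch below shows one way the script might continue, assuming a Qwen/Qwen3-0.6B checkpoint and illustrative Pruna algorithm names ("hqq", "torch_compile").

from transformers import pipeline
from pruna import SmashConfig, smash

# Build a standard text-generation pipeline (Qwen3-0.6B used here as an example checkpoint).
pipe = pipeline("text-generation", model="Qwen/Qwen3-0.6B", device_map="auto")

# Configure Pruna; the algorithm names below are illustrative and version-dependent.
smash_config = SmashConfig()
smash_config["quantizer"] = "hqq"            # weight-only quantization (assumed name)
smash_config["compiler"] = "torch_compile"   # compile the forward pass

# Replace the pipeline's model with the optimized version and generate as usual.
pipe.model = smash(model=pipe.model, smash_config=smash_config)
print(pipe("Explain quantization in one sentence.", max_new_tokens=64)[0]["generated_text"])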
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "ai-gradio[together]"
# ]
# ///
import gradio as gr
import ai_gradio
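The preview stops at the imports. A minimal sketch of serving a Together-hosted model through ai-gradio, assuming the gr.load(..., src=ai_gradio.registry) pattern, an example Llama model id, and a TOGETHER_API_KEY set in the environment.

import gradio as gr
import ai_gradio

# ai-gradio registers provider-prefixed model names for gr.load();
# the model id below is an example, not taken from the gist.
demo = gr.load(
    name="together:meta-llama/Meta-Llama-3.1-8B-Instruct-Turbo",
    src=ai_gradio.registry,
)
demo.launch()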
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "distilabel[hf-transformers, hf-inference-endpoints]",
# ]
# ///
from distilabel.models import InferenceEndpointsLLM
from distilabel.pipeline import InstructionResponsePipeline
repo_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"
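A sketch of how this gist plausibly continues, assuming InstructionResponsePipeline accepts an llm and a num_rows argument and that a Hugging Face token is configured locally; the generation settings and the push_to_hub target are hypothetical.

from distilabel.models import InferenceEndpointsLLM
from distilabel.pipeline import InstructionResponsePipeline

repo_id = "deepseek-ai/DeepSeek-R1-Distill-Qwen-1.5B"

# Wrap the Hub model as an Inference Endpoints backend.
llm = InferenceEndpointsLLM(
    model_id=repo_id,
    generation_kwargs={"max_new_tokens": 512, "temperature": 0.7},
)

# The template pipeline generates instruction/response pairs synthetically.
pipeline = InstructionResponsePipeline(llm=llm, num_rows=10)
distiset = pipeline.run()
distiset.push_to_hub("my-username/synthetic-instructions")  # hypothetical repo id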
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "distilabel[mlx]",
# ]
# ///
from distilabel.models import MlxLLM
from distilabel.pipeline import InstructionResponsePipeline
llm = MlxLLM(
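The preview ends mid-call. A hedged completion, assuming MlxLLM takes a path_or_hf_repo argument (as mlx-lm does) and using an example 4-bit community checkpoint for local generation on Apple silicon.

from distilabel.models import MlxLLM
from distilabel.pipeline import InstructionResponsePipeline

# The parameter name and checkpoint below are assumptions, not taken from the gist.
llm = MlxLLM(path_or_hf_repo="mlx-community/Llama-3.2-1B-Instruct-4bit")

pipeline = InstructionResponsePipeline(llm=llm, num_rows=10)
distiset = pipeline.run()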
davidberenstein1957 / vector_search_hub_datasets.py
Last active February 28, 2025 11:10
vector search on the Hugging Face Hub
# /// script
# requires-python = ">=3.11,<3.12"
# dependencies = [
# "duckdb", sentence-transformers, huggingface_hub
# ]
# ///
from sentence_transformers import SentenceTransformer
import duckdb
from huggingface_hub import get_token
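The preview shows only the imports. Below is a sketch of the vector-search idea, assuming a Hub dataset with text and embedding columns produced with all-MiniLM-L6-v2 (384 dimensions); the dataset path and column names are placeholders.

import duckdb
from huggingface_hub import get_token
from sentence_transformers import SentenceTransformer

# Embed the query with the same model assumed to have produced the dataset's "embedding" column.
model = SentenceTransformer("sentence-transformers/all-MiniLM-L6-v2")
query_vector = model.encode("machine learning for beginners").tolist()

con = duckdb.connect()
# Authenticate DuckDB's hf:// filesystem with the locally stored Hub token.
con.execute(f"CREATE SECRET hf_token (TYPE HUGGINGFACE, TOKEN '{get_token()}')")

# Rank rows by cosine similarity between stored embeddings and the query vector.
results = con.execute(
    """
    SELECT text, array_cosine_similarity(embedding::FLOAT[384], ?::FLOAT[384]) AS score
    FROM 'hf://datasets/my-username/my-embedded-dataset/data/*.parquet'
    ORDER BY score DESC
    LIMIT 5
    """,
    [query_vector],
).df()
print(results)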