<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Browser LLM Demo</title>
  </head>
  <body>
    <h1>Browser LLM Demo</h1>
    <div id="output">Loading model...</div>
    <script type="module">
      // 1. Import the library from a CDN
      import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0';

      // 2. Define an async function that loads the model and runs inference
      async function runModel() {
        const outputDiv = document.getElementById('output');

        // 3. Load a small model (SmolLM) quantized for the web.
        // Prefer WebGPU when the browser exposes it; otherwise fall back to WASM (CPU).
        const generator = await pipeline(
          'text-generation',
          'HuggingFaceTB/SmolLM-135M-Instruct',
          {
            dtype: 'q4',                               // 4-bit quantized weights (smaller download, faster inference)
            device: navigator.gpu ? 'webgpu' : 'wasm', // 'webgpu' for speed, 'wasm' for CPU compatibility
          }
        );
        outputDiv.textContent = 'Model loaded! Generating text...';

        // 4. Run inference on a chat-style prompt
        const messages = [
          { role: 'system', content: 'You are a helpful assistant.' },
          { role: 'user', content: 'Write a haiku about coding.' },
        ];
        const output = await generator(messages, { max_new_tokens: 50 });

        // 5. Display the assistant's reply (the last message in the generated conversation)
        outputDiv.textContent = output[0].generated_text.at(-1).content;
      }

      runModel().catch((err) => {
        // Surface load/inference failures on the page instead of only in the console
        document.getElementById('output').textContent = `Error: ${err}`;
      });
    </script>
  </body>
</html>
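To try it, save the markup above as index.html and serve it over HTTP (for example, python3 -m http.server), then open the page in a WebGPU-capable browser such as a recent Chrome or Edge; module scripts and the library's model caching can misbehave on file:// URLs. The first visit downloads the quantized model weights, which are cached by the browser for subsequent loads.

As written, the page shows nothing until all 50 tokens are generated. Transformers.js also ships a TextStreamer helper that invokes a callback per decoded chunk, so the output can render incrementally. Below is a minimal sketch of step 4 rewritten to stream, reusing the generator, outputDiv, and messages from the demo above; the option names follow the library's documented examples, but check your installed version if they differ.

    // Import TextStreamer alongside pipeline at the top of the module:
    // import { pipeline, TextStreamer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0';

    outputDiv.textContent = '';
    const streamer = new TextStreamer(generator.tokenizer, {
      skip_prompt: true,          // don't echo the system/user messages
      skip_special_tokens: true,  // drop control tokens from the decoded text
      callback_function: (text) => {
        outputDiv.textContent += text; // append each decoded chunk as it arrives
      },
    });
    await generator(messages, { max_new_tokens: 50, streamer });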