<!DOCTYPE html>
<html lang="en">
  <head>
    <meta charset="utf-8" />
    <title>Browser LLM Demo</title>
  </head>
  <body>
    <h1>Browser LLM Demo</h1>
    <div id="output">Loading model...</div>
    <script type="module">
      // 1. Import the library from a CDN
      import { pipeline } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0';

      // 2. Define an async function that loads the model and runs inference
      async function runModel() {
        const outputDiv = document.getElementById('output');

        // 3. Load a small model (SmolLM) quantized for the web.
        // Prefer WebGPU when the browser exposes it; otherwise fall back to WASM (CPU).
        const generator = await pipeline(
          'text-generation',
          'HuggingFaceTB/SmolLM-135M-Instruct',
          {
            dtype: 'q4',                               // 4-bit quantized weights (smaller download, faster inference)
            device: navigator.gpu ? 'webgpu' : 'wasm', // 'webgpu' for speed, 'wasm' for CPU compatibility
          }
        );
        outputDiv.textContent = 'Model loaded! Generating text...';

        // 4. Run inference on a chat-style prompt
        const messages = [
          { role: 'system', content: 'You are a helpful assistant.' },
          { role: 'user', content: 'Write a haiku about coding.' },
        ];
        const output = await generator(messages, { max_new_tokens: 50 });

        // 5. Display the assistant's reply (the last message in the generated conversation)
        outputDiv.textContent = output[0].generated_text.at(-1).content;
      }

      runModel().catch((err) => {
        // Surface load/inference failures on the page instead of only in the console
        document.getElementById('output').textContent = `Error: ${err}`;
      });
    </script>
  </body>
</html>
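To try it, save the markup above as index.html and serve it over HTTP (for example, python3 -m http.server), then open the page in a WebGPU-capable browser such as a recent Chrome or Edge; module scripts and the library's model caching can misbehave on file:// URLs. The first visit downloads the quantized model weights, which are cached by the browser for subsequent loads.

As written, the page shows nothing until all 50 tokens are generated. Transformers.js also ships a TextStreamer helper that invokes a callback per decoded chunk, so the output can render incrementally. Below is a minimal sketch of step 4 rewritten to stream, reusing the generator, outputDiv, and messages from the demo above; the option names follow the library's documented examples, but check your installed version if they differ.

    // Import TextStreamer alongside pipeline at the top of the module:
    // import { pipeline, TextStreamer } from 'https://cdn.jsdelivr.net/npm/@huggingface/transformers@3.0.0';

    outputDiv.textContent = '';
    const streamer = new TextStreamer(generator.tokenizer, {
      skip_prompt: true,          // don't echo the system/user messages
      skip_special_tokens: true,  // drop control tokens from the decoded text
      callback_function: (text) => {
        outputDiv.textContent += text; // append each decoded chunk as it arrives
      },
    });
    await generator(messages, { max_new_tokens: 50, streamer });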