@sdemontfort
Created January 31, 2026 09:39
Proxy for enabling Cursor tool usage for open source models
/**
 * Cursor → Local LLM compatibility proxy
 *
 * - Public HTTPS via ngrok (or other tunnel)
 * - Forces Cursor Agent tool usage
 * - Converts full JSON completions into OpenAI-style SSE deltas
 * - Strips Cursor's oversized tool list down to mutation-only tools
 *
 * Tested with:
 * - Cursor Agent
 * - LM Studio
 * - Qwen2.5-Coder
 */
const express = require("express");
const app = express();
app.use(express.json({ limit: "50mb" }));
// ================= CONFIG =================
const PORT = process.env.PORT || 3333;
// IMPORTANT:
// This should be reachable FROM THIS MACHINE.
// Prefer LAN IP if possible (no ngrok on backend hop).
const UPSTREAM_CHAT_COMPLETIONS = `${process.env.UPSTREAM_BASE_URL}/v1/chat/completions`;
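// e.g. UPSTREAM_BASE_URL=http://192.168.0.50:1234 (the IP is illustrative;
// 1234 is LM Studio's default server port), or an ngrok HTTPS URL when the
// backend hop has to cross networks.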
// Only keep tools that actually modify files
const ALLOWED_TOOLS = new Set([
"Write",
"StrReplace",
"Delete",
]);
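// Cursor sends tools in the OpenAI function-calling schema; an entry that
// survives this filter looks roughly like (shape illustrative):
//   { type: "function", function: { name: "Write", description: "...", parameters: { ... } } }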
// Hard system coercion — this matters
const HARD_SYSTEM_PROMPT = `
You are a file-editing agent.
STRICT RULES:
- If the user asks to create a file, you MUST call Write.
- If the user asks to modify a file, you MUST call StrReplace or Write.
- Do NOT explain.
- Do NOT ask questions.
- Do NOT output prose or markdown.
- ONLY respond via tool calls when writing files.
`.trim();
// ==========================================
app.post("/v1/chat/completions", async (req, res) => {
  try {
    // Clone body safely
    const body = JSON.parse(JSON.stringify(req.body));
    // Cursor Agent always expects an SSE stream, so we ignore the incoming
    // body.stream, request a full (non-streaming) completion from the
    // upstream, and convert it into SSE deltas below
    body.stream = false;
    delete body.stream_options;
    // Strip tool list down to mutation-only tools
    if (Array.isArray(body.tools)) {
      body.tools = body.tools.filter(
        (t) =>
          t &&
          t.function &&
          ALLOWED_TOOLS.has(t.function.name)
      );
    }
    // Inject hard system prompt at the front
    body.messages = [
      { role: "system", content: HARD_SYSTEM_PROMPT },
      ...(body.messages || []),
    ];
    // Forward to upstream LLM
    const upstream = await fetch(UPSTREAM_CHAT_COMPLETIONS, {
      method: "POST",
      headers: {
        "Content-Type": "application/json",
      },
      body: JSON.stringify(body),
    });
    const text = await upstream.text();
    // Parse upstream JSON completion
    let completion;
    try {
      completion = JSON.parse(text);
    } catch (err) {
      console.error("Upstream returned non-JSON:", text);
      throw err;
    }
    // ================= SSE RESPONSE =================
    // Cursor REQUIRES streaming deltas
    res.status(200);
    res.setHeader("Content-Type", "text/event-stream; charset=utf-8");
    res.setHeader("Cache-Control", "no-cache, no-transform");
    res.setHeader("Connection", "keep-alive");
    const message = completion?.choices?.[0]?.message;
    // Emit content delta (chat text)
    if (message?.content) {
      res.write(
        `data: ${JSON.stringify({
          choices: [{ delta: { content: message.content } }],
        })}\n\n`
      );
    }
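    // The frame above looks like (illustrative payload; one JSON chunk
    // per "data:" line):
    //   data: {"choices":[{"delta":{"content":"Created the file."}}]}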
    // Emit tool_calls delta (this is what triggers file edits)
    if (Array.isArray(message?.tool_calls) && message.tool_calls.length > 0) {
      res.write(
        `data: ${JSON.stringify({
          choices: [{ delta: { tool_calls: message.tool_calls } }],
        })}\n\n`
      );
    }
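    // Example frame (illustrative arguments; the tool_calls array is
    // forwarded verbatim from the upstream completion):
    //   data: {"choices":[{"delta":{"tool_calls":[{"id":"call_1","type":"function",
    //     "function":{"name":"Write","arguments":"{\"path\":\"a.txt\",...}"}}]}}]}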
    // Required terminator
    res.write("data: [DONE]\n\n");
    res.end();
  } catch (err) {
    console.error("Proxy error:", err);
    // Cursor expects SSE even on error
    res.status(200);
    res.setHeader("Content-Type", "text/event-stream; charset=utf-8");
    res.write(
      `data: ${JSON.stringify({
        choices: [{ delta: { content: "Provider error" } }],
      })}\n\n`
    );
    res.write("data: [DONE]\n\n");
    res.end();
  }
});
// Start server
app.listen(PORT, () => {
  console.log(`🧠 Cursor proxy listening on http://localhost:${PORT}`);
  console.log(`➡️ Upstream LLM: ${UPSTREAM_CHAT_COMPLETIONS}`);
});
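// To run locally (a minimal sketch: assumes Node 18+ for the built-in fetch,
// plus `npm install express`; "proxy.js" is just a placeholder filename):
//   UPSTREAM_BASE_URL=http://localhost:1234 node proxy.js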
sdemontfort commented Jan 31, 2026

After mucking around with Cursor's custom OpenAI endpoint to enable calling a self-hosted LLM, I couldn't get Cursor to use tools correctly. This proxy enforces correct tool usage.
To configure, here's what I did (I'm hosting the LLM via LM Studio on my PC and accessing it via Cursor on my Mac):

  • ngrok in front of the proxy process on the Mac
  • the proxy process points at the ngrok tunnel in front of LM Studio on the PC

Ideally you can replace at least one tunnel with a direct network connection between the machines (if you're running multiple tunnels in the flow). A concrete sketch of the wiring is below.
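
Concretely (a sketch of my setup, not the only way to wire it; the tunnel URLs and the proxy.js filename are placeholders, and 1234 is LM Studio's default server port):

  • On the PC: start LM Studio's local server, then expose it with ngrok http 1234
  • On the Mac: UPSTREAM_BASE_URL=https://<pc-tunnel>.ngrok-free.app node proxy.js, then expose the proxy with ngrok http 3333
  • In Cursor: set the OpenAI base URL override in the model settings to the Mac tunnel's HTTPS URL so requests hit the proxy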
