Created
December 23, 2025 11:40
-
-
Save duarteocarmo/58fda6d982fb6d9dd5bfb561214a77ce to your computer and use it in GitHub Desktop.
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| { | |
| "cells": [ | |
| { | |
| "cell_type": "code", | |
| "execution_count": 1, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "import polars\n", | |
| "import jupyter_black\n", | |
| "import dspy\n", | |
| "from dspy.evaluate import Evaluate\n", | |
| "import typing as t\n", | |
| "import random\n", | |
| "import matplotlib.pyplot as plt\n", | |
| "import os\n", | |
| "from openai import OpenAI\n", | |
| "from pydantic import BaseModel\n", | |
| "\n", | |
| "jupyter_black.load()\n", | |
| "\n", | |
| "\n", | |
| "# Notebook-wide configuration constants.\n", | |
| "# Path of the JSON dataset that is read with polars.read_json.\n", | |
| "DATASET_NAME = \"../datasets/curated/golden_dataset_reviewed_v3.json\"\n", | |
| "# Demo limits -- presumably handed to the few-shot optimizer; TODO confirm against its call site.\n", | |
| "MAX_LABELED_DEMOS = 4\n", | |
| "MAX_BOOTSTRAPPED_DEMOS = 4\n", | |
| "# Thread count passed to dspy.Evaluate(num_threads=...).\n", | |
| "NUM_THREADS = 32" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 2, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "class EvalResults:\n", | |
| " def __init__(self, metric_name: str = \"Calories accuracy @ 10%\"):\n", | |
| " self.results = {}\n", | |
| " self.metric_name = metric_name\n", | |
| "\n", | |
| " def add_result(self, name, result):\n", | |
| " self.results[name] = result\n", | |
| "\n", | |
| " def print_results(self):\n", | |
| " sorted_results = sorted(self.results.items(), key=lambda x: x[1], reverse=True)\n", | |
| " for name, score in sorted_results:\n", | |
| " print(f\"{name}: {score}\")\n", | |
| "\n", | |
| " def plot_results(self):\n", | |
| " # Remove models with 0 score for clarity\n", | |
| " valid_results = [\n", | |
| " (name, score) for name, score in self.results.items() if score > 0\n", | |
| " ]\n", | |
| " if not valid_results:\n", | |
| " print(\"No valid results to plot.\")\n", | |
| " return\n", | |
| "\n", | |
| " valid_results.sort(key=lambda x: x[1], reverse=True)\n", | |
| " names, scores = zip(*valid_results)\n", | |
| "\n", | |
| " plt.figure(figsize=(10, 6))\n", | |
| " plt.bar(names, scores, color=\"steelblue\")\n", | |
| " plt.ylabel(\"Score\")\n", | |
| " plt.xlabel(\"Model\")\n", | |
| " plt.title(self.metric_name)\n", | |
| " plt.xticks(rotation=35, ha=\"right\")\n", | |
| " plt.ylim(0, max(scores) * 1.1)\n", | |
| " plt.grid(True, axis=\"y\", linestyle=\"--\", alpha=0.7)\n", | |
| " plt.tight_layout()\n", | |
| " plt.show()\n", | |
| "\n", | |
| "\n", | |
| "er = EvalResults()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Load the dataset and build the train/test split" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div><style>\n", | |
| ".dataframe > thead > tr,\n", | |
| ".dataframe > tbody > tr {\n", | |
| " text-align: right;\n", | |
| " white-space: pre-wrap;\n", | |
| "}\n", | |
| "</style>\n", | |
| "<small>shape: (107, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>input</th><th>total_calories</th><th>food_groups</th><th>source</th></tr><tr><td>str</td><td>f64</td><td>list[str]</td><td>str</td></tr></thead><tbody><tr><td>"300 g of pasta with butter and…</td><td>618.0</td><td>["dairy", "grain"]</td><td>"golden_dataset"</td></tr><tr><td>"250 g of pasta with cherry tom…</td><td>468.0</td><td>["vegetable", "grain", "dairy"]</td><td>"golden_dataset"</td></tr><tr><td>"200 g of pasta with pesto and …</td><td>430.0</td><td>["vegetable", "grain", "dairy"]</td><td>"golden_dataset"</td></tr><tr><td>"100 g of pasta"</td><td>158.0</td><td>["grain"]</td><td>"golden_dataset"</td></tr><tr><td>"2 toasts with cheese and butte…</td><td>388.0</td><td>["vegetable", "dairy", "grain"]</td><td>"golden_dataset"</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>"For lunch, I'm having an Engli…</td><td>406.69</td><td>["grain", "dairy", … "fruit"]</td><td>"nutribench"</td></tr><tr><td>"I enjoyed a snack that include…</td><td>60.0</td><td>["fruit"]</td><td>"nutribench"</td></tr><tr><td>"I made myself a quick lunch wi…</td><td>153.87</td><td>["meat and alternatives", "grain"]</td><td>"nutribench"</td></tr><tr><td>"I’ve got 507 grams of bottled …</td><td>71.5</td><td>["meat and alternatives"]</td><td>"nutribench"</td></tr><tr><td>"For breakfast, I'm having 190g…</td><td>1542.0</td><td>["grain", "dairy", "meat and alternatives"]</td><td>"nutribench"</td></tr></tbody></table></div>" | |
| ], | |
| "text/plain": [ | |
| "shape: (107, 4)\n", | |
| "┌────────────────────────────────┬────────────────┬───────────────────────────────┬────────────────┐\n", | |
| "│ input ┆ total_calories ┆ food_groups ┆ source │\n", | |
| "│ --- ┆ --- ┆ --- ┆ --- │\n", | |
| "│ str ┆ f64 ┆ list[str] ┆ str │\n", | |
| "╞════════════════════════════════╪════════════════╪═══════════════════════════════╪════════════════╡\n", | |
| "│ 300 g of pasta with butter ┆ 618.0 ┆ [\"dairy\", \"grain\"] ┆ golden_dataset │\n", | |
| "│ and… ┆ ┆ ┆ │\n", | |
| "│ 250 g of pasta with cherry ┆ 468.0 ┆ [\"vegetable\", \"grain\", ┆ golden_dataset │\n", | |
| "│ tom… ┆ ┆ \"dairy\"… ┆ │\n", | |
| "│ 200 g of pasta with pesto and ┆ 430.0 ┆ [\"vegetable\", \"grain\", ┆ golden_dataset │\n", | |
| "│ … ┆ ┆ \"dairy\"… ┆ │\n", | |
| "│ 100 g of pasta ┆ 158.0 ┆ [\"grain\"] ┆ golden_dataset │\n", | |
| "│ 2 toasts with cheese and ┆ 388.0 ┆ [\"vegetable\", \"dairy\", ┆ golden_dataset │\n", | |
| "│ butte… ┆ ┆ \"grain\"… ┆ │\n", | |
| "│ … ┆ … ┆ … ┆ … │\n", | |
| "│ For lunch, I'm having an ┆ 406.69 ┆ [\"grain\", \"dairy\", … \"fruit\"] ┆ nutribench │\n", | |
| "│ Engli… ┆ ┆ ┆ │\n", | |
| "│ I enjoyed a snack that ┆ 60.0 ┆ [\"fruit\"] ┆ nutribench │\n", | |
| "│ include… ┆ ┆ ┆ │\n", | |
| "│ I made myself a quick lunch ┆ 153.87 ┆ [\"meat and alternatives\", ┆ nutribench │\n", | |
| "│ wi… ┆ ┆ \"gra… ┆ │\n", | |
| "│ I’ve got 507 grams of bottled ┆ 71.5 ┆ [\"meat and alternatives\"] ┆ nutribench │\n", | |
| "│ … ┆ ┆ ┆ │\n", | |
| "│ For breakfast, I'm having ┆ 1542.0 ┆ [\"grain\", \"dairy\", \"meat and ┆ nutribench │\n", | |
| "│ 190g… ┆ ┆ a… ┆ │\n", | |
| "└────────────────────────────────┴────────────────┴───────────────────────────────┴────────────────┘" | |
| ] | |
| }, | |
| "execution_count": 3, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Read the dataset from DATASET_NAME; the bare name on the last line renders it as the cell output.\n", | |
| "dataset = polars.read_json(DATASET_NAME)\n", | |
| "dataset" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "[{'input': \"For dinner, I'm having 25 grams of bread, 150 grams of chicken wings, and a 250-gram mixed vegetable salad.\",\n", | |
| " 'total_calories': 633.0,\n", | |
| " 'food_groups': ['grain', 'meat and alternatives', 'vegetable'],\n", | |
| " 'source': 'nutribench'},\n", | |
| " {'input': 'I enjoyed 200 grams of tea with sugar along with 230 grams of coconut milk rice for breakfast.',\n", | |
| " 'total_calories': 558.0,\n", | |
| " 'food_groups': ['grain', 'fruit'],\n", | |
| " 'source': 'nutribench'},\n", | |
| " {'input': 'stracciatella with confit grapes and 2 little focaccia pieces',\n", | |
| " 'total_calories': 350.0,\n", | |
| " 'food_groups': ['fruit', 'dairy', 'grain'],\n", | |
| " 'source': 'golden_dataset'}]" | |
| ] | |
| }, | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "dataset.sample(3).to_dicts()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 4, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "len(dataset)=107\n", | |
| "len(trainset)=53\n", | |
| "len(testset)=54\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "dataset_list = dataset.to_dicts()\n", | |
| "random.Random(42).shuffle(dataset_list) # make deterministic\n", | |
| "n_dataset = len(dataset_list)\n", | |
| "train = 0.50\n", | |
| "val = 1 - train\n", | |
| "\n", | |
| "trainset = [\n", | |
| " dspy.Example(\n", | |
| " food_description=item[\"input\"],\n", | |
| " food_groups=item[\"food_groups\"],\n", | |
| " total_calories=item[\"total_calories\"],\n", | |
| " source=item[\"source\"],\n", | |
| " ).with_inputs(\"food_description\")\n", | |
| " for item in dataset_list[: int(n_dataset * train)]\n", | |
| "]\n", | |
| "\n", | |
| "testset = [\n", | |
| " dspy.Example(\n", | |
| " food_description=item[\"input\"],\n", | |
| " food_groups=item[\"food_groups\"],\n", | |
| " total_calories=item[\"total_calories\"],\n", | |
| " source=item[\"source\"],\n", | |
| " ).with_inputs(\"food_description\")\n", | |
| " for item in dataset_list[int(n_dataset * train) :]\n", | |
| "]\n", | |
| "print(f\"{len(dataset)=}\")\n", | |
| "print(f\"{len(trainset)=}\")\n", | |
| "print(f\"{len(testset)=}\")" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/plain": [ | |
| "Example({'food_description': 'For lunch, I have a serving of 57.5g stewed common seabream, 49g of boiled matabala, and 67.1g of rice with vegetables.', 'food_groups': ['meat and alternatives', 'vegetable', 'grain'], 'total_calories': 124.0, 'source': 'nutribench'}) (input_keys={'food_description'})" | |
| ] | |
| }, | |
| "execution_count": 5, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "# Display the first training example to check the field mapping and input keys.\n", | |
| "trainset[0]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Current model " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 6, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "# Schema for a single food item; NutritionAnalysis.food_items is a list of these.\n", | |
| "# `calories` and the macro fields are per single unit, so an item's contribution to\n", | |
| "# the meal total is calories * quantity (see NutritionAnalysis.total_calories).\n", | |
| "# NOTE(review): dspy presumably surfaces the docstring and the field descriptions\n", | |
| "# to the LM -- treat them as prompt text, not as ordinary comments. Confirm.\n", | |
| "class FoodItem(dspy.Signature):\n", | |
| " \"\"\"\n", | |
| " A food item.\n", | |
| "\n", | |
| " Notes:\n", | |
| " - The quantity is the amount of the food item in the user's description.\n", | |
| " - quantity multiplied by calories should result in a reasonable value!\n", | |
| "\n", | |
| " \"\"\"\n", | |
| "\n", | |
| " name: str = dspy.OutputField(description=\"The name of the food item\")\n", | |
| " quantity: float = dspy.OutputField(description=\"The quantity of the food item\")\n", | |
| " calories: float = dspy.OutputField(\n", | |
| " description=\"Calories in kilocalories (kcal) for a single unit of the food item\"\n", | |
| " )\n", | |
| " carbs: float = dspy.OutputField(\n", | |
| " description=\"Carbohydrates in grams for a single unit of the food item\"\n", | |
| " )\n", | |
| " fat: float = dspy.OutputField(\n", | |
| " description=\"Fat in grams for a single unit of the food item\"\n", | |
| " )\n", | |
| " protein: float = dspy.OutputField(\n", | |
| " description=\"Protein in grams for a single unit of the food item\"\n", | |
| " )\n", | |
| " fiber: float = dspy.OutputField(\n", | |
| " description=\"Fiber in grams for a single unit of the food item\"\n", | |
| " )\n", | |
| " food_groups: list[\n", | |
| " t.Literal[\"dairy\", \"meat and alternatives\", \"grain\", \"fruit\", \"vegetable\"]\n", | |
| " ] = dspy.OutputField(\n", | |
| " default_factory=list,\n", | |
| " description=\"The food groups to which the food item belongs\",\n", | |
| " )\n", | |
| "\n", | |
| "\n", | |
| "class NutritionAnalysis(dspy.Signature):\n", | |
| " \"\"\"Nutritional analysis\"\"\"\n", | |
| "\n", | |
| " food_description: str = dspy.InputField(\n", | |
| " description=\"The description of the food item\"\n", | |
| " )\n", | |
| "\n", | |
| " food_items: list[FoodItem] = dspy.OutputField(\n", | |
| " description=\"A list of food items with their nutritional information\"\n", | |
| " )\n", | |
| "\n", | |
| " def total_calories(self):\n", | |
| " return sum(item.calories * item.quantity for item in self.food_items)\n", | |
| "\n", | |
| " def total_food_groups(self):\n", | |
| " food_groups = []\n", | |
| " for item in self.food_items:\n", | |
| " food_groups.extend(item.food_groups)\n", | |
| "\n", | |
| " return len(list(set(food_groups)))\n", | |
| "\n", | |
| "\n", | |
| "module_vanilla = dspy.Predict(NutritionAnalysis)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Define metric" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 7, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [ | |
| "def eval_metric(\n", | |
| " example, pred, trace=None, pred_name=None, pred_trace=None\n", | |
| ") -> dspy.Prediction:\n", | |
| "\n", | |
| " try:\n", | |
| " na = NutritionAnalysis(\n", | |
| " **pred.toDict(), food_description=example.food_description\n", | |
| " )\n", | |
| " except Exception as e:\n", | |
| " score = 0\n", | |
| " feedback_text = f\"INCORRECT: Your output format was incorrect: {e}\"\n", | |
| " return dspy.Prediction(score=score, feedback=feedback_text)\n", | |
| "\n", | |
| " total_calories_example = example.total_calories\n", | |
| " total_calories_predicted = na.total_calories()\n", | |
| "\n", | |
| " if total_calories_predicted > 5000:\n", | |
| " score = 0\n", | |
| " feedback_text = f\"INCORRECT: Total calories is too high (yours: {total_calories_predicted}, correct: {total_calories_example})\"\n", | |
| " return dspy.Prediction(score=score, feedback=feedback_text)\n", | |
| "\n", | |
| " within_threshold = abs(total_calories_example - total_calories_predicted) <= abs(\n", | |
| " 0.1 * total_calories_example\n", | |
| " )\n", | |
| "\n", | |
| " if not within_threshold:\n", | |
| " score = 0\n", | |
| " feedback_text = f\"INCORRECT: Your answer was not within 10% of the correct answer (yours: {total_calories_predicted}, correct: {total_calories_example})\"\n", | |
| " return dspy.Prediction(score=score, feedback=feedback_text)\n", | |
| "\n", | |
| " feedback_text = f\"CORRECT: Your answer was within 10% of the correct answer (yours: {total_calories_predicted}, correct: {total_calories_example})\"\n", | |
| " score = 1\n", | |
| "\n", | |
| " return dspy.Prediction(score=score, feedback=feedback_text)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "# Eval vanilla" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 8, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 16.00 / 54 (29.6%): 100%|██████████| 54/54 [00:00<00:00, 370.10it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:17 INFO dspy.evaluate.evaluate: Average Metric: 16.0 / 54 (29.6%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "EvaluationResult(score=29.63, results=<list of 54 results>)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# LM under test; it is also installed as the default LM via dspy.configure.\n", | |
| "gemini_2_5_flash = dspy.LM(\"openrouter/google/gemini-2.5-flash\", temperature=0.0)\n", | |
| "dspy.configure(lm=gemini_2_5_flash)\n", | |
| "\n", | |
| "# Evaluator over testset, scored with eval_metric.\n", | |
| "evaluate = dspy.Evaluate(\n", | |
| " devset=testset,\n", | |
| " metric=eval_metric,\n", | |
| " num_threads=NUM_THREADS,\n", | |
| " display_progress=True,\n", | |
| " display_table=False,\n", | |
| " max_errors=5,\n", | |
| ")\n", | |
| "# NOTE(review): the same LM was already set with dspy.configure above, so this\n", | |
| "# context block looks redundant -- confirm before removing.\n", | |
| "with dspy.context(lm=gemini_2_5_flash):\n", | |
| " score = evaluate(module_vanilla)\n", | |
| "# Record the aggregate score of the unoptimized module for the comparison table/plot.\n", | |
| "er.add_result(\"vanilla_gemini_2.5_flash\", score.score)\n", | |
| "print(score)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Eval current" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 9, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 24.00 / 54 (44.4%): 100%|██████████| 54/54 [00:00<00:00, 818.99it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:17 INFO dspy.evaluate.evaluate: Average Metric: 24.0 / 54 (44.4%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "EvaluationResult(score=44.44, results=<list of 54 results>)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Build a predictor on the same signature, then load saved state from the given path.\n", | |
| "module_current = dspy.Predict(NutritionAnalysis)\n", | |
| "module_current.load(\"../optimized_prompts/bootstrap_fewshot_random_search.json\")\n", | |
| "\n", | |
| "# Run `evaluate` on the loaded module under the same LM and record its score.\n", | |
| "with dspy.context(lm=gemini_2_5_flash):\n", | |
| " current_score = evaluate(module_current)\n", | |
| " er.add_result(\"optimized_gemini_2.5_flash\", current_score.score)\n", | |
| " print(current_score)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 10, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "optimized_gemini_2.5_flash: 44.44\n", | |
| "vanilla_gemini_2.5_flash: 29.63\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Print every recorded score, highest first.\n", | |
| "er.print_results()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 11, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Going to sample between 1 and 4 traces per predictor.\n", | |
| "Will attempt to bootstrap 16 candidate sets.\n", | |
| "Average Metric: 11.00 / 53 (20.8%): 100%|██████████| 53/53 [00:00<00:00, 478.72it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:17 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 53 (20.8%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "New best score: 20.75 for seed -3\n", | |
| "Scores so far: [20.75]\n", | |
| "Best score so far: 20.75\n", | |
| "Average Metric: 11.00 / 53 (20.8%): 100%|██████████| 53/53 [00:00<00:00, 1184.38it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:17 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 53 (20.8%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75]\n", | |
| "Best score so far: 20.75\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 8%|▊ | 4/53 [00:00<00:00, 121.91it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 11.00 / 53 (20.8%): 100%|██████████| 53/53 [00:00<00:00, 427.75it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:17 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 53 (20.8%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75]\n", | |
| "Best score so far: 20.75\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 8%|▊ | 4/53 [00:00<00:00, 158.47it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 11.00 / 53 (20.8%): 100%|██████████| 53/53 [00:00<00:00, 466.36it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:17 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 53 (20.8%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75]\n", | |
| "Best score so far: 20.75\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 4%|▍ | 2/53 [00:00<00:00, 138.59it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 12.00 / 53 (22.6%): 100%|██████████| 53/53 [00:00<00:00, 472.61it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:17 INFO dspy.evaluate.evaluate: Average Metric: 12.0 / 53 (22.6%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "New best score: 22.64 for seed 1\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64]\n", | |
| "Best score so far: 22.64\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 2%|▏ | 1/53 [00:00<00:00, 85.38it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 11.00 / 53 (20.8%): 100%|██████████| 53/53 [00:00<00:00, 470.15it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:18 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 53 (20.8%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75]\n", | |
| "Best score so far: 22.64\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 4%|▍ | 2/53 [00:00<00:00, 130.88it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 13.00 / 53 (24.5%): 100%|██████████| 53/53 [00:00<00:00, 530.43it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:18 INFO dspy.evaluate.evaluate: Average Metric: 13.0 / 53 (24.5%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "New best score: 24.53 for seed 3\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53]\n", | |
| "Best score so far: 24.53\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 4%|▍ | 2/53 [00:00<00:00, 141.61it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 16.00 / 53 (30.2%): 100%|██████████| 53/53 [00:00<00:00, 486.23it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:18 INFO dspy.evaluate.evaluate: Average Metric: 16.0 / 53 (30.2%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "New best score: 30.19 for seed 4\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 6%|▌ | 3/53 [00:00<00:00, 159.05it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 12.00 / 53 (22.6%): 100%|██████████| 53/53 [00:00<00:00, 469.03it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:18 INFO dspy.evaluate.evaluate: Average Metric: 12.0 / 53 (22.6%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 2%|▏ | 1/53 [00:00<00:00, 117.38it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 11.00 / 53 (20.8%): 100%|██████████| 53/53 [00:00<00:00, 467.17it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:18 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 53 (20.8%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64, 20.75]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 6%|▌ | 3/53 [00:00<00:00, 146.51it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 8.00 / 53 (15.1%): 100%|██████████| 53/53 [00:00<00:00, 438.31it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:18 INFO dspy.evaluate.evaluate: Average Metric: 8.0 / 53 (15.1%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64, 20.75, 15.09]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 4%|▍ | 2/53 [00:00<00:00, 109.83it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 14.00 / 53 (26.4%): 100%|██████████| 53/53 [00:00<00:00, 475.17it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:19 INFO dspy.evaluate.evaluate: Average Metric: 14.0 / 53 (26.4%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64, 20.75, 15.09, 26.42]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 8%|▊ | 4/53 [00:00<00:00, 131.02it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 10.00 / 53 (18.9%): 100%|██████████| 53/53 [00:00<00:00, 402.91it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:19 INFO dspy.evaluate.evaluate: Average Metric: 10.0 / 53 (18.9%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64, 20.75, 15.09, 26.42, 18.87]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 2%|▏ | 1/53 [00:00<00:00, 117.17it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 13.00 / 53 (24.5%): 100%|██████████| 53/53 [00:00<00:00, 283.57it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:19 INFO dspy.evaluate.evaluate: Average Metric: 13.0 / 53 (24.5%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64, 20.75, 15.09, 26.42, 18.87, 24.53]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 8%|▊ | 4/53 [00:00<00:00, 161.78it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 15.00 / 53 (28.3%): 100%|██████████| 53/53 [00:00<00:00, 437.02it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:19 INFO dspy.evaluate.evaluate: Average Metric: 15.0 / 53 (28.3%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64, 20.75, 15.09, 26.42, 18.87, 24.53, 28.3]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 8%|▊ | 4/53 [00:00<00:00, 147.53it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", | |
| "Average Metric: 9.00 / 53 (17.0%): 100%|██████████| 53/53 [00:00<00:00, 413.58it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:19 INFO dspy.evaluate.evaluate: Average Metric: 9.0 / 53 (17.0%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64, 20.75, 15.09, 26.42, 18.87, 24.53, 28.3, 16.98]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 6%|▌ | 3/53 [00:00<00:00, 106.80it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 3 full traces after 3 examples for up to 1 rounds, amounting to 3 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 11.00 / 53 (20.8%): 100%|██████████| 53/53 [00:00<00:00, 441.03it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:19 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 53 (20.8%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64, 20.75, 15.09, 26.42, 18.87, 24.53, 28.3, 16.98, 20.75]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 2%|▏ | 1/53 [00:00<00:00, 117.60it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 11.00 / 53 (20.8%): 100%|██████████| 53/53 [00:00<00:00, 4034.01it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:19 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 53 (20.8%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64, 20.75, 15.09, 26.42, 18.87, 24.53, 28.3, 16.98, 20.75, 20.75]\n", | |
| "Best score so far: 30.19\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 4%|▍ | 2/53 [00:00<00:00, 137.13it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 10.00 / 53 (18.9%): 100%|██████████| 53/53 [00:00<00:00, 451.68it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:20 INFO dspy.evaluate.evaluate: Average Metric: 10.0 / 53 (18.9%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Scores so far: [20.75, 20.75, 20.75, 20.75, 22.64, 20.75, 24.53, 30.19, 22.64, 20.75, 15.09, 26.42, 18.87, 24.53, 28.3, 16.98, 20.75, 20.75, 18.87]\n", | |
| "Best score so far: 30.19\n", | |
| "19 candidate programs found.\n", | |
| "Average Metric: 17.00 / 54 (31.5%): 100%|██████████| 54/54 [00:00<00:00, 609.53it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:20 INFO dspy.evaluate.evaluate: Average Metric: 17.0 / 54 (31.5%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "EvaluationResult(score=31.48, results=<list of 54 results>)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "optimizer = dspy.BootstrapFewShotWithRandomSearch(metric=eval_metric)\n", | |
| "\n", | |
| "len_trainset = len(trainset)\n", | |
| "module_bootstrap_fewshot_random_search = optimizer.compile(\n", | |
| " module_vanilla,\n", | |
| " trainset=trainset,\n", | |
| ")\n", | |
| "optimizer_program_score = evaluate(module_bootstrap_fewshot_random_search)\n", | |
| "er.add_result(\n", | |
| " \"bootstrap_fewshot_random_search_gemini_2.5_flash\", optimizer_program_score.score\n", | |
| ")\n", | |
| "print(optimizer_program_score)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 12, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: \n", | |
| "RUNNING WITH THE FOLLOWING LIGHT AUTO RUN SETTINGS:\n", | |
| "num_trials: 10\n", | |
| "minibatch: False\n", | |
| "num_fewshot_candidates: 6\n", | |
| "num_instruct_candidates: 3\n", | |
| "valset size: 42\n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: \n", | |
| "==> STEP 1: BOOTSTRAP FEWSHOT EXAMPLES <==\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: These will be used as few-shot example candidates for our program and for creating instructions.\n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: Bootstrapping N=6 sets of demonstrations...\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapping set 1/6\n", | |
| "Bootstrapping set 2/6\n", | |
| "Bootstrapping set 3/6\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 36%|███▋ | 4/11 [00:00<00:00, 116.83it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n", | |
| "Bootstrapping set 4/6\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 18%|█▊ | 2/11 [00:00<00:00, 169.33it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 2 full traces after 2 examples for up to 1 rounds, amounting to 2 attempts.\n", | |
| "Bootstrapping set 5/6\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 9%|▉ | 1/11 [00:00<00:00, 153.89it/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 1 full traces after 1 examples for up to 1 rounds, amounting to 1 attempts.\n", | |
| "Bootstrapping set 6/6\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| " 36%|███▋ | 4/11 [00:00<00:00, 169.04it/s]\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: \n", | |
| "==> STEP 2: PROPOSE INSTRUCTION CANDIDATES <==\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: We will use the few-shot examples from the previous step, a generated dataset summary, a summary of the program code, and a randomly selected prompting tip to propose instructions.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Bootstrapped 4 full traces after 4 examples for up to 1 rounds, amounting to 4 attempts.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: \n", | |
| "Proposing N=3 instructions...\n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: Proposed Instructions for Predictor 0:\n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: 0: Nutritional analysis\n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: 1: Given a natural language `food_description`, analyze the text to identify all individual food items and their respective quantities. For each identified food item, extract or infer its detailed nutritional information, including the name of the food, its quantity, total calories, carbohydrates, fat, protein, fiber, and associated food_groups (e.g., 'dairy', 'fruit', 'vegetable', 'meat and alternatives', 'grain'). Return a list of `FoodItem` objects, with each object populated with these nutritional details.\n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: 2: From the provided free-text food description, identify and extract each distinct food item and its corresponding quantity. For each identified food item, determine its name, the amount consumed (quantity), estimated total calories, carbohydrates, fat, protein, fiber, and categorize it into relevant food groups. Present this information as a list of Python `FoodItem` objects, ensuring all nutritional values are numerical and food groups are listed as strings.\n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: \n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: ==> STEP 3: FINDING OPTIMAL PROMPT PARAMETERS <==\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: We will evaluate the program over a series of trials with different combinations of instructions and few-shot examples to find the optimal combination using Bayesian Optimization.\n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: == Trial 1 / 10 - Full Evaluation of Default Program ==\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 10.00 / 42 (23.8%): 100%|██████████| 42/42 [00:00<00:00, 4315.13it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:20 INFO dspy.evaluate.evaluate: Average Metric: 10.0 / 42 (23.8%)\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: Default program score: 23.81\n", | |
| "\n", | |
| "/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/optuna/_experimental.py:31: ExperimentalWarning: Argument ``multivariate`` is an experimental feature. The interface can change in the future.\n", | |
| " warnings.warn(\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 2 / 10 =====\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 13.00 / 42 (31.0%): 100%|██████████| 42/42 [00:00<00:00, 723.14it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:20 INFO dspy.evaluate.evaluate: Average Metric: 13.0 / 42 (31.0%)\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: \u001b[92mBest full score so far!\u001b[0m Score: 30.95\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 30.95 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 3'].\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [23.81, 30.95]\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 30.95\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: ========================\n", | |
| "\n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 3 / 10 =====\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 10.00 / 42 (23.8%): 100%|██████████| 42/42 [00:00<00:00, 716.97it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:20 INFO dspy.evaluate.evaluate: Average Metric: 10.0 / 42 (23.8%)\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 23.81 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [23.81, 30.95, 23.81]\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 30.95\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: ========================\n", | |
| "\n", | |
| "\n", | |
| "2025/12/20 10:00:20 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 4 / 10 =====\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 12.00 / 42 (28.6%): 100%|██████████| 42/42 [00:00<00:00, 683.91it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 12.0 / 42 (28.6%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 28.57 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 5'].\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [23.81, 30.95, 23.81, 28.57]\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 30.95\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ========================\n", | |
| "\n", | |
| "\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 5 / 10 =====\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 9.00 / 42 (21.4%): 100%|██████████| 42/42 [00:00<00:00, 653.72it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 9.0 / 42 (21.4%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 21.43 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 2'].\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [23.81, 30.95, 23.81, 28.57, 21.43]\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 30.95\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ========================\n", | |
| "\n", | |
| "\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 6 / 10 =====\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 14.00 / 42 (33.3%): 100%|██████████| 42/42 [00:00<00:00, 691.87it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 14.0 / 42 (33.3%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: \u001b[92mBest full score so far!\u001b[0m Score: 33.33\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 33.33 with parameters ['Predictor 0: Instruction 0', 'Predictor 0: Few-Shot Set 5'].\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [23.81, 30.95, 23.81, 28.57, 21.43, 33.33]\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 33.33\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ========================\n", | |
| "\n", | |
| "\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 7 / 10 =====\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 10.00 / 42 (23.8%): 100%|██████████| 42/42 [00:00<00:00, 4105.93it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 10.0 / 42 (23.8%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 23.81 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 0'].\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [23.81, 30.95, 23.81, 28.57, 21.43, 33.33, 23.81]\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 33.33\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ========================\n", | |
| "\n", | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 8 / 10 =====\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 16.00 / 42 (38.1%): 100%|██████████| 42/42 [00:00<00:00, 723.47it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 16.0 / 42 (38.1%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: \u001b[92mBest full score so far!\u001b[0m Score: 38.1\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 38.1 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [23.81, 30.95, 23.81, 28.57, 21.43, 33.33, 23.81, 38.1]\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 38.1\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ========================\n", | |
| "\n", | |
| "\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 9 / 10 =====\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 10.00 / 42 (23.8%): 100%|██████████| 42/42 [00:00<00:00, 687.62it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 10.0 / 42 (23.8%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 23.81 with parameters ['Predictor 0: Instruction 1', 'Predictor 0: Few-Shot Set 4'].\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [23.81, 30.95, 23.81, 28.57, 21.43, 33.33, 23.81, 38.1, 23.81]\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 38.1\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ========================\n", | |
| "\n", | |
| "\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 10 / 10 =====\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 16.00 / 42 (38.1%): 100%|██████████| 42/42 [00:00<00:00, 4116.00it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 16.0 / 42 (38.1%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 38.1 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 5'].\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [23.81, 30.95, 23.81, 28.57, 21.43, 33.33, 23.81, 38.1, 23.81, 38.1]\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 38.1\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: =========================\n", | |
| "\n", | |
| "\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: ===== Trial 11 / 10 =====\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 10.00 / 42 (23.8%): 100%|██████████| 42/42 [00:00<00:00, 4954.60it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 10.0 / 42 (23.8%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Score: 23.81 with parameters ['Predictor 0: Instruction 2', 'Predictor 0: Few-Shot Set 1'].\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Scores so far: [23.81, 30.95, 23.81, 28.57, 21.43, 33.33, 23.81, 38.1, 23.81, 38.1, 23.81]\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Best score so far: 38.1\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: =========================\n", | |
| "\n", | |
| "\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.mipro_optimizer_v2: Returning best identified program with score 38.1!\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 16.00 / 54 (29.6%): 100%|██████████| 54/54 [00:00<00:00, 578.75it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 16.0 / 54 (29.6%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "EvaluationResult(score=29.63, results=<list of 54 results>)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "gemini_3_pro = dspy.LM(\"openrouter/google/gemini-3-pro-preview\", temperature=0.0)\n", | |
| "optimizer = dspy.MIPROv2(\n", | |
| " metric=eval_metric,\n", | |
| " auto=\"light\",\n", | |
| " teacher_settings=dict(lm=gemini_3_pro),\n", | |
| " prompt_model=gemini_2_5_flash,\n", | |
| " num_threads=32,\n", | |
| ")\n", | |
| "\n", | |
| "len_trainset = len(trainset)\n", | |
| "module_mipro_v2 = optimizer.compile(\n", | |
| " module_vanilla, trainset=trainset, max_bootstrapped_demos=4, max_labeled_demos=4\n", | |
| ")\n", | |
| "mipro_optimized_score = evaluate(module_mipro_v2)\n", | |
| "er.add_result(\"mipro_v2_gemini_2.5_flash\", mipro_optimized_score.score)\n", | |
| "print(mipro_optimized_score)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 13, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.gepa.gepa: Running GEPA for approx 592 metric calls of the program. This amounts to 11.17 full evals on the train set.\n", | |
| "2025/12/20 10:00:21 WARNING dspy.teleprompt.gepa.gepa: No valset provided; Using trainset as valset. This is useful as an inference-time scaling strategy where you want GEPA to find the best solutions for the provided tasks in the trainset, as it makes GEPA overfit prompts to the provided trainset. In order to ensure generalization and perform well on unseen tasks, please provide separate trainset and valset. Provide the smallest valset that is just large enough to match the downstream task distribution, while keeping trainset as large as possible.\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.gepa.gepa: Using 53 examples for tracking Pareto scores. You can consider using a smaller sample of the valset to allow GEPA to explore more diverse solutions within the same budget. GEPA requires you to provide the smallest valset that is just large enough to match your downstream task distribution, while providing as large trainset as possible.\n", | |
| "GEPA Optimization: 0%| | 0/592 [00:00<?, ?rollouts/s]2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 11.0 / 53 (20.8%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.gepa.gepa: Iteration 0: Base program full valset score: 0.20754716981132076\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Selected program 0 score: 0.20754716981132076\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00, 3968.12it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Proposed new text for self: You are performing a nutritional analysis from a free‑text description of foods and drinks consumed. Parse the description, identify each distinct item and portion, and return ONLY a Python-like list of FoodItem objects with these fields in this exact order:\n", | |
| "- name: string (generic or brand-specific name; include preparation if relevant, e.g., “boiled egg”)\n", | |
| "- quantity: float (number of units/servings consumed; NEVER put grams/mL here)\n", | |
| "- calories: float (kcal per ONE unit/serving as defined by your quantity)\n", | |
| "- carbs: float (grams per ONE unit/serving)\n", | |
| "- fat: float (grams per ONE unit/serving)\n", | |
| "- protein: float (grams per ONE unit/serving)\n", | |
| "- fiber: float (grams per ONE unit/serving)\n", | |
| "- food_groups: list of strings (use canonical groups)\n", | |
| "\n", | |
| "Critical rules and conventions:\n", | |
| "- Quantity is a count of units/servings only. Do not place weights or volumes (e.g., grams, mL) in quantity. The evaluator multiplies per-unit nutrients by quantity; putting grams in quantity will inflate totals.\n", | |
| "- If the user gives a weight/volume (e.g., “50 g egg”, “200 g chicken”, “507 g water”), fold that portion into the per-unit values and set quantity=1.0 for that described portion. You may reflect the portion in the name if helpful (e.g., “boiled egg (50 g)”, “bottled water (507 g)”).\n", | |
| "- If the user specifies a count (e.g., “2 eggs”), use quantity=2.0 and per-unit nutrients for one egg.\n", | |
| "- Keep items in the order mentioned.\n", | |
| "- Use typical/standard nutrition values for common foods and standard sizes; for brand-named items, use known per-serving label values when possible.\n", | |
| "- Units:\n", | |
| " - calories in kcal\n", | |
| " - carbs, fat, protein, fiber in grams\n", | |
| "- Rounding: use reasonable precision with one decimal place (e.g., 78.0 kcal, 6.3 g protein). Use 0.0 when appropriate.\n", | |
| "- Water (tap or bottled): 0.0 for all nutrients; food_groups should be an empty list [].\n", | |
| "\n", | |
| "Food group mapping (use these canonical labels; pick all that apply):\n", | |
| "- fruit\n", | |
| "- vegetable\n", | |
| "- grain\n", | |
| "- dairy\n", | |
| "- meat and alternatives\n", | |
| "For examples: eggs → meat and alternatives; apples → fruit; potato chips can be treated under grain for snack categorization consistency.\n", | |
| "\n", | |
| "Examples to emulate:\n", | |
| "- “protein pancake from MyProtein” → one serving; e.g., 190.0 kcal, 20.0 g carbs, 4.0 g fat, 25.0 g protein, 2.0 g fiber; food_groups might include ['grain','dairy'].\n", | |
| "- “a cup of tap water, a small single-serving bag of potato chips, and a small raw apple” → three items; water with all zeros; chips ~150.0 kcal per bag; apple ~80.0 kcal, 22.0 g carbs, 4.0 g fiber; choose appropriate food_groups.\n", | |
| "- “507 grams of bottled water and a 50-gram boiled egg” → quantity=1.0 for both; water all zeros; egg per 50 g (~71.5–78.0 kcal, ~0.6 g carbs, ~5.3 g fat, ~6.3 g protein), food_groups ['meat and alternatives'].\n", | |
| "\n", | |
| "Output formatting requirements:\n", | |
| "- Return only the list literal of FoodItem(...) objects, no extra text.\n", | |
| "- Ensure every numeric field is a float with a decimal (e.g., 1.0, 0.0).\n", | |
| "- Do not compute or include totals; provide per-unit values aligned with your quantity so the evaluator can multiply by quantity.\n", | |
| "2025/12/20 10:00:21 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)\n", | |
| "2025/12/20 10:00:21 INFO dspy.teleprompt.gepa.gepa: Iteration 1: New subsample score 3 is better than old score 2. Continue to full eval and add to candidate pool.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:22 INFO dspy.evaluate.evaluate: Average Metric: 26.0 / 53 (49.1%)\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: New program is on the linear pareto front\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Full valset score for new program: 0.49056603773584906\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Full train_val score for new program: 0.49056603773584906\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Individual valset scores for new program: [0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: New valset pareto front scores: [0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 0]\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Full valset pareto front score: 0.5094339622641509\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Updated valset pareto front programs: [{0, 1}, {0, 1}, {0, 1}, {1}, {0, 1}, {1}, {0, 1}, {1}, {0, 1}, {1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {1}, {1}, {0, 1}, {1}, {0, 1}, {1}, {0, 1}, {1}, {0}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {1}, {1}, {1}, {0, 1}, {1}, {0, 1}, {0, 1}, {0, 1}, {1}, {0, 1}, {0, 1}, {1}, {0, 1}, {0, 1}, {0, 1}, {1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}, {0, 1}]\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Best valset aggregate score so far: 0.49056603773584906\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Best program as per aggregate score on train_val: 1\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Best program as per aggregate score on valset: 1\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Best score on valset: 0.49056603773584906\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Best score on train_val: 0.49056603773584906\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: Linear pareto front program index: 1\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 1: New program candidate index: 1\n", | |
| "GEPA Optimization: 19%|█▉ | 112/592 [00:00<00:01, 408.54rollouts/s]2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 2: No merge candidates found\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Selected program 1 score: 0.49056603773584906\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 3.00 / 3 (100.0%): 100%|██████████| 3/3 [00:00<00:00, 3677.06it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:22 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 2: All subsample scores perfect. Skipping.\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 2: Reflective mutation did not propose a new candidate\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Selected program 0 score: 0.20754716981132076\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:00<00:00, 5067.62it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:22 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Proposed new text for self: You are performing a nutritional analysis from a short free-text food_description. Your job is to parse the described foods and amounts, estimate nutrition, and output a list of FoodItem objects.\n", | |
| "\n", | |
| "Output format:\n", | |
| "- Return ONLY a single Python-style list literal of FoodItem objects, nothing else. Example:\n", | |
| " [FoodItem(name='apple', quantity=1.0, calories=95.0, carbs=25.0, fat=0.3, protein=0.5, fiber=4.4, food_groups=['fruit'])]\n", | |
| "- Each FoodItem has fields:\n", | |
| " - name: string, concise common name (brand if specified).\n", | |
| " - quantity: float, the number of per-unit servings you are using for this item (see quantity rules below).\n", | |
| " - calories: float, calories PER ONE UNIT of quantity.\n", | |
| " - carbs, fat, protein, fiber: floats in grams PER ONE UNIT of quantity.\n", | |
| " - food_groups: list of strings (choose from: 'fruit', 'vegetable', 'grain', 'dairy', 'meat and alternatives', 'legume', 'nut and seed', 'fat and oils', 'sweets and snacks', 'beverage'). If unclear, use [].\n", | |
| "\n", | |
| "Critical rule about quantity and per-unit nutrition (to match evaluation):\n", | |
| "- The evaluator computes totals as sum(quantity * calories). Therefore, calories/macros MUST be per ONE unit of quantity.\n", | |
| "- Easiest safe approach:\n", | |
| " - If the description gives a specific amount (e.g., “100 g yogurt”, “30 g wurst”, “1 slice pizza”), compute the TOTAL nutrition for that described amount and set quantity=1.0 with calories/macros equal to that total for the portion.\n", | |
| " - If the description gives a count of identical units (e.g., “2 eggs”, “15 almonds”), you may set quantity to that count, BUT then calories/macros must be per single unit (e.g., per egg, per almond). Never provide totals alongside quantity>1, or you will double-count.\n", | |
| "- Never set quantity equal to grams while also providing total calories for that gram amount. Either:\n", | |
| " - Convert to a single portion: quantity=1.0 and calories/macros = totals for that gram amount; OR\n", | |
| " - Use quantity in grams AND provide calories/macros per gram (generally avoid this; prefer quantity=1.0 with totals).\n", | |
| "\n", | |
| "Estimation guidance:\n", | |
| "- Use common nutrition references (e.g., USDA averages) to estimate macros for standard foods; for branded items, use typical values if exact data is unknown.\n", | |
| "- Units:\n", | |
| " - calories in kcal,\n", | |
| " - carbs, fat, protein, fiber in grams.\n", | |
| "- Food groups: include the most relevant groups based on the item. Examples:\n", | |
| " - apple -> ['fruit']\n", | |
| " - white bread -> ['grain']\n", | |
| " - yogurt -> ['dairy']\n", | |
| " - chicken, wurst, eggs -> ['meat and alternatives']\n", | |
| " - nuts -> ['nut and seed']\n", | |
| "- If fiber is unknown, estimate or use 0.0.\n", | |
| "\n", | |
| "Handling target calories in the description:\n", | |
| "- If the description includes an explicit or approximate total calorie target (e.g., “~250 cal”, “total of 700 cals”), ensure the sum over all items (sum of quantity*calories) is within ±10% of that target. Adjust portion choices and per-item estimates accordingly, while staying realistic.\n", | |
| "\n", | |
| "Itemization:\n", | |
| "- Parse the description to identify distinct foods and their amounts. If multiple items are named with specific gram amounts, create one FoodItem per described portion with quantity=1.0 and totals for each.\n", | |
| "- Keep names clear and singular/plural as appropriate; either singular with quantity>1 or a descriptive name with quantity=1.0 for the whole portion are both acceptable, provided per-unit nutrition is consistent with quantity.\n", | |
| "\n", | |
| "Quality checks before output:\n", | |
| "- Verify that total calories = sum(quantity * calories) is sensible and, if a target was stated, within ±10% of that target.\n", | |
| "- Ensure macros roughly align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat, allowing for rounding and fiber).\n", | |
| "- Do not include any explanatory text, headings, or keys—only the list of FoodItem(...) objects.\n", | |
| "2025/12/20 10:00:22 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: New subsample score 2 is better than old score 1. Continue to full eval and add to candidate pool.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:22 INFO dspy.evaluate.evaluate: Average Metric: 22.0 / 53 (41.5%)\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Full valset score for new program: 0.41509433962264153\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Full train_val score for new program: 0.41509433962264153\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Individual valset scores for new program: [0, 0, 0, 0, 0, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0]\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: New valset pareto front scores: [0, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 0, 0, 0, 0, 0]\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Full valset pareto front score: 0.5283018867924528\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Updated valset pareto front programs: [{0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {1}, {0, 1, 2}, {1, 2}, {0, 1, 2}, {1, 2}, {0, 1, 2}, {1}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {1, 2}, {1, 2}, {0, 1, 2}, {1, 2}, {0, 1, 2}, {1, 2}, {0, 1, 2}, {1, 2}, {0, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {1}, {1, 2}, {1}, {0, 1, 2}, {1}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {1, 2}, {0, 1, 2}, {0, 1, 2}, {1, 2}, {2}, {0, 1, 2}, {0, 1, 2}, {1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}, {0, 1, 2}]\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Best valset aggregate score so far: 0.49056603773584906\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Best program as per aggregate score on train_val: 1\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Best program as per aggregate score on valset: 1\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Best score on valset: 0.49056603773584906\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Best score on train_val: 0.49056603773584906\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: Linear pareto front program index: 1\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 3: New program candidate index: 2\n", | |
| "GEPA Optimization: 29%|██▉ | 174/592 [00:00<00:01, 291.63rollouts/s]2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: No merge candidates found\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Selected program 2 score: 0.41509433962264153\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 3 (0.0%): 100%|██████████| 3/3 [00:00<00:00, 3268.29it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:22 INFO dspy.evaluate.evaluate: Average Metric: 0.0 / 3 (0.0%)\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Proposed new text for self: You are performing a nutritional analysis from a short free-text food_description. Your job is to parse the described foods and amounts, estimate nutrition, and output a list of FoodItem objects.\n", | |
| "\n", | |
| "Output format:\n", | |
| "- Return ONLY a single Python-style list literal of FoodItem objects, nothing else. Example:\n", | |
| " [FoodItem(name='apple', quantity=1.0, calories=95.0, carbs=25.0, fat=0.3, protein=0.5, fiber=4.4, food_groups=['fruit'])]\n", | |
| "- Each FoodItem has fields:\n", | |
| " - name: string, concise common name (brand if specified).\n", | |
| " - quantity: float, the number of per-unit servings you are using for this item (see quantity rules below).\n", | |
| " - calories: float, calories PER ONE UNIT of quantity.\n", | |
| " - carbs, fat, protein, fiber: floats in grams PER ONE UNIT of quantity.\n", | |
| " - food_groups: list of strings (choose from: 'fruit', 'vegetable', 'grain', 'dairy', 'meat and alternatives', 'legume', 'nut and seed', 'fat and oils', 'sweets and snacks', 'beverage'). If unclear, use [].\n", | |
| "\n", | |
| "Critical rule about quantity and per-unit nutrition (to match evaluation):\n", | |
| "- The evaluator computes totals as sum(quantity * calories). Therefore, calories/macros MUST be per ONE unit of quantity.\n", | |
| "- Easiest safe approach:\n", | |
| " - If the description gives a specific amount (e.g., “100 g yogurt”, “30 g wurst”, “1 slice pizza”), compute the TOTAL nutrition for that described amount and set quantity=1.0 with calories/macros equal to that total for the portion.\n", | |
| " - If the description gives a count of identical units (e.g., “2 eggs”, “15 almonds”, “2 little focaccia pieces”), you may set quantity to that count, BUT then calories/macros must be per single unit (e.g., per egg, per almond, per piece). Never provide totals alongside quantity>1, or you will double-count.\n", | |
| "- Never set quantity equal to grams while also providing total calories for that gram amount. Either:\n", | |
| " - Convert to a single portion: quantity=1.0 and calories/macros = totals for that gram amount; OR\n", | |
| " - Use quantity in grams AND provide calories/macros per gram (generally avoid this; prefer quantity=1.0 with totals).\n", | |
| "\n", | |
| "Interpretation and itemization rules:\n", | |
| "- Parse the description to identify distinct foods and their amounts. When amounts are in grams, compute totals proportionally from per-100 g references and set quantity=1.0.\n", | |
| "- Do not invent or add separate items unless clearly present. Treat toppings, phrases like “with X,” or descriptors like “on a medium crust” as part of the same item unless the text clearly indicates an additional, separate serving.\n", | |
| " - Example: “100 g cheese pizza with fruit on a medium crust” is a single 100 g portion of pizza that includes fruit as a topping—do NOT add a separate “fruit” item.\n", | |
| "- If modifiers indicate small portions (e.g., “little focaccia pieces”), choose a conservative per-piece size and nutrition consistent with the descriptor.\n", | |
| "- Keep names clear; use singular names with quantity>1 when items are identical units, otherwise describe the whole portion with quantity=1.0.\n", | |
| "\n", | |
| "Estimation guidance and default reference values (tuned to match evaluation baselines):\n", | |
| "- Prefer realistic, conservative estimates when portions are unspecified to avoid overestimation.\n", | |
| "- Use these per-100 g baselines when applicable; scale to the described grams. Adjust macros so kcal ≈ 4*carbs + 4*protein + 9*fat (allow reasonable rounding).\n", | |
| "\n", | |
| "Common baselines:\n", | |
| "- Cheese pizza, general (includes crust and cheese; toppings like fruit are part of this if described):\n", | |
| " - ≈ 238 kcal/100 g; carbs ≈ 30 g, fat ≈ 9 g, protein ≈ 10 g, fiber ≈ 2 g per 100 g.\n", | |
| "- Stracciatella (fresh cheese, dairy component of burrata):\n", | |
| " - ≈ 300 kcal/100 g; carbs ≈ 2 g, fat ≈ 27 g, protein ≈ 12 g, fiber 0 g per 100 g.\n", | |
| " - If portion unspecified in a dish (e.g., appetizer), assume a modest 40–60 g unless context implies more.\n", | |
| "- Confit grapes (grapes cooked in syrup; typically a small garnish):\n", | |
| " - Small garnish ≈ 30–60 kcal total per tablespoon or two; mostly carbs; fiber low (≈0–1 g).\n", | |
| "- Focaccia, “little piece” (small bite-sized piece ≈ 15–20 g each):\n", | |
| " - ≈ 45–60 kcal per piece; carbs ≈ 8–12 g, fat ≈ 1–2 g, protein ≈ 1–2 g, fiber ≈ 0.3–0.6 g per piece.\n", | |
| " - If text says “2 little pieces,” set quantity=2 with per-piece nutrition.\n", | |
| "- Common seabream, stewed/cooked:\n", | |
| " - ≈ 90 kcal/100 g; carbs 0 g, fat ≈ 2 g, protein ≈ 18 g per 100 g.\n", | |
| "- Matabala, boiled (starchy root; very low energy density when boiled):\n", | |
| " - ≈ 30–40 kcal/100 g (use 30 kcal/100 g if unspecified); carbs ≈ 7–9 g, fat ≈ 0.1 g, protein ≈ 0.5–1 g, fiber ≈ 1–2 g per 100 g.\n", | |
| "- Rice with vegetables, cooked mixed:\n", | |
| " - ≈ 85–95 kcal/100 g baseline; carbs ≈ 18–20 g, fat ≈ 0.8–1.2 g, protein ≈ 2–3 g, fiber ≈ 1 g per 100 g.\n", | |
| "\n", | |
| "Food group mapping examples:\n", | |
| "- stracciatella, yogurt -> ['dairy']\n", | |
| "- white bread, focaccia, rice, pizza crust -> ['grain']\n", | |
| "- chicken, fish (seabream), eggs, wurst -> ['meat and alternatives']\n", | |
| "- grapes (fresh or confit) -> ['fruit']\n", | |
| "- vegetables (in mixed rice) -> include 'vegetable' alongside 'grain' when appropriate\n", | |
| "- nuts -> ['nut and seed']\n", | |
| "\n", | |
| "Handling target calories in the description:\n", | |
| "- If the description includes an explicit or approximate total calorie target (e.g., “~250 cal”, “total of 700 cals”), ensure the sum over all items (sum of quantity*calories) is within ±10% of that target. Adjust portion assumptions and per-item estimates accordingly, while staying realistic.\n", | |
| "\n", | |
| "Quality checks before output:\n", | |
| "- Verify that total calories = sum(quantity * calories) is sensible and, if a target was stated, within ±10% of that target.\n", | |
| "- Ensure macros roughly align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat, allowing for rounding and fiber).\n", | |
| "- Do not include any explanatory text, headings, or keys—only the list of FoodItem(...) objects.\n", | |
| "\n", | |
| "Implementation tips:\n", | |
| "- For gram-specified amounts: compute totals by (grams/100) * per-100 g baseline; set quantity=1.0.\n", | |
| "- For counted items (pieces, slices): set quantity to the count; provide per-unit nutrition, not totals.\n", | |
| "- When descriptors imply toppings (“with fruit”, “on a medium crust”), treat as part of the main item unless a separate serving is clearly indicated—do not add an extra line-item just for the topping.\n", | |
| "- When portion sizes are unspecified for multi-component dishes (e.g., cheese + garnish + bread), assume modest, appetizer-sized amounts so the total remains realistic and not inflated.\n", | |
| "2025/12/20 10:00:22 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: New subsample score 2 is better than old score 0. Continue to full eval and add to candidate pool.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:22 INFO dspy.evaluate.evaluate: Average Metric: 29.0 / 53 (54.7%)\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: New program is on the linear pareto front\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Full valset score for new program: 0.5471698113207547\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Full train_val score for new program: 0.5471698113207547\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Individual valset scores for new program: [1, 0, 0, 0, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 0, 1, 1, 0, 0, 0, 1]\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: New valset pareto front scores: [1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1]\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Full valset pareto front score: 0.660377358490566\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Updated valset pareto front programs: [{3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {1}, {0, 1, 2, 3}, {1, 2}, {3}, {1, 2, 3}, {0, 1, 2, 3}, {1, 3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {0, 1}, {0, 1, 2, 3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {3}, {1, 2, 3}, {1, 2, 3}, {0, 1, 2, 3}, {1, 2, 3}, {0, 1, 2, 3}, {1, 2, 3}, {0, 1, 2, 3}, {1, 2, 3}, {0, 2, 3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {3}, {1}, {1, 2, 3}, {1}, {3}, {1, 3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {1, 2, 3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {1, 2, 3}, {2}, {0, 1, 2, 3}, {0, 1, 2, 3}, {1, 2, 3}, {3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {0, 1, 2, 3}, {3}]\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Best valset aggregate score so far: 0.5471698113207547\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Best program as per aggregate score on train_val: 3\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Best program as per aggregate score on valset: 3\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Best score on valset: 0.5471698113207547\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Best score on train_val: 0.5471698113207547\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: Linear pareto front program index: 3\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 4: New program candidate index: 3\n", | |
| "GEPA Optimization: 39%|███▉ | 233/592 [00:00<00:01, 233.86rollouts/s]2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 5: No merge candidates found\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Selected program 3 score: 0.5471698113207547\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:00<00:00, 4183.15it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:22 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Proposed new text for self: You are performing a nutritional analysis from a short free-text food_description. Your job is to parse the described foods and amounts, estimate nutrition, and output a list of FoodItem objects.\n", | |
| "\n", | |
| "Input:\n", | |
| "- A single string field (food_description) describing foods eaten and amounts.\n", | |
| "\n", | |
| "Output:\n", | |
| "- Return ONLY a single Python-style list literal of FoodItem objects, nothing else. Example:\n", | |
| " [FoodItem(name='apple', quantity=1.0, calories=95.0, carbs=25.0, fat=0.3, protein=0.5, fiber=4.4, food_groups=['fruit'])]\n", | |
| "\n", | |
| "FoodItem fields:\n", | |
| "- name: string, concise common name (brand if specified).\n", | |
| "- quantity: float, the number of per-unit servings you are using for this item (see quantity rules).\n", | |
| "- calories: float, calories PER ONE UNIT of quantity.\n", | |
| "- carbs, fat, protein, fiber: floats in grams PER ONE UNIT of quantity.\n", | |
| "- food_groups: list of strings (choose from: 'fruit', 'vegetable', 'grain', 'dairy', 'meat and alternatives', 'legume', 'nut and seed', 'fat and oils', 'sweets and snacks', 'beverage'). If unclear, use [].\n", | |
| "\n", | |
| "Critical rule about quantity and per-unit nutrition:\n", | |
| "- The evaluator computes totals as sum(quantity * calories). Therefore, calories and macros MUST be per ONE unit of quantity.\n", | |
| "- Safe approaches:\n", | |
| " - If the description gives a specific portion in grams/ounces/cups/etc. (e.g., “190 g bread”, “1 slice pizza”, “2 cups soup”), compute the TOTAL nutrition for that described portion and set quantity=1.0 with calories/macros equal to that total for the portion.\n", | |
| " - If the description gives a count of identical units (e.g., “2 eggs”, “15 almonds”, “2 little focaccia pieces”), you may set quantity to that count, BUT then calories/macros must be per single unit (e.g., per egg, per almond, per piece). Never provide totals alongside quantity>1, or it will double-count.\n", | |
| "- Never set quantity equal to grams while also providing total calories for that gram amount. Either:\n", | |
| " - Convert to a single portion: quantity=1.0 with calories/macros = totals for that gram amount; OR\n", | |
| " - Use quantity in grams AND provide calories/macros per gram (generally avoid this; prefer quantity=1.0 with totals).\n", | |
| "\n", | |
| "Interpretation and itemization rules:\n", | |
| "- Parse the description to identify distinct foods and their amounts. When amounts are in grams, compute totals proportionally from per-100 g references and set quantity=1.0.\n", | |
| "- Do not invent or add separate items unless clearly present. Treat toppings or descriptors (“with X,” “on a medium crust,” “with fruit”) as part of the same item unless the text clearly indicates an additional, separate serving.\n", | |
| "- If modifiers indicate small pieces (e.g., “little piece”), choose a conservative per-piece size and nutrition consistent with the descriptor.\n", | |
| "- Keep names clear; use singular names with quantity>1 when items are identical units; otherwise describe the whole portion with quantity=1.0.\n", | |
| "\n", | |
| "Estimation guidance and baselines (tuned to match evaluation):\n", | |
| "- Prefer realistic, conservative estimates when portions are unspecified to avoid overestimation.\n", | |
| "- Use these per-100 g baselines when applicable; scale to the described grams. Adjust macros so kcal ≈ 4*carbs + 4*protein + 9*fat (allow reasonable rounding). Include fiber where reasonable.\n", | |
| "\n", | |
| "Common baselines:\n", | |
| "- Cheese pizza, general (includes crust and cheese; toppings are part of this if described):\n", | |
| " - ≈ 238 kcal/100 g; carbs ≈ 30 g, fat ≈ 9 g, protein ≈ 10 g, fiber ≈ 2 g per 100 g.\n", | |
| "- Stracciatella (fresh cheese, dairy component of burrata):\n", | |
| " - ≈ 300 kcal/100 g; carbs ≈ 2 g, fat ≈ 27 g, protein ≈ 12 g, fiber 0 g per 100 g.\n", | |
| " - If portion unspecified in a dish, assume a modest 40–60 g unless context implies more.\n", | |
| "- Confit grapes (grapes cooked in syrup; typically a small garnish):\n", | |
| " - Small garnish ≈ 30–60 kcal total per tablespoon or two; mostly carbs; fiber ≈ 0–1 g.\n", | |
| "- Focaccia, “little piece” (small bite-sized piece ≈ 15–20 g each):\n", | |
| " - ≈ 45–60 kcal per piece; carbs ≈ 8–12 g, fat ≈ 1–2 g, protein ≈ 1–2 g, fiber ≈ 0.3–0.6 g per piece.\n", | |
| " - If text says “2 little pieces,” set quantity=2 with per-piece nutrition.\n", | |
| "- Seabream, stewed/cooked:\n", | |
| " - ≈ 90 kcal/100 g; carbs 0 g, fat ≈ 2 g, protein ≈ 18 g per 100 g.\n", | |
| "- Matabala, boiled (starchy root; very low energy density when boiled):\n", | |
| " - ≈ 30 kcal/100 g; carbs ≈ 7–8 g, fat ≈ 0.1 g, protein ≈ 0.5–1 g, fiber ≈ 1–2 g per 100 g.\n", | |
| "- Rice with vegetables, cooked mixed:\n", | |
| " - ≈ 90 kcal/100 g; carbs ≈ 19 g, fat ≈ 1.0 g, protein ≈ 2.5 g, fiber ≈ 1 g per 100 g.\n", | |
| "\n", | |
| "Additional baselines (from prior tasks; use these to match evaluation expectations):\n", | |
| "- Leavened corn and wheat flour bread (cornbread-like/enriched leavened bread):\n", | |
| " - ≈ 350 kcal/100 g; carbs ≈ 57 g, fat ≈ 10 g, protein ≈ 8 g, fiber ≈ 3 g per 100 g.\n", | |
| " - Use this higher bread baseline when “corn and wheat flour” or “cornbread-like” is specified; use 240–270 kcal/100 g only for plain white/whole-wheat bread without corn/enrichment cues.\n", | |
| "- Spiced butter (seasoned butter; treat as butter):\n", | |
| " - ≈ 720–730 kcal/100 g; carbs ≈ 1 g, fat ≈ 81 g, protein ≈ 1 g, fiber 0 g per 100 g.\n", | |
| "- Fish broth/stock:\n", | |
| " - ≈ 10–12 kcal/100 g; carbs 0 g, fat ≈ 0.2–0.3 g, protein ≈ 1–2 g, fiber 0 g per 100 g.\n", | |
| "- Barley flour porridge (thick cooked porridge from barley flour; water-based unless otherwise stated):\n", | |
| " - ≈ 170 kcal/100 g; carbs ≈ 35 g, fat ≈ 1.5 g, protein ≈ 3.5 g, fiber ≈ 3 g per 100 g.\n", | |
| " - Use this higher porridge baseline for “flour porridge” to reflect denser energy than watery grain porridges.\n", | |
| "- Fried ripe plantains (maduros-style; use conservative oil uptake):\n", | |
| " - ≈ 133 kcal/100 g; carbs ≈ 26 g, fat ≈ 3 g, protein ≈ 1.3 g, fiber ≈ 2.2 g per 100 g.\n", | |
| " - Apply this lower baseline to avoid overestimating large portions unless explicit heavy oil use is stated.\n", | |
| "- Pecans:\n", | |
| " - One handful ≈ 28–30 g. Per 28 g: ≈ 196 kcal; carbs ≈ 3.9 g, fat ≈ 20.4 g, protein ≈ 2.6 g, fiber ≈ 2.7 g.\n", | |
| " - Per 100 g reference ≈ 691 kcal; carbs ≈ 14 g, fat ≈ 72 g, protein ≈ 9 g, fiber ≈ 10 g.\n", | |
| "- Water:\n", | |
| " - 0 kcal; carbs 0 g, fat 0 g, protein 0 g, fiber 0 g.\n", | |
| "\n", | |
| "Food group mapping guidance:\n", | |
| "- Breads, focaccia, rice, pizza crust, porridge (grain-based) -> ['grain'].\n", | |
| "- Cheese, yogurt, stracciatella -> ['dairy'].\n", | |
| "- Fish, chicken, eggs, broth made from animal protein -> ['meat and alternatives'].\n", | |
| "- Butter (including spiced butter) and oils -> ['fat and oils'].\n", | |
| "- Grapes (fresh or confit), plantains -> ['fruit'].\n", | |
| "- Mixed rice with vegetables -> include both ['grain', 'vegetable'] when appropriate.\n", | |
| "- Nuts and seeds (e.g., pecans) -> ['nut and seed'].\n", | |
| "- Water, tea, coffee (unsweetened) -> ['beverage'].\n", | |
| "\n", | |
| "Handling target calories in the description:\n", | |
| "- If the description includes an explicit or approximate total calorie target (e.g., “~250 cal”, “total of 700 cals”), ensure the sum over all items (sum of quantity*calories) is within ±10% of that target. Adjust portion assumptions and per-item estimates accordingly, while staying realistic.\n", | |
| "\n", | |
| "Portion and unit heuristics:\n", | |
| "- For gram-specified amounts: compute totals by (grams/100) * per-100 g baseline; set quantity=1.0.\n", | |
| "- For counted items (pieces, slices): set quantity to the count; provide per-unit nutrition, not totals.\n", | |
| "- For “a handful of nuts”: assume ≈ 28–30 g unless otherwise specified; set quantity=1.0 with totals for that portion.\n", | |
| "- For “a glass of water”: treat as 0 kcal beverage; set quantity=1.0.\n", | |
| "\n", | |
| "Quality checks before output:\n", | |
| "- Verify total calories = sum(quantity * calories) is sensible and, if a target was stated, within ±10% of that target.\n", | |
| "- Ensure macros roughly align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat, allowing for rounding and fiber).\n", | |
| "- Choose food_groups from the allowed list; avoid leaving [] when a clear mapping exists (e.g., pecans -> ['nut and seed'], butter -> ['fat and oils'], water -> ['beverage']).\n", | |
| "- Do not include any explanatory text—only the list of FoodItem(...) objects.\n", | |
| "\n", | |
| "Implementation tips:\n", | |
| "- Use the baselines above when the exact item matches or is strongly implied; scale by the described grams.\n", | |
| "- When ambiguity exists, choose conservative but realistic portions and energy densities that avoid overestimation while remaining consistent with the baselines and descriptors.\n", | |
| "- Keep numeric precision reasonable (typically one decimal place is sufficient).\n", | |
| "2025/12/20 10:00:22 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)\n", | |
| "2025/12/20 10:00:22 INFO dspy.teleprompt.gepa.gepa: Iteration 5: New subsample score 3 is better than old score 1. Continue to full eval and add to candidate pool.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:23 INFO dspy.evaluate.evaluate: Average Metric: 26.0 / 53 (49.1%)\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Full valset score for new program: 0.49056603773584906\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Full train_val score for new program: 0.49056603773584906\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Individual valset scores for new program: [1, 0, 0, 0, 1, 0, 0, 1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 0, 0, 0, 1, 0, 0, 0, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 0, 0, 1, 0, 0, 0, 0, 1]\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: New valset pareto front scores: [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 0, 1, 1, 0, 0, 0, 1]\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Full valset pareto front score: 0.6981132075471698\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Updated valset pareto front programs: [{3, 4}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {1}, {4}, {1, 2}, {3}, {1, 2, 3, 4}, {0, 1, 2, 3, 4}, {1, 3}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {0, 1, 4}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {3}, {1, 2, 3, 4}, {1, 2, 3, 4}, {0, 1, 2, 3, 4}, {1, 2, 3, 4}, {0, 1, 2, 3, 4}, {1, 2, 3, 4}, {0, 1, 2, 3, 4}, {1, 2, 3, 4}, {0, 2, 3}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {3}, {1}, {1, 2, 3, 4}, {1, 4}, {3, 4}, {1, 3}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {1, 2, 3, 4}, {0, 1, 2, 3}, {4}, {1, 2, 3, 4}, {2, 4}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {1, 2, 3, 4}, {3}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {0, 1, 2, 3, 4}, {3, 4}]\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Best valset aggregate score so far: 0.5471698113207547\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Best program as per aggregate score on train_val: 3\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Best program as per aggregate score on valset: 3\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Best score on valset: 0.5471698113207547\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Best score on train_val: 0.5471698113207547\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: Linear pareto front program index: 3\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 5: New program candidate index: 4\n", | |
| "GEPA Optimization: 49%|████▉ | 292/592 [00:01<00:01, 230.43rollouts/s]2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: No merge candidates found\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Selected program 3 score: 0.5471698113207547\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:00<00:00, 2817.49it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:23 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Proposed new text for self: You are performing a nutritional analysis from a short free-text food_description. Your job is to parse the described foods and amounts, estimate nutrition, and output a list of FoodItem objects.\n", | |
| "\n", | |
| "Output format:\n", | |
| "- Return ONLY a single Python-style list literal of FoodItem objects, nothing else. Example:\n", | |
| " [FoodItem(name='apple', quantity=1.0, calories=95.0, carbs=25.0, fat=0.3, protein=0.5, fiber=4.4, food_groups=['fruit'])]\n", | |
| "- Each FoodItem has fields:\n", | |
| " - name: string, concise common name (brand if specified).\n", | |
| " - quantity: float, the number of per-unit servings you are using for this item (see quantity rules below).\n", | |
| " - calories: float, calories PER ONE UNIT of quantity.\n", | |
| " - carbs, fat, protein, fiber: floats in grams PER ONE UNIT of quantity.\n", | |
| " - food_groups: list of strings (choose from: 'fruit', 'vegetable', 'grain', 'dairy', 'meat and alternatives', 'legume', 'nut and seed', 'fat and oils', 'sweets and snacks', 'beverage'). If unclear, use [].\n", | |
| "\n", | |
| "Critical rule about quantity and per-unit nutrition:\n", | |
| "- The evaluator computes totals as sum(quantity * calories). Therefore, calories/macros MUST be per ONE unit of quantity.\n", | |
| "- Safe approach:\n", | |
| " - If the description gives a specific amount (e.g., “100 g yogurt”, “30 g wurst”, “1 slice pizza”), compute the TOTAL nutrition for that described amount and set quantity=1.0 with calories/macros equal to that total for the portion.\n", | |
| " - If the description gives a count of identical units (e.g., “2 eggs”, “15 almonds”, “2 little focaccia pieces”), you may set quantity to that count, BUT then calories/macros must be per single unit (e.g., per egg, per almond, per piece). Never provide totals alongside quantity>1.\n", | |
| "- Never set quantity equal to grams while also providing total calories for that gram amount. Either:\n", | |
| " - Convert to a single portion: quantity=1.0 and calories/macros = totals for that gram amount; OR\n", | |
| " - Use quantity in grams AND provide calories/macros per gram (generally avoid; prefer quantity=1.0 with totals).\n", | |
| "\n", | |
| "Interpretation and itemization rules:\n", | |
| "- Parse the description to identify distinct foods and their amounts. When amounts are in grams, compute totals proportionally from per-100 g references and set quantity=1.0.\n", | |
| "- Do not invent or add separate items unless clearly present. Treat toppings, phrases like “with X,” or descriptors like “on a medium crust” as part of the same item unless the text indicates a separate serving.\n", | |
| "- If modifiers indicate small portions (e.g., “little focaccia pieces”), choose a conservative per-piece size consistent with the descriptor.\n", | |
| "- Keep names clear; use singular names with quantity>1 when items are identical units, otherwise describe the whole portion with quantity=1.0.\n", | |
| "\n", | |
| "Estimation guidance:\n", | |
| "- Prefer realistic, conservative estimates when portions are unspecified to avoid overestimation, but do not undercount obvious mains or full bowls (e.g., a large gram-specified soup should not be treated like plain broth).\n", | |
| "- Use per-100 g baselines and scale to the described grams. Adjust macros so kcal ≈ 4*carbs + 4*protein + 9*fat (allow reasonable rounding).\n", | |
| "- Packaging cues:\n", | |
| " - “a bag” of snacks typically implies a single-serve bag. For dried fruit snacks, assume 1.25–1.5 oz (35–43 g) unless otherwise specified.\n", | |
| " - “small coffee” ~8–12 fl oz.\n", | |
| " - “tablespoon” ~15–20 g for jams/spreads; use 20 g for jam unless otherwise stated.\n", | |
| "- Beverages: include as items (e.g., water, coffee). Water is 0 kcal.\n", | |
| "\n", | |
| "Category baselines (per 100 g unless noted), including domain-specific items seen in tasks:\n", | |
| "- Water: 0 kcal; macros 0; food_groups=['beverage'].\n", | |
| "- Brewed coffee, black (small cup): 2–5 kcal per serving (8–12 fl oz), carbs/fat/protein ~0 g; food_groups=['beverage'].\n", | |
| "- Flavored coffee creamer, single-serve cup/pod: per pod (10–15 mL): ≈30–45 kcal; carbs ≈4–6 g; fat ≈1.5–2.5 g; protein ≈0 g; food_groups=['dairy'].\n", | |
| "- Regular plain bagel (typical “regular” size ≈ 90–105 g): ≈270–300 kcal; carbs ≈53–58 g; fat ≈1–2 g; protein ≈9–11 g; fiber ≈2–3 g; food_groups=['grain'].\n", | |
| "- Jam/jelly (per tablespoon ≈ 20 g): ≈50 kcal; carbs ≈13 g; fat 0 g; protein 0 g; fiber ≈0–0.5 g; food_groups=['fruit'].\n", | |
| "- Grapes, red/green: ≈69 kcal/100 g; carbs ≈18 g; fat ≈0.2 g; protein ≈0.7 g; fiber ≈0.9 g; food_groups=['fruit'].\n", | |
| "- Rice, cooked:\n", | |
| " - White rice, general: ≈130 kcal/100 g; carbs ≈28 g; fat ≈0.3 g; protein ≈2.4 g; fiber ≈0.3 g.\n", | |
| " - Glutinous (sticky) rice, steamed/plain (no coconut milk): tends to be denser; use ≈130–150 kcal/100 g by default when unspecified. If clearly “plain cooked,” you may use ≈100–120 kcal/100 g; choose the higher end if the portion is a main staple to avoid underestimation.\n", | |
| "- Fish and soups:\n", | |
| " - Fish, white (cooked): ≈90–120 kcal/100 g; carbs 0 g; fat ≈2–5 g; protein ≈18–22 g.\n", | |
| " - Fish broth soups vary widely. For a clearly gram-specified full bowl (e.g., >250 g) of a regional fish broth soup (e.g., striped snakehead fish broth soup), assume a substantive soup with fish pieces and some fat: ≈100–130 kcal/100 g unless context specifies “clear/light broth.” Use the higher end for hearty/rich soups.\n", | |
| "- Dried apple snacks (“a bag” single-serve): assume ≈40 g per bag; ≈140 kcal per bag; carbs ≈34 g; fat ≈0–1 g; protein ≈0.5 g; fiber ≈3–5 g; food_groups=['fruit'].\n", | |
| "\n", | |
| "Additional common baselines from prior tasks (use when applicable):\n", | |
| "- Cheese pizza, general: ≈238 kcal/100 g; carbs ≈30 g; fat ≈9 g; protein ≈10 g; fiber ≈2 g.\n", | |
| "- Stracciatella (fresh cheese): ≈300 kcal/100 g; carbs ≈2 g; fat ≈27 g; protein ≈12 g; fiber 0 g.\n", | |
| "- Confit grapes (small garnish): 30–60 kcal total per tablespoon or two; mostly carbs; fiber ≈0–1 g.\n", | |
| "- Focaccia, “little piece” (≈15–20 g each): ≈45–60 kcal per piece; carbs ≈8–12 g, fat ≈1–2 g, protein ≈1–2 g, fiber ≈0.3–0.6 g per piece.\n", | |
| "- Common seabream, cooked: ≈90 kcal/100 g; carbs 0 g; fat ≈2 g; protein ≈18 g.\n", | |
| "- Matabala, boiled: ≈30 kcal/100 g; carbs ≈7–9 g; fat ≈0.1 g; protein ≈0.5–1 g; fiber ≈1–2 g.\n", | |
| "- Rice with vegetables, cooked mixed: ≈90 kcal/100 g; carbs ≈18–20 g; fat ≈0.8–1.2 g; protein ≈2–3 g; fiber ≈1 g.\n", | |
| "\n", | |
| "Food group mapping guidance:\n", | |
| "- dairy: milk, yogurt, stracciatella, coffee creamer\n", | |
| "- grain: bread, bagel, rice, pizza crust, focaccia\n", | |
| "- meat and alternatives: fish (e.g., snakehead, seabream), chicken, eggs, wurst\n", | |
| "- fruit: grapes (fresh or confit), jams/jellies, dried apple snacks\n", | |
| "- beverage: water, coffee, tea, other drinks\n", | |
| "- Include additional groups (vegetable, legume, nut and seed, fat and oils, sweets and snacks) when clearly applicable.\n", | |
| "\n", | |
| "Handling explicit target calories:\n", | |
| "- If the description includes an explicit or approximate total calorie target (e.g., “~250 cal”, “total of 700 cals”), ensure the sum over all items is within ±10% of that target. Adjust portion assumptions and per-item estimates accordingly, while staying realistic.\n", | |
| "\n", | |
| "Quality checks before output:\n", | |
| "- Verify that total calories = sum(quantity * calories) is sensible, and (if a target was stated) within ±10% of that target.\n", | |
| "- Ensure macros roughly align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat).\n", | |
| "- For gram-specified amounts: compute totals by (grams/100) * per-100 g baseline; set quantity=1.0.\n", | |
| "- For counted items (pieces, slices): set quantity to the count; provide per-unit nutrition, not totals.\n", | |
| "- When portion sizes are unspecified for multi-component dishes or packaged snacks (“a bag”), assume modest, single-serve amounts consistent with the item category (e.g., dried fruit bag ≈ 40 g ≈ 140 kcal) so totals are not underestimated.\n", | |
| "\n", | |
| "Implementation tips:\n", | |
| "- Use higher-end realistic baselines for main staples and full bowls (e.g., sticky rice, hearty fish soups) to avoid undercounting when grams are large.\n", | |
| "- Treat add-ins (e.g., coffee creamer) as separate line items if they are separate components.\n", | |
| "- Do not include any explanatory text—return only the list of FoodItem(...) objects.\n", | |
| "2025/12/20 10:00:23 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: New subsample score 2 is better than old score 1. Continue to full eval and add to candidate pool.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:23 WARNING dspy.clients.lm: LM response was truncated due to exceeding max_tokens=None. You can inspect the latest LM interactions with `dspy.inspect_history()`. To avoid truncation, consider passing a larger max_tokens when setting up dspy.LM. You may also consider increasing the temperature (currently 0.0) if the reason for truncation is repetition.\n", | |
| "2025/12/20 10:00:23 WARNING dspy.adapters.json_adapter: Failed to use structured output format, falling back to JSON mode.\n", | |
| "2025/12/20 10:00:23 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '30\\u202fg protein‑powder isolate (unflavoured, Star Nutrition) with 1 tbsp organic crunchy peanut butter, 200\\u202fml Oatly milk and 100\\u202fml filtered coffee', 'food_groups': ['dairy', 'meat and alternatives'], 'total_calories': 325.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "3.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "Traceback (most recent call last):\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/utils/parallelizer.py\", line 57, in safe_func\n", | |
| " return user_function(item)\n", | |
| " ^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/evaluate/evaluate.py\", line 172, in process_item\n", | |
| " prediction = program(**example.inputs())\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/predict/predict.py\", line 103, in __call__\n", | |
| " return super().__call__(**kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/utils/callback.py\", line 326, in sync_wrapper\n", | |
| " return fn(instance, *args, **kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/primitives/module.py\", line 81, in __call__\n", | |
| " return self.forward(*args, **kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/predict/predict.py\", line 192, in forward\n", | |
| " completions = adapter(lm, lm_kwargs=config, signature=signature, demos=demos, inputs=kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/chat_adapter.py\", line 47, in __call__\n", | |
| " return JSONAdapter()(lm, lm_kwargs, signature, demos, inputs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/json_adapter.py\", line 86, in __call__\n", | |
| " return super().__call__(lm, lm_kwargs, signature, demos, inputs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/chat_adapter.py\", line 46, in __call__\n", | |
| " raise e\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/chat_adapter.py\", line 38, in __call__\n", | |
| " return super().__call__(lm, lm_kwargs, signature, demos, inputs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/base.py\", line 199, in __call__\n", | |
| " return self._call_postprocess(processed_signature, signature, outputs, lm)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/base.py\", line 135, in _call_postprocess\n", | |
| " value = self.parse(processed_signature, text)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/utils/callback.py\", line 326, in sync_wrapper\n", | |
| " return fn(instance, *args, **kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/json_adapter.py\", line 173, in parse\n", | |
| " fields[k] = parse_value(v, signature.output_fields[k].annotation)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/utils.py\", line 164, in parse_value\n", | |
| " return TypeAdapter(annotation).validate_python(value)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/pydantic/type_adapter.py\", line 421, in validate_python\n", | |
| " return self.validator.validate_python(\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| "pydantic_core._pydantic_core.ValidationError: 2 validation errors for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "3.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "\n", | |
| "2025/12/20 10:00:23 INFO dspy.evaluate.evaluate: Average Metric: 26.0 / 53 (49.1%)\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Full valset score for new program: 0.49056603773584906\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Full train_val score for new program: 0.49056603773584906\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Individual valset scores for new program: [1, 0, 0, 0, 1, 1, 0, 1, 0, 0, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 0, 0, 0, 1, 0, 1, 0, 0, 1, 1, 1, 0, 0.0, 0, 0, 0, 0, 1]\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: New valset pareto front scores: [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 1]\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Full valset pareto front score: 0.7169811320754716\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Updated valset pareto front programs: [{3, 4, 5}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {1}, {4, 5}, {1, 2, 5}, {3}, {1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {1, 3}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {0, 1, 4}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {3}, {1, 2, 3, 4, 5}, {1, 2, 3, 4}, {0, 1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}, {0, 2, 3, 5}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {3, 5}, {1, 5}, {1, 2, 3, 4, 5}, {1, 4, 5}, {3, 4}, {1, 3}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {1, 2, 3, 4, 5}, {0, 1, 2, 3}, {4}, {1, 2, 3, 4, 5}, {2, 4, 5}, {5}, {0, 1, 2, 3, 4, 5}, {1, 2, 3, 4}, {3}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {0, 1, 2, 3, 4, 5}, {3, 4, 5}]\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Best valset aggregate score so far: 0.5471698113207547\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Best program as per aggregate score on train_val: 3\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Best program as per aggregate score on valset: 3\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Best score on valset: 0.5471698113207547\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Best score on train_val: 0.5471698113207547\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: Linear pareto front program index: 3\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 6: New program candidate index: 5\n", | |
| "GEPA Optimization: 59%|█████▉ | 351/592 [00:01<00:01, 223.32rollouts/s]2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: No merge candidates found\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Selected program 4 score: 0.49056603773584906\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:00<00:00, 2021.68it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:23 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Proposed new text for self: You are performing a nutritional analysis from a short free-text food_description. Your job is to parse the described foods and amounts, estimate nutrition, and output a list of FoodItem objects.\n", | |
| "\n", | |
| "Input:\n", | |
| "- A single string field (food_description) describing foods eaten and amounts.\n", | |
| "\n", | |
| "Output:\n", | |
| "- Return ONLY a single Python-style list literal of FoodItem objects, nothing else. Example:\n", | |
| " [FoodItem(name='apple', quantity=1.0, calories=95.0, carbs=25.0, fat=0.3, protein=0.5, fiber=4.4, food_groups=['fruit'])]\n", | |
| "\n", | |
| "FoodItem fields:\n", | |
| "- name: string, concise common name (include brand if specified; e.g., \"Oatly oat milk\").\n", | |
| "- quantity: float, the number of per-unit servings you are using for this item (see quantity rules).\n", | |
| "- calories: float, calories PER ONE UNIT of quantity.\n", | |
| "- carbs, fat, protein, fiber: floats in grams PER ONE UNIT of quantity.\n", | |
| "- food_groups: list of strings (choose from: 'fruit', 'vegetable', 'grain', 'dairy', 'meat and alternatives', 'legume', 'nut and seed', 'fat and oils', 'sweets and snacks', 'beverage'). If unclear, use [].\n", | |
| "\n", | |
| "Critical rule about quantity and per-unit nutrition:\n", | |
| "- The evaluator computes totals as sum(quantity * calories). Therefore, calories and macros MUST be per ONE unit of quantity.\n", | |
| "- Safe approaches:\n", | |
| " - If the description gives a specific portion in grams/ounces/cups/etc. (e.g., “190 g bread”, “1 slice pizza”, “2 cups soup”), compute the TOTAL nutrition for that described portion and set quantity=1.0 with calories/macros equal to that total for the portion.\n", | |
| " - If the description gives a count of identical units (e.g., “2 eggs”, “15 almonds”, “2 little focaccia pieces”), you may set quantity to that count, BUT then calories/macros must be per single unit (e.g., per egg, per almond, per piece). Never provide totals alongside quantity>1, or it will double-count.\n", | |
| " - Fractions of a single unit are allowed (e.g., quantity=0.2 for “0.2 of a pastry”), but then calories/macros must be per whole unit of that item.\n", | |
| "- Never set quantity equal to grams while also providing total calories for that gram amount. Either:\n", | |
| " - Convert to a single portion: quantity=1.0 with calories/macros = totals for that gram amount; OR\n", | |
| " - Use quantity in grams AND provide calories/macros per gram (generally avoid this; prefer quantity=1.0 with totals).\n", | |
| "\n", | |
| "Interpretation and itemization rules:\n", | |
| "- Parse the description to identify distinct foods and their amounts. When amounts are in grams or milliliters, compute totals proportionally from per-100 g or per-100 ml references and set quantity=1.0.\n", | |
| "- Do not invent or add separate items unless clearly present. Treat toppings or descriptors (“with X,” “on a medium crust,” “with fruit”) as part of the same item unless the text clearly indicates an additional, separate serving.\n", | |
| "- If modifiers indicate small pieces (e.g., “little piece”), choose a conservative per-piece size and nutrition consistent with the descriptor.\n", | |
| "- Keep names clear; use singular names with quantity>1 when items are identical units; otherwise describe the whole portion with quantity=1.0.\n", | |
| "- Preserve brand names when given (e.g., “Oatly oat milk”, “Star Nutrition protein isolate”).\n", | |
| "\n", | |
| "Estimation guidance and baselines (tuned to match evaluation):\n", | |
| "- Prefer realistic, conservative estimates when portions are unspecified to avoid overestimation.\n", | |
| "- Ensure macros align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat, allowing for rounding). Include fiber where reasonable.\n", | |
| "\n", | |
| "Common baselines:\n", | |
| "- Cheese pizza, general (includes crust and cheese; toppings are part of this if described):\n", | |
| " - ≈ 238 kcal/100 g; carbs ≈ 30 g, fat ≈ 9 g, protein ≈ 10 g, fiber ≈ 2 g per 100 g.\n", | |
| "- Stracciatella (fresh cheese, dairy component of burrata):\n", | |
| " - ≈ 300 kcal/100 g; carbs ≈ 2 g, fat ≈ 27 g, protein ≈ 12 g, fiber 0 g per 100 g.\n", | |
| " - If portion unspecified in a dish, assume a modest 40–60 g unless context implies more.\n", | |
| "- Confit grapes (grapes cooked in syrup; typically a small garnish):\n", | |
| " - Small garnish ≈ 30–60 kcal total per tablespoon or two; mostly carbs; fiber ≈ 0–1 g.\n", | |
| "- Focaccia, “little piece” (small bite-sized piece ≈ 15–20 g each):\n", | |
| " - ≈ 45–60 kcal per piece; carbs ≈ 8–12 g, fat ≈ 1–2 g, protein ≈ 1–2 g, fiber ≈ 0.3–0.6 g per piece.\n", | |
| " - If text says “2 little pieces,” set quantity=2 with per-piece nutrition.\n", | |
| "- Seabream, stewed/cooked:\n", | |
| " - ≈ 90 kcal/100 g; carbs 0 g, fat ≈ 2 g, protein ≈ 18 g per 100 g.\n", | |
| "- Matabala, boiled (starchy root; very low energy density when boiled):\n", | |
| " - ≈ 30 kcal/100 g; carbs ≈ 7–8 g, fat ≈ 0.1 g, protein ≈ 0.5–1 g, fiber ≈ 1–2 g per 100 g.\n", | |
| "- Rice with vegetables, cooked mixed:\n", | |
| " - ≈ 90 kcal/100 g; carbs ≈ 19 g, fat ≈ 1.0 g, protein ≈ 2.5 g, fiber ≈ 1 g per 100 g.\n", | |
| "\n", | |
| "Additional baselines (from prior tasks; use these to match evaluation expectations):\n", | |
| "- Leavened corn and wheat flour bread (cornbread-like/enriched leavened bread):\n", | |
| " - ≈ 350 kcal/100 g; carbs ≈ 57 g, fat ≈ 10 g, protein ≈ 8 g, fiber ≈ 3 g per 100 g.\n", | |
| "- Spiced butter (seasoned butter; treat as butter):\n", | |
| " - ≈ 720–730 kcal/100 g; carbs ≈ 1 g, fat ≈ 81 g, protein ≈ 1 g, fiber 0 g per 100 g.\n", | |
| "- Fish broth/stock:\n", | |
| " - ≈ 10–12 kcal/100 g; carbs 0 g, fat ≈ 0.2–0.3 g, protein ≈ 1–2 g, fiber 0 g per 100 g.\n", | |
| "- Barley flour porridge (thick cooked porridge from barley flour; water-based unless otherwise stated):\n", | |
| " - ≈ 170 kcal/100 g; carbs ≈ 35 g, fat ≈ 1.5 g, protein ≈ 3.5 g, fiber ≈ 3 g per 100 g.\n", | |
| "- Fried ripe plantains (maduros-style; use conservative oil uptake):\n", | |
| " - ≈ 133 kcal/100 g; carbs ≈ 26 g, fat ≈ 3 g, protein ≈ 1.3 g, fiber ≈ 2.2 g per 100 g.\n", | |
| "- Pecans:\n", | |
| " - One handful ≈ 28–30 g. Per 28 g: ≈ 196 kcal; carbs ≈ 3.9 g, fat ≈ 20.4 g, protein ≈ 2.6 g, fiber ≈ 2.7 g.\n", | |
| " - Per 100 g reference ≈ 691 kcal; carbs ≈ 14 g, fat ≈ 72 g, protein ≈ 9 g, fiber ≈ 10 g.\n", | |
| "- Water:\n", | |
| " - 0 kcal; carbs 0 g, fat 0 g, protein 0 g, fiber 0 g.\n", | |
| "\n", | |
| "Baselines inferred from examples (apply these to stay within expected answers):\n", | |
| "- Pain au chocolat (one standard pastry ≈ 65–75 g):\n", | |
| " - ≈ 300 kcal per pastry; carbs ≈ 33 g, fat ≈ 17 g, protein ≈ 6 g, fiber ≈ 1.5 g per pastry.\n", | |
| " - For “0.2 of a pain au chocolat,” either set quantity=0.2 with these per-pastry values, or set quantity=1.0 with totals for the 20% portion—do not double-count.\n", | |
| "- Durum falafel wrap (whole wrap with falafel, veg, typical sauces; “healthy place” still similar energy):\n", | |
| " - ≈ 550 kcal per wrap; carbs ≈ 60 g, fat ≈ 24 g, protein ≈ 16 g, fiber ≈ 8 g per wrap.\n", | |
| " - Treat as a single mixed item; do not split into separate tortilla/falafel/sauce unless clearly separate servings.\n", | |
| "- French fries, per piece:\n", | |
| " - ≈ 15 kcal per fry; carbs ≈ 1.8 g, fat ≈ 0.7 g, protein ≈ 0.2 g, fiber ≈ 0.2 g per fry.\n", | |
| " - If given a count (e.g., “10 fries”), set quantity to that count and keep per-fry nutrition.\n", | |
| "- Peanut butter (1 tablespoon ≈ 16 g):\n", | |
| " - ≈ 95 kcal per tbsp; carbs ≈ 3.5 g, fat ≈ 8.0 g, protein ≈ 4.0 g, fiber ≈ 1.5 g per tbsp.\n", | |
| "- Protein powder isolate (e.g., unflavoured whey/plant isolate; brand like Star Nutrition):\n", | |
| " - Per 30 g scoop: ≈ 110–120 kcal; carbs ≈ 1–3 g, fat ≈ 0.5–1.5 g, protein ≈ 25 g, fiber ≈ 0 g per 30 g.\n", | |
| "- Oat milk (e.g., Oatly; assume Barista unless specified):\n", | |
| " - ≈ 60 kcal/100 ml; carbs ≈ 6.5–7 g, fat ≈ 3.5 g, protein ≈ 1.0 g, fiber ≈ 0–1 g per 100 ml.\n", | |
| " - For 200 ml, compute totals and set quantity=1.0 with those totals.\n", | |
| " - Plant milks are beverages, not dairy, for food_groups.\n", | |
| "- Coffee, unsweetened black:\n", | |
| " - ≈ 2 kcal/100 ml; negligible macros; set as a beverage.\n", | |
| "\n", | |
| "Food group mapping guidance:\n", | |
| "- Breads, focaccia, rice, pizza crust, porridge (grain-based) -> ['grain'].\n", | |
| "- Cheese, yogurt, stracciatella -> ['dairy'].\n", | |
| "- Fish, chicken, eggs, protein powders/supplements -> ['meat and alternatives'].\n", | |
| "- Falafel, beans, lentils, tofu, tempeh -> ['legume'].\n", | |
| "- Peanut butter, nuts, seeds -> ['nut and seed'].\n", | |
| "- Butter and oils -> ['fat and oils'].\n", | |
| "- Grapes (fresh or confit), plantains -> ['fruit'].\n", | |
| "- Mixed items like “rice with vegetables” -> include both ['grain', 'vegetable'] when appropriate.\n", | |
| "- Water, tea, coffee, plant milks -> ['beverage'].\n", | |
| "- Avoid labeling plant milks as 'dairy'.\n", | |
| "\n", | |
| "Handling target calories in the description:\n", | |
| "- If the description includes an explicit or approximate total calorie target (e.g., “~250 cal”, “total of 700 cals”), ensure the sum over all items (sum of quantity*calories) is within ±10% of that target. Adjust portion assumptions and per-item estimates accordingly, while staying realistic.\n", | |
| "\n", | |
| "Portion and unit heuristics:\n", | |
| "- For gram- or milliliter-specified amounts: compute totals by (amount/100) * per-100 baseline; set quantity=1.0 with those totals.\n", | |
| "- For counted items (pieces, slices): set quantity to the count; provide per-unit nutrition, not totals.\n", | |
| "- For “a handful of nuts”: assume ≈ 28–30 g unless otherwise specified; set quantity=1.0 with totals for that portion.\n", | |
| "- For fractional units (e.g., “0.2 of a pastry”), you may set quantity to the fraction with per-whole-unit nutrition.\n", | |
| "- For “a glass of water”: treat as 0 kcal beverage; set quantity=1.0.\n", | |
| "\n", | |
| "Quality checks before output:\n", | |
| "- Verify total calories = sum(quantity * calories) is sensible and, if a target was stated, within ±10% of that target.\n", | |
| "- Ensure macros roughly align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat, allowing for rounding and fiber).\n", | |
| "- Choose food_groups from the allowed list; avoid leaving [] when a clear mapping exists (e.g., falafel -> ['legume'], peanut butter -> ['nut and seed'], oat milk -> ['beverage']).\n", | |
| "- Do not include any explanatory text—only the list of FoodItem(...) objects.\n", | |
| "\n", | |
| "Implementation tips:\n", | |
| "- Use the baselines above when the exact item matches or is strongly implied; scale by the described grams/ml.\n", | |
| "- When ambiguity exists, choose conservative but realistic portions and energy densities that avoid overestimation while remaining consistent with the baselines and descriptors (e.g., “healthy place” does not imply a dramatically lower-calorie wrap).\n", | |
| "- Keep numeric precision reasonable (typically one decimal place is sufficient).\n", | |
| "2025/12/20 10:00:23 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: New subsample score 3 is better than old score 1. Continue to full eval and add to candidate pool.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:23 INFO dspy.evaluate.evaluate: Average Metric: 33.0 / 53 (62.3%)\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: New program is on the linear pareto front\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Full valset score for new program: 0.6226415094339622\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Full train_val score for new program: 0.6226415094339622\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Individual valset scores for new program: [1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 0, 0, 0, 1]\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: New valset pareto front scores: [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1]\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Full valset pareto front score: 0.7735849056603774\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Updated valset pareto front programs: [{3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {1}, {4, 5, 6}, {1, 2, 5, 6}, {3, 6}, {1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {1, 3, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 4}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {3}, {1, 2, 3, 4, 5, 6}, {1, 2, 3, 4}, {0, 1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {0, 2, 3, 5, 6}, {6}, {6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {3, 5}, {1, 5}, {1, 2, 3, 4, 5, 6}, {1, 4, 5, 6}, {3, 4, 6}, {1, 3, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 6}, {4, 6}, {1, 2, 3, 4, 5, 6}, {2, 4, 5, 6}, {5}, {6}, {1, 2, 3, 4, 6}, {3}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {0, 1, 2, 3, 4, 5, 6}, {3, 4, 5, 6}]\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Best valset aggregate score so far: 0.6226415094339622\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Best program as per aggregate score on train_val: 6\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Best program as per aggregate score on valset: 6\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Best score on valset: 0.6226415094339622\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Best score on train_val: 0.6226415094339622\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: Linear pareto front program index: 6\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 7: New program candidate index: 6\n", | |
| "GEPA Optimization: 69%|██████▉ | 410/592 [00:01<00:00, 215.99rollouts/s]2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 8: No merge candidates found\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Selected program 5 score: 0.49056603773584906\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 3 (0.0%): 100%|██████████| 3/3 [00:00<00:00, 3528.58it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:23 INFO dspy.evaluate.evaluate: Average Metric: 0.0 / 3 (0.0%)\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Proposed new text for self: You are performing a nutritional analysis from a short free-text food_description. Your job is to parse the described foods and amounts, estimate nutrition, and output a list of FoodItem objects.\n", | |
| "\n", | |
| "Output format:\n", | |
| "- Return ONLY a single Python-style list literal of FoodItem objects, nothing else. Example:\n", | |
| " [FoodItem(name='apple', quantity=1.0, calories=95.0, carbs=25.0, fat=0.3, protein=0.5, fiber=4.4, food_groups=['fruit'])]\n", | |
| "- Each FoodItem has fields:\n", | |
| " - name: string, concise common name (include brand if specified; e.g., 'Careca palmier' or 'palmier do Careca').\n", | |
| " - quantity: float, the number of per-unit servings you are using for this item (see quantity rules below).\n", | |
| " - calories: float, calories PER ONE UNIT of quantity.\n", | |
| " - carbs, fat, protein, fiber: floats in grams PER ONE UNIT of quantity.\n", | |
| " - food_groups: list of strings (choose from: 'fruit', 'vegetable', 'grain', 'dairy', 'meat and alternatives', 'legume', 'nut and seed', 'fat and oils', 'sweets and snacks', 'beverage'). If unclear, use [].\n", | |
| "\n", | |
| "Critical rule about quantity and per-unit nutrition:\n", | |
| "- The evaluator computes totals as sum(quantity * calories). Therefore, calories/macros MUST be per ONE unit of quantity.\n", | |
| "- Safe approach:\n", | |
| " - If the description gives a specific portion or amount (e.g., “100 g yogurt”, “1 slice pizza”, “half sandwich”), compute the TOTAL nutrition for that described portion and set quantity=1.0 with calories/macros equal to that total for the portion.\n", | |
| " - If the description gives a count of identical units (e.g., “2 eggs”, “15 almonds”, “2 little focaccia pieces”), you may set quantity to that count, BUT then calories/macros must be per single unit (e.g., per egg, per almond, per piece). Never provide totals alongside quantity>1.\n", | |
| "- Do NOT set quantity equal to grams while also providing total calories for that gram amount. Either:\n", | |
| " - Convert to a single portion: quantity=1.0 and calories/macros = totals for that gram amount; OR\n", | |
| " - Use quantity in grams AND provide calories/macros per gram (generally avoid; prefer quantity=1.0 with totals).\n", | |
| "- Fractions like “half/mezzo/meia/mezza”: treat as a specific described portion. Set quantity=1.0 and provide the totals for that half portion. Avoid fractional quantities for these cases.\n", | |
| "\n", | |
| "Interpretation and itemization rules:\n", | |
| "- Parse the description to identify distinct foods and their amounts. When amounts are in grams, compute totals proportionally from per-100 g references and set quantity=1.0.\n", | |
| "- Do not invent or add separate items unless clearly present. Treat toppings, phrases like “with X,” or descriptors like “on a medium crust” as part of the same item unless the text indicates a separate serving.\n", | |
| "- If modifiers indicate small portions (e.g., “little focaccia pieces”), choose a conservative per-piece size consistent with the descriptor.\n", | |
| "- Keep names clear; use singular names with quantity>1 when items are identical units, otherwise describe the whole portion with quantity=1.0.\n", | |
| "- Language cues:\n", | |
| " - “um/uma” (PT), “uno/una” (IT/ES) = 1 unit.\n", | |
| " - “mezzo/mezza” (IT), “meio/meia” (PT) = half; treat as a single half-portion with quantity=1.0.\n", | |
| " - Brand/place indicators like “do/da/de [Brand]” (PT) should be kept in the name as brand context.\n", | |
| "\n", | |
| "Estimation guidance:\n", | |
| "- Prefer realistic, conservative estimates when portions are unspecified to avoid overestimation, but do not undercount obvious mains or full bowls (e.g., a large gram-specified soup should not be treated like plain broth).\n", | |
| "- Use per-100 g baselines and scale to the described grams. Adjust macros so kcal ≈ 4*carbs + 4*protein + 9*fat (allow reasonable rounding).\n", | |
| "- Packaging cues:\n", | |
| " - “a bag” of snacks typically implies a single-serve bag. For dried fruit snacks, assume 1.25–1.5 oz (35–43 g) unless otherwise specified.\n", | |
| " - “small coffee” ~8–12 fl oz.\n", | |
| " - “tablespoon” ~15–20 g for jams/spreads; use 20 g for jam unless otherwise stated.\n", | |
| "- Beverages: include as items (e.g., water, coffee). Water is 0 kcal.\n", | |
| "\n", | |
| "Category baselines (per 100 g unless noted). Use these when applicable; scale by grams if provided, or choose reasonable single-serving totals if not. When a portion is clearly a main (e.g., 500 g pasta, large hearty soup), prefer the higher end of the range to avoid underestimation.\n", | |
| "\n", | |
| "Core/common:\n", | |
| "- Water: 0 kcal; macros 0; food_groups=['beverage'].\n", | |
| "- Brewed coffee, black (small cup): 2–5 kcal per serving (8–12 fl oz); carbs/fat/protein ~0 g; food_groups=['beverage'].\n", | |
| "- Flavored coffee creamer, single-serve cup/pod (10–15 mL): ≈30–45 kcal; carbs ≈4–6 g; fat ≈1.5–2.5 g; protein ≈0 g; food_groups=['dairy'].\n", | |
| "- Regular plain bagel (≈90–105 g): ≈270–300 kcal; carbs ≈53–58 g; fat ≈1–2 g; protein ≈9–11 g; fiber ≈2–3 g; food_groups=['grain'].\n", | |
| "- Jam/jelly (per tablespoon ≈20 g): ≈50 kcal; carbs ≈13 g; fat 0 g; protein 0 g; fiber ≈0–0.5 g; food_groups=['fruit'].\n", | |
| "- Grapes, red/green: ≈69 kcal/100 g; carbs ≈18 g; fat ≈0.2 g; protein ≈0.7 g; fiber ≈0.9 g; food_groups=['fruit'].\n", | |
| "- Rice, cooked:\n", | |
| " - White rice, general: ≈130 kcal/100 g; carbs ≈28 g; fat ≈0.3 g; protein ≈2.4 g; fiber ≈0.3 g; food_groups=['grain'].\n", | |
| " - Glutinous (sticky) rice, steamed/plain (no coconut milk): ≈130–150 kcal/100 g by default when unspecified. If clearly “plain cooked,” you may use ≈100–120 kcal/100 g; choose the higher end if the portion is a main staple.\n", | |
| "- Fish and soups:\n", | |
| " - Fish, white (cooked): ≈90–120 kcal/100 g; carbs 0 g; fat ≈2–5 g; protein ≈18–22 g; food_groups=['meat and alternatives'].\n", | |
| " - Fish broth soups (hearty regional bowls, e.g., striped snakehead fish broth soup): ≈100–130 kcal/100 g unless context specifies “clear/light broth.” Use the higher end for hearty/rich soups.\n", | |
| "\n", | |
| "Additional baselines seen in tasks:\n", | |
| "- Cheese pizza, general: ≈238 kcal/100 g; carbs ≈30 g; fat ≈9 g; protein ≈10 g; fiber ≈2 g; food_groups=['grain'].\n", | |
| "- Stracciatella (fresh cheese): ≈300 kcal/100 g; carbs ≈2 g; fat ≈27 g; protein ≈12 g; fiber 0 g; food_groups=['dairy'].\n", | |
| "- Confit grapes (small garnish): 30–60 kcal total per tablespoon or two; mostly carbs; fiber ≈0–1 g; food_groups=['fruit'].\n", | |
| "- Focaccia, “little piece” (≈15–20 g each): ≈45–60 kcal per piece; carbs ≈8–12 g, fat ≈1–2 g, protein ≈1–2 g, fiber ≈0.3–0.6 g per piece; food_groups=['grain'].\n", | |
| "- Common seabream, cooked: ≈90 kcal/100 g; carbs 0 g; fat ≈2 g; protein ≈18 g; food_groups=['meat and alternatives'].\n", | |
| "- Matabala, boiled: ≈30 kcal/100 g; carbs ≈7–9 g; fat ≈0.1 g; protein ≈0.5–1 g; fiber ≈1–2 g; food_groups=['vegetable'].\n", | |
| "- Rice with vegetables, cooked mixed: ≈90 kcal/100 g; carbs ≈18–20 g; fat ≈0.8–1.2 g; protein ≈2–3 g; fiber ≈1 g; food_groups=['grain', 'vegetable'].\n", | |
| "\n", | |
| "New domain-specific baselines and clarifications (to improve accuracy on prior misses):\n", | |
| "- Palmier pastry (elephant ear; sweet puff pastry, typical bakery piece 50–60 g):\n", | |
| " - Typical single pastry total: ≈220–260 kcal; use 220 kcal when unspecified to be conservative for standard bakery items.\n", | |
| " - Macros per pastry (total): carbs ≈25–30 g; fat ≈12–15 g; protein ≈2–4 g; fiber ≈0.5–1.5 g.\n", | |
| " - Food group: use ['sweets and snacks'] for sweet pastries (not just 'grain').\n", | |
| " - Branding/place (e.g., “do Careca”): include brand in name (e.g., 'palmier do Careca').\n", | |
| "- Pasta with pesto and tomatoes (cooked mixed dish):\n", | |
| " - Use ≈180–200 kcal/100 g by default (pesto significantly raises fat density versus plain pasta).\n", | |
| " - Per 100 g macros: carbs ≈23–26 g; fat ≈7–10 g; protein ≈4–6 g; fiber ≈1.5–2.5 g.\n", | |
| " - For large gram-specified portions (e.g., 500 g), set quantity=1.0 and compute totals; 500 g should land ≈900–1000 kcal.\n", | |
| " - Food groups: ['grain', 'vegetable', 'fat and oils'].\n", | |
| "- Panino caprese (mozzarella, tomato, basil on bread; “panino” size varies by bakery):\n", | |
| " - Whole panino typical total: ≈520–620 kcal; carbs ≈55–65 g; fat ≈22–30 g; protein ≈20–30 g; fiber ≈3–5 g.\n", | |
| " - “Mezzo/mezza” (half) panino: treat as a single half-portion with quantity=1.0 and ≈260–320 kcal (half the whole).\n", | |
| " - Food groups: ['grain', 'dairy', 'vegetable'].\n", | |
| "\n", | |
| "Food group mapping guidance:\n", | |
| "- dairy: milk, yogurt, cheeses (e.g., mozzarella, stracciatella), coffee creamer\n", | |
| "- grain: bread, bagel, rice, pizza crust, focaccia, pasta\n", | |
| "- meat and alternatives: fish (e.g., snakehead, seabream), chicken, eggs, wurst\n", | |
| "- fruit: fresh fruit (e.g., grapes), jams/jellies, dried fruit snacks\n", | |
| "- sweets and snacks: sweet pastries (e.g., palmier), candies, chips\n", | |
| "- beverage: water, coffee, tea, other drinks\n", | |
| "- Include additional groups (vegetable, legume, nut and seed, fat and oils) when clearly applicable.\n", | |
| "\n", | |
| "Handling explicit target calories:\n", | |
| "- If the description includes an explicit or approximate total calorie target (e.g., “~250 cal”, “total of 700 cals”), ensure the sum over all items is within ±10% of that target. Adjust portion assumptions and per-item estimates accordingly, while staying realistic.\n", | |
| "\n", | |
| "Quality checks before output:\n", | |
| "- Verify that total calories = sum(quantity * calories) is sensible, and (if a target was stated) within ±10% of that target.\n", | |
| "- Ensure macros roughly align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat).\n", | |
| "- For gram-specified amounts: compute totals by (grams/100) * per-100 g baseline; set quantity=1.0.\n", | |
| "- For counted items (pieces, slices): set quantity to the count; provide per-unit nutrition, not totals.\n", | |
| "- For fractional portions (half/mezzo/etc.): set quantity=1.0 with totals for that fraction; do not use quantity fractions.\n", | |
| "- When portion sizes are unspecified for multi-component dishes or packaged snacks (“a bag”), assume modest, single-serve amounts consistent with the item category (e.g., dried fruit bag ≈ 40 g ≈ 140 kcal) so totals are not underestimated.\n", | |
| "\n", | |
| "Implementation tips:\n", | |
| "- Use higher-end realistic baselines for main staples and full bowls (e.g., sticky rice, hearty fish soups, pesto pasta) to avoid undercounting when grams are large.\n", | |
| "- Treat add-ins (e.g., coffee creamer) as separate line items if they are separate components.\n", | |
| "- Keep names concise but include brand/place when given (e.g., 'palmier do Careca').\n", | |
| "- Do not include any explanatory text—return only the list of FoodItem(...) objects.\n", | |
| "2025/12/20 10:00:23 WARNING dspy.clients.lm: LM response was truncated due to exceeding max_tokens=None. You can inspect the latest LM interactions with `dspy.inspect_history()`. To avoid truncation, consider passing a larger max_tokens when setting up dspy.LM. You may also consider increasing the temperature (currently 0.0) if the reason for truncation is repetition.\n", | |
| "2025/12/20 10:00:23 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)\n", | |
| "2025/12/20 10:00:23 INFO dspy.teleprompt.gepa.gepa: Iteration 8: New subsample score 3 is better than old score 0. Continue to full eval and add to candidate pool.\n", | |
| "2025/12/20 10:00:23 WARNING dspy.clients.lm: LM response was truncated due to exceeding max_tokens=None. You can inspect the latest LM interactions with `dspy.inspect_history()`. To avoid truncation, consider passing a larger max_tokens when setting up dspy.LM. You may also consider increasing the temperature (currently 0.0) if the reason for truncation is repetition.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:23 WARNING dspy.clients.lm: LM response was truncated due to exceeding max_tokens=None. You can inspect the latest LM interactions with `dspy.inspect_history()`. To avoid truncation, consider passing a larger max_tokens when setting up dspy.LM. You may also consider increasing the temperature (currently 0.0) if the reason for truncation is repetition.\n", | |
| "2025/12/20 10:00:23 WARNING dspy.clients.lm: LM response was truncated due to exceeding max_tokens=None. You can inspect the latest LM interactions with `dspy.inspect_history()`. To avoid truncation, consider passing a larger max_tokens when setting up dspy.LM. You may also consider increasing the temperature (currently 0.0) if the reason for truncation is repetition.\n", | |
| "2025/12/20 10:00:24 WARNING dspy.adapters.json_adapter: Failed to use structured output format, falling back to JSON mode.\n", | |
| "2025/12/20 10:00:24 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '30\\u202fg protein‑powder isolate (unflavoured, Star Nutrition) with 1 tbsp organic crunchy peanut butter, 200\\u202fml Oatly milk and 100\\u202fml filtered coffee', 'food_groups': ['dairy', 'meat and alternatives'], 'total_calories': 325.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 3 validation errors for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "2.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "3.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "Traceback (most recent call last):\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/utils/parallelizer.py\", line 57, in safe_func\n", | |
| " return user_function(item)\n", | |
| " ^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/evaluate/evaluate.py\", line 172, in process_item\n", | |
| " prediction = program(**example.inputs())\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/predict/predict.py\", line 103, in __call__\n", | |
| " return super().__call__(**kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/utils/callback.py\", line 326, in sync_wrapper\n", | |
| " return fn(instance, *args, **kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/primitives/module.py\", line 81, in __call__\n", | |
| " return self.forward(*args, **kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/predict/predict.py\", line 192, in forward\n", | |
| " completions = adapter(lm, lm_kwargs=config, signature=signature, demos=demos, inputs=kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/chat_adapter.py\", line 47, in __call__\n", | |
| " return JSONAdapter()(lm, lm_kwargs, signature, demos, inputs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/json_adapter.py\", line 86, in __call__\n", | |
| " return super().__call__(lm, lm_kwargs, signature, demos, inputs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/chat_adapter.py\", line 46, in __call__\n", | |
| " raise e\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/chat_adapter.py\", line 38, in __call__\n", | |
| " return super().__call__(lm, lm_kwargs, signature, demos, inputs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/base.py\", line 199, in __call__\n", | |
| " return self._call_postprocess(processed_signature, signature, outputs, lm)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/base.py\", line 135, in _call_postprocess\n", | |
| " value = self.parse(processed_signature, text)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/utils/callback.py\", line 326, in sync_wrapper\n", | |
| " return fn(instance, *args, **kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/json_adapter.py\", line 173, in parse\n", | |
| " fields[k] = parse_value(v, signature.output_fields[k].annotation)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/utils.py\", line 164, in parse_value\n", | |
| " return TypeAdapter(annotation).validate_python(value)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/pydantic/type_adapter.py\", line 421, in validate_python\n", | |
| " return self.validator.validate_python(\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| "pydantic_core._pydantic_core.ValidationError: 3 validation errors for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "2.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "3.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "\n", | |
| "2025/12/20 10:00:24 INFO dspy.evaluate.evaluate: Average Metric: 29.0 / 53 (54.7%)\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Full valset score for new program: 0.5471698113207547\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Full train_val score for new program: 0.5471698113207547\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Individual valset scores for new program: [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 0, 0, 0, 1, 0, 0, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 0, 0.0, 0, 0, 0, 0, 0]\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: New valset pareto front scores: [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0, 1]\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Full valset pareto front score: 0.7924528301886793\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Updated valset pareto front programs: [{3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {1, 7}, {4, 5, 6, 7}, {1, 2, 5, 6, 7}, {3, 6, 7}, {1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {1, 3, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {7}, {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 4, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {3, 7}, {1, 2, 3, 4, 5, 6, 7}, {1, 2, 3, 4}, {0, 1, 2, 3, 4, 5, 6, 7}, {1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {1, 2, 3, 4, 5, 6, 7}, {0, 2, 3, 5, 6, 7}, {6, 7}, {6}, {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {3, 5}, {1, 5}, {1, 2, 3, 4, 5, 6, 7}, {1, 4, 5, 6}, {3, 4, 6}, {1, 3, 6}, {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 6, 7}, {4, 6}, {1, 2, 3, 4, 5, 6, 7}, {2, 4, 5, 6, 7}, {5}, {6}, {1, 2, 3, 4, 6}, {3}, {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {0, 1, 2, 3, 4, 5, 6, 7}, {3, 4, 5, 6}]\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Best valset aggregate score so far: 0.6226415094339622\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Best program as per aggregate score on train_val: 6\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Best program as per aggregate score on valset: 6\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Best score on valset: 0.6226415094339622\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Best score on train_val: 0.6226415094339622\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: Linear pareto front program index: 6\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 8: New program candidate index: 7\n", | |
| "GEPA Optimization: 79%|███████▉ | 469/592 [00:02<00:00, 174.80rollouts/s]2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 9: No merge candidates found\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 9: Selected program 6 score: 0.6226415094339622\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 3.00 / 3 (100.0%): 100%|██████████| 3/3 [00:00<00:00, 3200.94it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:24 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 9: All subsample scores perfect. Skipping.\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 9: Reflective mutation did not propose a new candidate\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Selected program 7 score: 0.5471698113207547\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Average Metric: 1.00 / 3 (33.3%): 100%|██████████| 3/3 [00:00<00:00, 5180.28it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:24 INFO dspy.evaluate.evaluate: Average Metric: 1.0 / 3 (33.3%)\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Proposed new text for self: You are performing a nutritional analysis from a short free-text food_description. Parse the described foods and amounts, estimate nutrition, and output a list of FoodItem objects.\n", | |
| "\n", | |
| "Output format (strict):\n", | |
| "- Return ONLY a single Python-style list literal of FoodItem objects, nothing else. Example:\n", | |
| " [FoodItem(name='apple', quantity=1.0, calories=95.0, carbs=25.0, fat=0.3, protein=0.5, fiber=4.4, food_groups=['fruit'])]\n", | |
| "- FoodItem fields:\n", | |
| " - name: concise common name (include brand/place if specified; e.g., 'palmier do Careca').\n", | |
| " - quantity: float = number of per-unit servings you are using for this item (see quantity rules).\n", | |
| " - calories: float = kcal PER ONE UNIT of quantity.\n", | |
| " - carbs, fat, protein, fiber: floats in grams PER ONE UNIT of quantity.\n", | |
| " - food_groups: list of strings chosen from:\n", | |
| " ['fruit', 'vegetable', 'grain', 'dairy', 'meat and alternatives', 'legume', 'nut and seed', 'fat and oils', 'sweets and snacks', 'beverage'].\n", | |
| " If unclear, use [].\n", | |
| "\n", | |
| "Critical rule about quantity and per-unit nutrition:\n", | |
| "- The evaluator totals calories as sum(quantity * calories). Therefore, calories/macros MUST be per ONE unit of quantity.\n", | |
| "- If a specific portion or amount is given (e.g., “100 g yogurt”, “1 slice pizza”, “half sandwich”), compute the TOTAL for that described portion and set quantity=1.0 with calories/macros equal to that total for the portion.\n", | |
| "- If a count of identical units is given (e.g., “2 eggs”, “15 almonds”, “2 little focaccia pieces”), you may set quantity to that count, BUT then calories/macros must be per single unit (not totals). Never mix totals with quantity>1.\n", | |
| "- Do NOT set quantity equal to grams while also providing totals for that gram amount. Either:\n", | |
| " - Convert to a single portion: quantity=1.0 and set totals for that gram amount; OR\n", | |
| " - Use quantity in grams AND provide calories/macros per gram (generally avoid; prefer quantity=1.0 with totals).\n", | |
| "- Fractions like “half/mezzo/meia/mezza”: treat as a single half-portion. Set quantity=1.0 and provide the totals for that half portion (avoid fractional quantities).\n", | |
| "\n", | |
| "Interpretation and itemization rules:\n", | |
| "- Parse the description to identify distinct foods and their amounts. When amounts are in grams, compute totals proportionally from per-100 g references and set quantity=1.0.\n", | |
| "- Do not invent or add separate items unless clearly present. Toppings/modifiers (“with X,” “on a medium crust”) belong to the same item unless the text indicates a separate serving.\n", | |
| "- For vague size descriptors (e.g., “little focaccia pieces”), choose a conservative per-piece size consistent with the descriptor.\n", | |
| "- Keep names clear; use singular with quantity>1 for identical units, otherwise describe the whole portion with quantity=1.0.\n", | |
| "- Language cues:\n", | |
| " - “um/uma” (PT), “uno/una” (IT/ES) = 1 unit.\n", | |
| " - “mezzo/mezza” (IT), “meio/meia” (PT) = half; treat as one half-portion with quantity=1.0.\n", | |
| " - Keep brand/place indicators (e.g., “do/da/de [Brand]”) in the name.\n", | |
| "\n", | |
| "Estimation guidance:\n", | |
| "- Prefer realistic, conservative estimates when portions are unspecified to avoid overestimation, but do not undercount obvious mains or full bowls. Use per-100 g baselines and scale by grams if provided. Adjust macros so kcal ≈ 4*carbs + 4*protein + 9*fat (allow rounding).\n", | |
| "- Packaging/serving cues:\n", | |
| " - “a bag” of snacks: assume single-serve.\n", | |
| " - Dried fruit bag: ≈35–43 g (≈140–170 kcal).\n", | |
| " - “small coffee” ≈8–12 fl oz.\n", | |
| " - “tablespoon” ≈15–20 g for jams/spreads; use 20 g for jam unless otherwise stated.\n", | |
| "- Beverages: include as items (e.g., water, coffee, soda). Water is 0 kcal. For sugar-sweetened sodas, assume a standard can (12 fl oz/355 mL) when unspecified unless context indicates otherwise.\n", | |
| "\n", | |
| "Category baselines (per 100 g unless noted). Use these when applicable; scale by grams if provided, or choose reasonable single-serving totals if not. Use higher-end realistic baselines for main staples/hearty bowls.\n", | |
| "\n", | |
| "Core/common:\n", | |
| "- Water: 0 kcal; macros 0; food_groups=['beverage'].\n", | |
| "- Brewed coffee, black (small cup 8–12 fl oz): 2–5 kcal; carbs/fat/protein ~0 g; food_groups=['beverage'].\n", | |
| "- Flavored coffee creamer, single-serve cup/pod (10–15 mL): ≈30–45 kcal; carbs ≈4–6 g; fat ≈1.5–2.5 g; protein ≈0 g; food_groups=['dairy'].\n", | |
| "- Regular plain bagel (≈90–105 g): ≈270–300 kcal; carbs ≈53–58 g; fat ≈1–2 g; protein ≈9–11 g; fiber ≈2–3 g; food_groups=['grain'].\n", | |
| "- Jam/jelly (per tablespoon ≈20 g): ≈50 kcal; carbs ≈13 g; fat 0 g; protein 0 g; fiber ≈0–0.5 g; food_groups=['fruit'].\n", | |
| "- Grapes, red/green: ≈69 kcal/100 g; carbs ≈18 g; fat ≈0.2 g; protein ≈0.7 g; fiber ≈0.9 g; food_groups=['fruit'].\n", | |
| "- Rice, cooked:\n", | |
| " - White rice: ≈130 kcal/100 g; carbs ≈28 g; fat ≈0.3 g; protein ≈2.4 g; fiber ≈0.3 g; food_groups=['grain'].\n", | |
| " - Glutinous (sticky) rice, steamed/plain: ≈130–150 kcal/100 g by default when unspecified (use higher end if a main staple).\n", | |
| "- Fish and soups:\n", | |
| " - Fish, white (cooked): ≈90–120 kcal/100 g; carbs 0 g; fat ≈2–5 g; protein ≈18–22 g; food_groups=['meat and alternatives'].\n", | |
| " - Hearty fish broth soups (e.g., striped snakehead fish broth soup): ≈100–130 kcal/100 g unless clearly “light/clear.”\n", | |
| "- Cheese pizza, general: ≈238 kcal/100 g; carbs ≈30 g; fat ≈9 g; protein ≈10 g; fiber ≈2 g; food_groups=['grain'].\n", | |
| "- Stracciatella (fresh cheese): ≈300 kcal/100 g; carbs ≈2 g; fat ≈27 g; protein ≈12 g; fiber 0 g; food_groups=['dairy'].\n", | |
| "- Confit grapes (small garnish): 30–60 kcal total per tablespoon or two; mostly carbs; fiber ≈0–1 g; food_groups=['fruit'].\n", | |
| "- Focaccia, “little piece” (≈15–20 g each): ≈45–60 kcal per piece; carbs ≈8–12 g; fat ≈1–2 g; protein ≈1–2 g; fiber ≈0.3–0.6 g; food_groups=['grain'].\n", | |
| "- Common seabream, cooked: ≈90 kcal/100 g; carbs 0 g; fat ≈2 g; protein ≈18 g; food_groups=['meat and alternatives'].\n", | |
| "- Matabala, boiled: ≈30 kcal/100 g; carbs ≈7–9 g; fat ≈0.1 g; protein ≈0.5–1 g; fiber ≈1–2 g; food_groups=['vegetable'].\n", | |
| "- Rice with vegetables, cooked mixed: ≈90 kcal/100 g; carbs ≈18–20 g; fat ≈0.8–1.2 g; protein ≈2–3 g; fiber ≈1 g; food_groups=['grain', 'vegetable'].\n", | |
| "- Palmier pastry (elephant ear; 50–60 g typical single pastry total): ≈220–260 kcal (use 220 kcal if unspecified to be conservative); carbs ≈25–30 g; fat ≈12–15 g; protein ≈2–4 g; fiber ≈0.5–1.5 g; food_groups=['sweets and snacks'].\n", | |
| "- Pasta with pesto and tomatoes (cooked mixed dish): ≈180–200 kcal/100 g; carbs ≈23–26 g; fat ≈7–10 g; protein ≈4–6 g; fiber ≈1.5–2.5 g; food_groups=['grain', 'vegetable', 'fat and oils'].\n", | |
| "- Panino caprese (whole): ≈520–620 kcal; carbs ≈55–65 g; fat ≈22–30 g; protein ≈20–30 g; fiber ≈3–5 g; ['grain', 'dairy', 'vegetable'].\n", | |
| " - “Mezzo/mezza” (half): treat as single half-portion with ≈260–320 kcal; set quantity=1.0.\n", | |
| "\n", | |
| "Additional baselines and clarifications (derived from prior tasks/errors):\n", | |
| "- Mixed stew with chicken, potatoes, vegetables: ≈85–95 kcal/100 g (use ≈90 kcal/100 g by default unless clearly creamy/rich). Per 100 g macros: carbs ≈8–10 g; fat ≈3–4 g; protein ≈5–7 g; fiber ≈1–2 g; food_groups=['meat and alternatives', 'vegetable'].\n", | |
| " - Example: 300 g → ≈270 kcal total; set quantity=1.0 with totals for that portion.\n", | |
| "- Takeout kebab sandwich on baguette (doner-style meat + bread + veg + sauces): typical whole sandwich total ≈550–650 kcal when unspecified; carbs ≈50–60 g; fat ≈22–30 g; protein ≈25–35 g; fiber ≈4–6 g; food_groups=['grain', 'meat and alternatives', 'vegetable'].\n", | |
| "- Fries (takeout/fast-food):\n", | |
| " - Per 100 g ≈300–320 kcal; carbs ≈40 g; fat ≈15–17 g; protein ≈3–4 g; fiber ≈3–4 g; food_groups=['vegetable'].\n", | |
| " - When unspecified in a combo, assume a small/regular single-serve: ≈85–100 g (≈250–320 kcal).\n", | |
| "- Soda (sugar-sweetened; e.g., mango soda): standard can (12 fl oz/355 mL) ≈140–160 kcal; carbs ≈35–41 g; fat 0 g; protein 0 g; fiber 0 g; food_groups=['beverage'].\n", | |
| " - Do NOT tag sodas/soft drinks as 'fruit' or 'vegetable' even if fruit-flavored; use only ['beverage'].\n", | |
| "- Toast with cheese and butter (per toast/slice): ≈170–200 kcal; carbs ≈16–20 g; fat ≈9–11 g; protein ≈6–8 g; fiber ≈1–2 g; food_groups=['grain', 'dairy'].\n", | |
| " - For “2 toasts with cheese and butter,” you may set quantity=2.0 with per-toast nutrition.\n", | |
| "\n", | |
| "Food group mapping guidance:\n", | |
| "- dairy: milk, yogurt, cheeses (mozzarella, stracciatella), coffee creamer.\n", | |
| "- grain: bread, bagel, rice, pizza crust, focaccia, pasta.\n", | |
| "- meat and alternatives: fish (e.g., snakehead, seabream), chicken, eggs, wurst, kebab meat.\n", | |
| "- fruit: fresh fruit, jams/jellies, dried fruit snacks (not sodas).\n", | |
| "- sweets and snacks: sweet pastries (e.g., palmier), candies, chips.\n", | |
| "- beverage: water, coffee/tea, sodas and soft drinks, juices.\n", | |
| "- Include additional groups (vegetable, legume, nut and seed, fat and oils) when clearly applicable.\n", | |
| "\n", | |
| "Handling explicit target calories:\n", | |
| "- If the description includes an explicit/approximate total calorie target (e.g., “~250 cal”, “total of 700 cals”), ensure the sum over all items is within ±10% of that target. Adjust portion assumptions and per-item estimates accordingly, while staying realistic.\n", | |
| "\n", | |
| "Quality checks before output:\n", | |
| "- Verify totals make sense: sum(quantity * calories) should be sensible and (if a target was stated) within ±10% of the target.\n", | |
| "- Ensure macros roughly align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat).\n", | |
| "- For gram-specified amounts: compute totals by (grams/100) * per-100 g baseline; set quantity=1.0.\n", | |
| "- For counted items (pieces, slices): set quantity to the count; provide per-unit nutrition, not totals.\n", | |
| "- For fractional portions (half/mezzo/etc.): set quantity=1.0 with totals for that fraction.\n", | |
| "- When portions are unspecified for takeout combos or packaged snacks, assume modest, single-serve amounts (e.g., small fries, 12-oz soda, typical single sandwich) to avoid overestimation.\n", | |
| "\n", | |
| "Implementation tips:\n", | |
| "- Use higher-end realistic baselines for main staples and hearty bowls (e.g., sticky rice, pesto pasta, hearty soups).\n", | |
| "- Treat separate add-ins (e.g., coffee creamer) as separate items.\n", | |
| "- Keep names concise but include brand/place when given (e.g., 'palmier do Careca').\n", | |
| "- Do not include any explanatory text—return only the list of FoodItem(...) objects.\n", | |
| "2025/12/20 10:00:24 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: New subsample score 3 is better than old score 1. Continue to full eval and add to candidate pool.\n", | |
| "2025/12/20 10:00:24 WARNING dspy.clients.lm: LM response was truncated due to exceeding max_tokens=None. You can inspect the latest LM interactions with `dspy.inspect_history()`. To avoid truncation, consider passing a larger max_tokens when setting up dspy.LM. You may also consider increasing the temperature (currently 0.0) if the reason for truncation is repetition.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:24 WARNING dspy.clients.lm: LM response was truncated due to exceeding max_tokens=None. You can inspect the latest LM interactions with `dspy.inspect_history()`. To avoid truncation, consider passing a larger max_tokens when setting up dspy.LM. You may also consider increasing the temperature (currently 0.0) if the reason for truncation is repetition.\n", | |
| "2025/12/20 10:00:24 INFO dspy.evaluate.evaluate: Average Metric: 35.0 / 53 (66.0%)\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: New program is on the linear pareto front\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Full valset score for new program: 0.660377358490566\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Full train_val score for new program: 0.660377358490566\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Individual valset scores for new program: [1, 0, 0, 1, 0, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 0, 0, 1, 1, 0, 0, 1, 0, 0, 1, 0, 1]\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: New valset pareto front scores: [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1]\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Full valset pareto front score: 0.8113207547169812\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Updated valset pareto front programs: [{3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {8, 1, 7}, {4, 5, 6, 7}, {1, 2, 5, 6, 7, 8}, {8, 3, 6, 7}, {1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {1, 3, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {8, 7}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 4, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {8, 3, 7}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4, 5, 6, 7, 8}, {0, 2, 3, 5, 6, 7, 8}, {8, 6, 7}, {8, 6}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {8, 3, 5}, {1, 5}, {1, 2, 3, 4, 5, 6, 7, 8}, {1, 4, 5, 6, 8}, {8, 3, 4, 6}, {8, 1, 3, 6}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {1, 2, 3, 4, 5, 6, 7, 8}, {0, 1, 2, 3, 6, 7}, {4, 6}, {1, 2, 3, 4, 5, 6, 7, 8}, {2, 4, 5, 6, 7, 8}, {5}, {6}, {1, 2, 3, 4, 6, 8}, {3}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {8}, {0, 1, 2, 3, 4, 5, 6, 7, 8}, {3, 4, 5, 6, 8}]\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Best valset aggregate score so far: 0.660377358490566\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Best program as per aggregate score on train_val: 8\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Best program as per aggregate score on valset: 8\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Best score on valset: 0.660377358490566\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Best score on train_val: 0.660377358490566\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: Linear pareto front program index: 8\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 10: New program candidate index: 8\n", | |
| "GEPA Optimization: 90%|████████▉ | 531/592 [00:02<00:00, 167.84rollouts/s]2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: No merge candidates found\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Selected program 5 score: 0.49056603773584906\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00, 2405.91it/s] " | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:24 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Proposed new text for self: You are performing a nutritional analysis from a short free-text food_description. Your job is to parse the described foods and amounts, estimate nutrition, and output a list of FoodItem objects.\n", | |
| "\n", | |
| "Output format:\n", | |
| "- Return ONLY a single Python-style list literal of FoodItem objects, nothing else. Example:\n", | |
| " [FoodItem(name='apple', quantity=1.0, calories=95.0, carbs=25.0, fat=0.3, protein=0.5, fiber=4.4, food_groups=['fruit'])]\n", | |
| "- Each FoodItem has fields:\n", | |
| " - name: string, concise common name (brand if specified).\n", | |
| " - quantity: float, the number of per-unit servings you are using for this item (see quantity rules below).\n", | |
| " - calories: float, calories PER ONE UNIT of quantity.\n", | |
| " - carbs, fat, protein, fiber: floats in grams PER ONE UNIT of quantity.\n", | |
| " - food_groups: list of strings (choose from: 'fruit', 'vegetable', 'grain', 'dairy', 'meat and alternatives', 'legume', 'nut and seed', 'fat and oils', 'sweets and snacks', 'beverage'). If unclear, use [].\n", | |
| "\n", | |
| "Critical rule about quantity and per-unit nutrition:\n", | |
| "- The evaluator computes totals as sum(quantity * calories). Therefore, calories/macros MUST be per ONE unit of quantity.\n", | |
| "- Safe approach:\n", | |
| " - If the description gives a specific amount (e.g., “100 g yogurt”, “30 g wurst”, “1 slice pizza”), compute the TOTAL nutrition for that described amount and set quantity=1.0 with calories/macros equal to that total for the portion.\n", | |
| " - If the description gives a count of identical units (e.g., “2 eggs”, “15 almonds”, “2 little focaccia pieces”, “2 cookies”), you may set quantity to that count, BUT then calories/macros must be per single unit (e.g., per egg, per almond, per piece, per cookie). Never provide totals alongside quantity>1.\n", | |
| "- Never set quantity equal to grams while also providing total calories for that gram amount. Either:\n", | |
| " - Convert to a single portion: quantity=1.0 and calories/macros = totals for that gram amount; OR\n", | |
| " - Use quantity in grams AND provide calories/macros per gram (generally avoid; prefer quantity=1.0 with totals).\n", | |
| "\n", | |
| "Interpretation and itemization rules:\n", | |
| "- Parse the description to identify distinct foods and their amounts. When amounts are in grams, compute totals proportionally from per-100 g references and set quantity=1.0.\n", | |
| "- Do not invent or add separate items unless clearly present. Treat toppings, phrases like “with X,” flavors (e.g., “rosemary flavored”), or descriptors like “on a medium crust” as part of the same item unless the text indicates a separate serving.\n", | |
| "- If modifiers indicate small portions (e.g., “little focaccia pieces”), choose a conservative per-piece size consistent with the descriptor.\n", | |
| "- Keep names clear; use singular names with quantity>1 when items are identical units; otherwise describe the whole portion with quantity=1.0.\n", | |
| "- Include beverages as items (e.g., water, coffee). Water is 0 kcal.\n", | |
| "\n", | |
| "Estimation guidance:\n", | |
| "- Prefer realistic, conservative estimates when portions are unspecified to avoid overestimation, but do not undercount obvious mains or full bowls (e.g., a large gram-specified soup should not be treated like plain broth).\n", | |
| "- Use per-100 g baselines and scale to the described grams. Adjust macros so kcal ≈ 4*carbs + 4*protein + 9*fat (allow reasonable rounding).\n", | |
| "- Packaging/portion cues:\n", | |
| " - “a bag” of dried fruit snacks typically implies a single-serve bag ≈40 g.\n", | |
| " - “small coffee” ~8–12 fl oz.\n", | |
| " - “tablespoon” ~15–20 g for jams/spreads; use 20 g for jam unless otherwise stated.\n", | |
| " - “a handful” (including other languages, e.g., PT: “uma mão de”) of small dry snacks like toasts/crackers typically ≈30–40 g; use ≈35 g by default for seasoned toasts/crackers to avoid underestimation.\n", | |
| "- Be language-aware for common terms (e.g., PT: “tostas com sabor a rosmaninho” = rosemary-flavored toasts/crackers; “uma mão” = a handful).\n", | |
| "\n", | |
| "Category baselines (per 100 g unless noted), including domain-specific items seen in tasks:\n", | |
| "- Water: 0 kcal; macros 0; food_groups=['beverage'].\n", | |
| "- Brewed coffee, black (small cup): 2–5 kcal per serving (8–12 fl oz), carbs/fat/protein ~0 g; food_groups=['beverage'].\n", | |
| "- Flavored coffee creamer, single-serve cup/pod (10–15 mL): ≈30–45 kcal; carbs ≈4–6 g; fat ≈1.5–2.5 g; protein ≈0 g; food_groups=['dairy'].\n", | |
| "- Regular plain bagel (typical “regular” size ≈ 90–105 g): ≈270–300 kcal; carbs ≈53–58 g; fat ≈1–2 g; protein ≈9–11 g; fiber ≈2–3 g; food_groups=['grain'].\n", | |
| "- Jam/jelly (per tablespoon ≈20 g): ≈50 kcal; carbs ≈13 g; fat 0 g; protein 0 g; fiber ≈0–0.5 g; food_groups=['fruit'].\n", | |
| "- Grapes, red/green: ≈69 kcal/100 g; carbs ≈18 g; fat ≈0.2 g; protein ≈0.7 g; fiber ≈0.9 g; food_groups=['fruit'].\n", | |
| "- Rice, cooked:\n", | |
| " - White rice, general: ≈130 kcal/100 g; carbs ≈28 g; fat ≈0.3 g; protein ≈2.4 g; fiber ≈0.3 g.\n", | |
| " - Glutinous (sticky) rice, steamed/plain (no coconut milk): ≈130–150 kcal/100 g by default when unspecified. If clearly “plain cooked,” you may use ≈100–120 kcal/100 g; choose the higher end if the portion is a main staple to avoid underestimation.\n", | |
| "- Fish and soups:\n", | |
| " - Fish, white (cooked): ≈90–120 kcal/100 g; carbs 0 g; fat ≈2–5 g; protein ≈18–22 g.\n", | |
| " - Fish broth soups (e.g., striped snakehead fish broth soup): for gram-specified full bowls (>250 g), assume a substantive soup: ≈100–130 kcal/100 g unless “clear/light broth” is specified. Use the higher end for hearty/rich soups.\n", | |
| "- Dried apple snacks (“a bag” single-serve): assume ≈40 g per bag; ≈140 kcal per bag; carbs ≈34 g; fat ≈0–1 g; protein ≈0.5 g; fiber ≈3–5 g; food_groups=['fruit'].\n", | |
| "- Cheese pizza, general: ≈238 kcal/100 g; carbs ≈30 g; fat ≈9 g; protein ≈10 g; fiber ≈2 g.\n", | |
| "- Stracciatella (fresh cheese): ≈300 kcal/100 g; carbs ≈2 g; fat ≈27 g; protein ≈12 g; fiber 0 g.\n", | |
| "- Confit grapes (small garnish): 30–60 kcal total per tablespoon or two; mostly carbs; fiber ≈0–1 g.\n", | |
| "- Focaccia, “little piece” (≈15–20 g each): ≈45–60 kcal per piece; carbs ≈8–12 g, fat ≈1–2 g, protein ≈1–2 g, fiber ≈0.3–0.6 g per piece.\n", | |
| "- Common seabream, cooked: ≈90 kcal/100 g; carbs 0 g; fat ≈2 g; protein ≈18 g.\n", | |
| "- Matabala, boiled: ≈30 kcal/100 g; carbs ≈7–9 g; fat ≈0.1 g; protein ≈0.5–1 g; fiber ≈1–2 g.\n", | |
| "- Rice with vegetables, cooked mixed: ≈90 kcal/100 g; carbs ≈18–20 g; fat ≈0.8–1.2 g; protein ≈2–3 g; fiber ≈1 g.\n", | |
| "- Cookies (typical medium cookie ≈14 g): ≈70–80 kcal per cookie; carbs ≈9–11 g; fat ≈3–4 g; protein ≈1 g; fiber ≈0.3–0.5 g; food_groups=['sweets and snacks'].\n", | |
| "- Crackers/toasts/bagel chips, plain or flavored (e.g., rosemary toasts): ≈400–450 kcal/100 g; food_groups=['grain'].\n", | |
| " - “A handful” of small toasts/crackers: assume ≈35 g total (≈140–160 kcal using the above density).\n", | |
| "- Fruit-flavored drink powder (sugary beverage mix, dry powder only): ≈370–390 kcal/100 g; carbs ≈95–98 g; fat ≈0 g; protein ≈0 g; fiber ≈0 g; food_groups=['sweets and snacks'].\n", | |
| " - When mixed with water, count the powder as the caloric item and water as 0 kcal beverage; do not double-count the mixture.\n", | |
| "\n", | |
| "Food group mapping guidance:\n", | |
| "- dairy: milk, yogurt, stracciatella, coffee creamer\n", | |
| "- grain: bread, bagel, rice, pizza crust, focaccia, crackers/toasts\n", | |
| "- meat and alternatives: fish (e.g., snakehead, seabream), chicken, eggs, wurst\n", | |
| "- fruit: grapes (fresh or confit), jams/jellies, dried apple snacks\n", | |
| "- beverage: water, coffee, tea, other drinks (include water explicitly as beverage)\n", | |
| "- sweets and snacks: cookies, candies, sugary drink powders/snack-like sweets\n", | |
| "- Include additional groups (vegetable, legume, nut and seed, fat and oils) when clearly applicable.\n", | |
| "\n", | |
| "Handling explicit target calories:\n", | |
| "- If the description includes an explicit or approximate total calorie target (e.g., “~250 cal”, “total of 700 cals”), ensure the sum over all items is within ±10% of that target. Adjust portion assumptions and per-item estimates accordingly, while staying realistic.\n", | |
| "\n", | |
| "Quality checks before output:\n", | |
| "- Verify that total calories = sum(quantity * calories) is sensible, and (if a target was stated) within ±10% of that target.\n", | |
| "- Ensure macros roughly align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat).\n", | |
| "- For gram-specified amounts: compute totals by (grams/100) * per-100 g baseline; set quantity=1.0.\n", | |
| "- For counted items (pieces, slices): set quantity to the count; provide per-unit nutrition, not totals.\n", | |
| "- When portion sizes are unspecified for multi-component dishes or packaged snacks (“a bag”, “a handful”), assume modest, single-serve amounts consistent with the item category (e.g., dried fruit bag ≈40 g ≈140 kcal; handful of toasts/crackers ≈35 g ≈150 kcal) so totals are not underestimated.\n", | |
| "\n", | |
| "Implementation tips:\n", | |
| "- Use higher-end realistic baselines for main staples and full bowls (e.g., sticky rice, hearty fish soups) to avoid undercounting when grams are large.\n", | |
| "- Treat add-ins (e.g., coffee creamer) as separate line items if they are separate components.\n", | |
| "- Do not include any explanatory text—return only the list of FoodItem(...) objects.\n", | |
| "2025/12/20 10:00:24 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: New subsample score 3 is better than old score 2. Continue to full eval and add to candidate pool.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:24 WARNING dspy.adapters.json_adapter: Failed to use structured output format, falling back to JSON mode.\n", | |
| "2025/12/20 10:00:24 WARNING dspy.clients.lm: LM response was truncated due to exceeding max_tokens=None. You can inspect the latest LM interactions with `dspy.inspect_history()`. To avoid truncation, consider passing a larger max_tokens when setting up dspy.LM. You may also consider increasing the temperature (currently 0.0) if the reason for truncation is repetition.\n", | |
| "2025/12/20 10:00:24 WARNING dspy.adapters.json_adapter: Failed to use structured output format, falling back to JSON mode.\n", | |
| "2025/12/20 10:00:24 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '30\\u202fg protein‑powder isolate (unflavoured, Star Nutrition) with 1 tbsp organic crunchy peanut butter, 200\\u202fml Oatly milk and 100\\u202fml filtered coffee', 'food_groups': ['dairy', 'meat and alternatives'], 'total_calories': 325.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "3.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "Traceback (most recent call last):\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/utils/parallelizer.py\", line 57, in safe_func\n", | |
| " return user_function(item)\n", | |
| " ^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/evaluate/evaluate.py\", line 172, in process_item\n", | |
| " prediction = program(**example.inputs())\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/predict/predict.py\", line 103, in __call__\n", | |
| " return super().__call__(**kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/utils/callback.py\", line 326, in sync_wrapper\n", | |
| " return fn(instance, *args, **kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/primitives/module.py\", line 81, in __call__\n", | |
| " return self.forward(*args, **kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/predict/predict.py\", line 192, in forward\n", | |
| " completions = adapter(lm, lm_kwargs=config, signature=signature, demos=demos, inputs=kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/chat_adapter.py\", line 47, in __call__\n", | |
| " return JSONAdapter()(lm, lm_kwargs, signature, demos, inputs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/json_adapter.py\", line 86, in __call__\n", | |
| " return super().__call__(lm, lm_kwargs, signature, demos, inputs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/chat_adapter.py\", line 46, in __call__\n", | |
| " raise e\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/chat_adapter.py\", line 38, in __call__\n", | |
| " return super().__call__(lm, lm_kwargs, signature, demos, inputs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/base.py\", line 199, in __call__\n", | |
| " return self._call_postprocess(processed_signature, signature, outputs, lm)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/base.py\", line 135, in _call_postprocess\n", | |
| " value = self.parse(processed_signature, text)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/utils/callback.py\", line 326, in sync_wrapper\n", | |
| " return fn(instance, *args, **kwargs)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/json_adapter.py\", line 173, in parse\n", | |
| " fields[k] = parse_value(v, signature.output_fields[k].annotation)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/dspy/adapters/utils.py\", line 164, in parse_value\n", | |
| " return TypeAdapter(annotation).validate_python(value)\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| " File \"/Users/duarteocarmo/Repos/taralli-api/.env/lib/python3.12/site-packages/pydantic/type_adapter.py\", line 421, in validate_python\n", | |
| " return self.validator.validate_python(\n", | |
| " ^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^\n", | |
| "pydantic_core._pydantic_core.ValidationError: 2 validation errors for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "3.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "\n", | |
| "2025/12/20 10:00:24 INFO dspy.evaluate.evaluate: Average Metric: 30.0 / 53 (56.6%)\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Full valset score for new program: 0.5660377358490566\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Full train_val score for new program: 0.5660377358490566\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Individual valset scores for new program: [1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 1, 1, 0, 0.0, 1, 0, 0, 0, 1]\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: New valset pareto front scores: [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1]\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Full valset pareto front score: 0.8113207547169812\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Updated valset pareto front programs: [{3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {8, 1, 7}, {4, 5, 6, 7, 9}, {1, 2, 5, 6, 7, 8, 9}, {8, 3, 6, 7}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 3, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {8, 7}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 4, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {8, 3, 7}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 4}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 2, 3, 5, 6, 7, 8, 9}, {8, 6, 7}, {8, 6}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {8, 3, 5}, {1, 5, 9}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 4, 5, 6, 8}, {3, 4, 6, 8, 9}, {1, 3, 6, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {0, 1, 2, 3, 6, 7, 9}, {4, 6}, {1, 2, 3, 4, 5, 6, 7, 8, 9}, {2, 4, 5, 6, 7, 8, 9}, {9, 5}, {6}, {1, 2, 3, 4, 6, 8}, {9, 3}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {8}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9}, {3, 4, 5, 6, 8, 9}]\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Best valset aggregate score so far: 0.660377358490566\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Best program as per aggregate score on train_val: 8\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Best program as per aggregate score on valset: 8\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Best score on valset: 0.660377358490566\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Best score on train_val: 0.660377358490566\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: Linear pareto front program index: 8\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 11: New program candidate index: 9\n", | |
| "GEPA Optimization: 100%|█████████▉| 590/592 [00:02<00:00, 176.47rollouts/s]2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 12: No merge candidates found\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Selected program 6 score: 0.6226415094339622\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 2.00 / 3 (66.7%): 100%|██████████| 3/3 [00:00<00:00, 3504.01it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:24 INFO dspy.evaluate.evaluate: Average Metric: 2.0 / 3 (66.7%)\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Proposed new text for self: You are performing a nutritional analysis from a short free-text food_description. Your job is to parse the described foods and amounts, estimate nutrition, and output a list of FoodItem objects.\n", | |
| "\n", | |
| "Input:\n", | |
| "- A single string field (food_description) describing foods eaten and amounts.\n", | |
| "\n", | |
| "Output:\n", | |
| "- Return ONLY a single Python-style list literal of FoodItem objects, nothing else. Example:\n", | |
| " [FoodItem(name='apple', quantity=1.0, calories=95.0, carbs=25.0, fat=0.3, protein=0.5, fiber=4.4, food_groups=['fruit'])]\n", | |
| "\n", | |
| "FoodItem fields:\n", | |
| "- name: string, concise common name (include brand if specified; e.g., \"Oatly oat milk\").\n", | |
| "- quantity: float, the number of per-unit servings you are using for this item (see quantity rules).\n", | |
| "- calories: float, calories PER ONE UNIT of quantity.\n", | |
| "- carbs, fat, protein, fiber: floats in grams PER ONE UNIT of quantity.\n", | |
| "- food_groups: list of strings (choose from: 'fruit', 'vegetable', 'grain', 'dairy', 'meat and alternatives', 'legume', 'nut and seed', 'fat and oils', 'sweets and snacks', 'beverage'). If unclear, use [].\n", | |
| "\n", | |
| "Critical rule about quantity and per-unit nutrition:\n", | |
| "- The evaluator computes totals as sum(quantity * calories). Therefore, calories and macros MUST be per ONE unit of quantity.\n", | |
| "- Safe approaches:\n", | |
| " - If the description gives a specific portion in grams/ounces/milliliters/cups/etc. (e.g., “190 g bread”, “1 slice pizza”, “2 cups soup”, “620 ml bottle”), compute the TOTAL nutrition for that described portion and set quantity=1.0 with calories/macros equal to that total for the portion.\n", | |
| " - If the description gives a count of identical units (e.g., “2 eggs”, “15 almonds”, “2 little focaccia pieces”), you may set quantity to that count, BUT then calories/macros must be per single unit (e.g., per egg, per almond, per piece). Never provide totals alongside quantity>1, or it will double-count.\n", | |
| " - Fractions of a single unit are allowed (e.g., quantity=0.2 for “0.2 of a pastry”), but then calories/macros must be per whole unit of that item.\n", | |
| "- Never set quantity equal to grams while also providing total calories for that gram amount. Either:\n", | |
| " - Convert to a single portion: quantity=1.0 with calories/macros = totals for that gram/ml amount; OR\n", | |
| " - Use quantity in grams/ml AND provide calories/macros per gram/ml (generally avoid this; prefer quantity=1.0 with totals).\n", | |
| "\n", | |
| "Interpretation and itemization rules:\n", | |
| "- Parse the description to identify distinct foods and their amounts. When amounts are in grams or milliliters, compute totals proportionally from per-100 g or per-100 ml references and set quantity=1.0.\n", | |
| "- Do not invent or add separate items unless clearly present. Treat toppings or descriptors (“with X,” “on a medium crust,” “with fruit”) as part of the same item unless the text clearly indicates an additional, separate serving.\n", | |
| "- If modifiers indicate small pieces (e.g., “little piece”), choose a conservative per-piece size and nutrition consistent with the descriptor.\n", | |
| "- Keep names clear; use singular names with quantity>1 when items are identical units; otherwise describe the whole portion with quantity=1.0.\n", | |
| "- Preserve brand names when given (e.g., “Oatly oat milk”, “Star Nutrition protein isolate”).\n", | |
| "\n", | |
| "Packaging and default size heuristics (important for accuracy):\n", | |
| "- “Bottle” of soda: assume 20 fl oz (591 ml) unless otherwise specified (if it says “small bottle,” assume 500 ml; “can” defaults to 12 fl oz/355 ml).\n", | |
| "- “Bottle” of iced tea: if not specified as “sweetened,” assume unsweetened tea (≈ 0–2 kcal/100 ml). If explicitly “sweetened” or similar, treat like soda ≈ 42–45 kcal/100 ml.\n", | |
| "- “Small bag” of chips/crackers: assume ≈ 1.5 oz (42–45 g). A “snack-size” bag is often 1.0–1.5 oz; choose 1.5 oz unless context implies the smallest 1.0 oz bag.\n", | |
| "- If grams/ml are provided for the entire packaged item, always compute totals from that weight/volume and set quantity=1.0.\n", | |
| "\n", | |
| "Estimation guidance and baselines (tuned to match evaluation):\n", | |
| "- Prefer realistic, conservative estimates when portions are unspecified; however, when packaging implies a standard larger size (e.g., “bottle” of soda), use the standard package assumption above rather than a smaller “dieting” portion.\n", | |
| "- Ensure macros align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat, allowing for rounding). Include fiber where reasonable.\n", | |
| "\n", | |
| "Core baselines (per 100 g unless stated; scale by portion and set quantity=1.0 for gram/ml-specified items):\n", | |
| "- Cheese pizza, general (includes crust and cheese; toppings are part of this if described):\n", | |
| " - ≈ 238 kcal/100 g; carbs ≈ 30 g, fat ≈ 9 g, protein ≈ 10 g, fiber ≈ 2 g per 100 g.\n", | |
| "- Stracciatella (fresh cheese, dairy component of burrata):\n", | |
| " - ≈ 300 kcal/100 g; carbs ≈ 2 g, fat ≈ 27 g, protein ≈ 12 g, fiber 0 g per 100 g.\n", | |
| " - If portion unspecified in a dish, assume a modest 40–60 g unless context implies more.\n", | |
| "- Confit grapes (garnish):\n", | |
| " - Small garnish ≈ 30–60 kcal total per tablespoon or two; mostly carbs; fiber ≈ 0–1 g.\n", | |
| "- Focaccia, “little piece” (small bite-sized piece ≈ 15–20 g each):\n", | |
| " - ≈ 45–60 kcal per piece; carbs ≈ 8–12 g, fat ≈ 1–2 g, protein ≈ 1–2 g, fiber ≈ 0.3–0.6 g per piece.\n", | |
| " - If text says “2 little pieces,” set quantity=2 with per-piece nutrition.\n", | |
| "- Seabream, stewed/cooked:\n", | |
| " - ≈ 90 kcal/100 g; carbs 0 g, fat ≈ 2 g, protein ≈ 18 g per 100 g.\n", | |
| "- Matabala, boiled (starchy root; very low energy density when boiled):\n", | |
| " - ≈ 30 kcal/100 g; carbs ≈ 7–8 g, fat ≈ 0.1 g, protein ≈ 0.5–1 g, fiber ≈ 1–2 g per 100 g.\n", | |
| "- Rice with vegetables, cooked mixed:\n", | |
| " - ≈ 90 kcal/100 g; carbs ≈ 19 g, fat ≈ 1.0 g, protein ≈ 2.5 g, fiber ≈ 1 g per 100 g.\n", | |
| "\n", | |
| "Additional baselines (from prior tasks; use these to match evaluation expectations):\n", | |
| "- Leavened corn and wheat flour bread (cornbread-like/enriched leavened bread):\n", | |
| " - ≈ 350 kcal/100 g; carbs ≈ 57 g, fat ≈ 10 g, protein ≈ 8 g, fiber ≈ 3 g per 100 g.\n", | |
| "- Spiced butter (seasoned butter; treat as butter):\n", | |
| " - ≈ 720–730 kcal/100 g; carbs ≈ 1 g, fat ≈ 81 g, protein ≈ 1 g, fiber 0 g per 100 g.\n", | |
| "- Fish broth/stock:\n", | |
| " - ≈ 10–12 kcal/100 g; carbs 0 g, fat ≈ 0.2–0.3 g, protein ≈ 1–2 g, fiber 0 g per 100 g.\n", | |
| "- Barley flour porridge (thick cooked porridge, water-based unless stated):\n", | |
| " - ≈ 170 kcal/100 g; carbs ≈ 35 g, fat ≈ 1.5 g, protein ≈ 3.5 g, fiber ≈ 3 g per 100 g.\n", | |
| "- Fried ripe plantains (maduros-style; conservative oil uptake):\n", | |
| " - ≈ 133 kcal/100 g; carbs ≈ 26 g, fat ≈ 3 g, protein ≈ 1.3 g, fiber ≈ 2.2 g per 100 g.\n", | |
| "- Pecans:\n", | |
| " - One handful ≈ 28–30 g. Per 28 g: ≈ 196 kcal; carbs ≈ 3.9 g, fat ≈ 20.4 g, protein ≈ 2.6 g, fiber ≈ 2.7 g.\n", | |
| " - Per 100 g reference ≈ 691 kcal; carbs ≈ 14 g, fat ≈ 72 g, protein ≈ 9 g, fiber ≈ 10 g.\n", | |
| "- Water:\n", | |
| " - 0 kcal; carbs 0 g, fat 0 g, protein 0 g, fiber 0 g.\n", | |
| "\n", | |
| "Baselines inferred from examples (apply these to stay within expected answers):\n", | |
| "- Chicken submarine sandwich with cheese/veg/spread (whole mixed item; use gram/ml scaling when provided):\n", | |
| " - ≈ 200 kcal/100 g; carbs ≈ 20 g, fat ≈ 9 g, protein ≈ 10 g, fiber ≈ 1 g per 100 g.\n", | |
| " - If total weight is given (e.g., “520 g”), compute totals and set quantity=1.0 with those totals.\n", | |
| "- Iced green tea, bottled:\n", | |
| " - If not explicitly “sweetened,” assume unsweetened ≈ 2 kcal/100 ml; carbs/fat/protein ≈ 0; fiber 0; food_groups=['beverage'].\n", | |
| " - If clearly sweetened, use soda baseline below.\n", | |
| "- Cola (regular, not diet):\n", | |
| " - ≈ 42–43 kcal/100 ml; carbs ≈ 10.6–11 g/100 ml; fat/protein/fiber ≈ 0.\n", | |
| " - Packaging defaults: can 355 ml ≈ 150 kcal; bottle 591 ml ≈ 240–255 kcal; 500 ml ≈ 210–220 kcal. Use “bottle”=591 ml unless stated otherwise.\n", | |
| "- Cheez-It crackers, small bag:\n", | |
| " - “Small bag/snack bag” ≈ 42–45 g: ≈ 200–240 kcal total; carbs ≈ 25–30 g, fat ≈ 10–12 g, protein ≈ 4–5 g, fiber ≈ 1–2 g. food_groups=['grain','dairy'].\n", | |
| " - Per 28 g (1 oz) reference ≈ 150 kcal; carbs ≈ 17 g, fat ≈ 8 g, protein ≈ 3 g, fiber ≈ 1 g.\n", | |
| "- Chocolate ice cream sandwich (prepackaged; whole sandwich):\n", | |
| " - ≈ 270 kcal/100 g. If a 75 g sandwich: ≈ 200–205 kcal total.\n", | |
| " - Macro profile per 100 g typical: carbs ≈ 34 g, fat ≈ 13 g, protein ≈ 4 g, fiber ≈ 1 g. Scale by weight.\n", | |
| "\n", | |
| "Other useful baselines (from prior tasks; apply as described):\n", | |
| "- Pain au chocolat (one standard pastry ≈ 65–75 g):\n", | |
| " - ≈ 300 kcal per pastry; carbs ≈ 33 g, fat ≈ 17 g, protein ≈ 6 g, fiber ≈ 1.5 g per pastry.\n", | |
| " - For “0.2 of a pain au chocolat,” either set quantity=0.2 with these per-pastry values, or set quantity=1.0 with totals for the 20% portion—do not double-count.\n", | |
| "- Durum falafel wrap (whole wrap with falafel, veg, typical sauces):\n", | |
| " - ≈ 550 kcal per wrap; carbs ≈ 60 g, fat ≈ 24 g, protein ≈ 16 g, fiber ≈ 8 g per wrap.\n", | |
| " - Treat as a single mixed item; do not split into tortilla/falafel/sauce unless clearly separate servings.\n", | |
| "- French fries, per piece:\n", | |
| " - ≈ 15 kcal per fry; carbs ≈ 1.8 g, fat ≈ 0.7 g, protein ≈ 0.2 g, fiber ≈ 0.2 g per fry.\n", | |
| " - If given a count (e.g., “10 fries”), set quantity to that count and keep per-fry nutrition.\n", | |
| "- Peanut butter (1 tablespoon ≈ 16 g):\n", | |
| " - ≈ 95 kcal per tbsp; carbs ≈ 3.5 g, fat ≈ 8.0 g, protein ≈ 4.0 g, fiber ≈ 1.5 g per tbsp.\n", | |
| "- Protein powder isolate (e.g., unflavoured whey/plant isolate; brand like Star Nutrition):\n", | |
| " - Per 30 g scoop: ≈ 110–120 kcal; carbs ≈ 1–3 g, fat ≈ 0.5–1.5 g, protein ≈ 25 g, fiber ≈ 0 g per 30 g.\n", | |
| "- Oat milk (e.g., Oatly; assume Barista unless specified):\n", | |
| " - ≈ 60 kcal/100 ml; carbs ≈ 6.5–7 g, fat ≈ 3.5 g, protein ≈ 1.0 g, fiber ≈ 0–1 g per 100 ml.\n", | |
| " - For specified ml (e.g., 200 ml), compute totals and set quantity=1.0 with those totals.\n", | |
| " - Plant milks are beverages, not dairy, for food_groups.\n", | |
| "- Coffee, unsweetened black:\n", | |
| " - ≈ 2 kcal/100 ml; negligible macros; set as a beverage.\n", | |
| "\n", | |
| "Food group mapping guidance:\n", | |
| "- Breads, focaccia, rice, pizza crust, porridge, crackers -> ['grain'].\n", | |
| "- Cheese, yogurt, stracciatella -> ['dairy'].\n", | |
| "- Fish, chicken, eggs, protein powders/supplements -> ['meat and alternatives'].\n", | |
| "- Falafel, beans, lentils, tofu, tempeh -> ['legume'].\n", | |
| "- Peanut butter, nuts, seeds -> ['nut and seed'].\n", | |
| "- Butter and oils -> ['fat and oils'].\n", | |
| "- Grapes (fresh or confit), plantains -> ['fruit'].\n", | |
| "- Mixed items like “rice with vegetables” -> include both ['grain', 'vegetable'] when appropriate.\n", | |
| "- Water, tea, coffee, plant milks, soda, iced tea -> ['beverage'].\n", | |
| "- Avoid labeling plant milks as 'dairy'.\n", | |
| "\n", | |
| "Handling target calories in the description:\n", | |
| "- If the description includes an explicit or approximate total calorie target (e.g., “~250 cal”, “total of 700 cals”), ensure the sum over all items (sum of quantity*calories) is within ±10% of that target. Adjust portion assumptions and per-item estimates accordingly, while staying realistic.\n", | |
| "\n", | |
| "Portion and unit heuristics:\n", | |
| "- For gram- or milliliter-specified amounts: compute totals by (amount/100) * per-100 baseline; set quantity=1.0 with those totals.\n", | |
| "- For counted items (pieces, slices): set quantity to the count; provide per-unit nutrition, not totals.\n", | |
| "- For “a handful of nuts”: assume ≈ 28–30 g unless otherwise specified; set quantity=1.0 with totals for that portion.\n", | |
| "- For fractional units (e.g., “0.2 of a pastry”), you may set quantity to the fraction with per-whole-unit nutrition.\n", | |
| "- For “a glass of water”: treat as 0 kcal beverage; set quantity=1.0.\n", | |
| "\n", | |
| "Quality checks before output:\n", | |
| "- Verify total calories = sum(quantity * calories) is sensible and, if a target was stated, within ±10% of that target.\n", | |
| "- Ensure macros roughly align with calories (kcal ≈ 4*carbs + 4*protein + 9*fat, allowing for rounding and fiber).\n", | |
| "- Choose food_groups from the allowed list; avoid leaving [] when a clear mapping exists (e.g., cola -> ['beverage'], Cheez-It -> ['grain','dairy'], falafel -> ['legume'], peanut butter -> ['nut and seed'], oat milk -> ['beverage']).\n", | |
| "- Do not include any explanatory text—only the list of FoodItem(...) objects.\n", | |
| "2025/12/20 10:00:24 INFO dspy.evaluate.evaluate: Average Metric: 3.0 / 3 (100.0%)\n", | |
| "2025/12/20 10:00:24 INFO dspy.teleprompt.gepa.gepa: Iteration 12: New subsample score 3 is better than old score 2. Continue to full eval and add to candidate pool.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 INFO dspy.evaluate.evaluate: Average Metric: 33.0 / 53 (62.3%)\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Full valset score for new program: 0.6226415094339622\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Full train_val score for new program: 0.6226415094339622\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Individual valset scores for new program: [1, 0, 0, 0, 1, 1, 0, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 1, 1, 0, 1, 1, 1, 1, 0, 0, 1, 0, 1, 0, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1]\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: New valset pareto front scores: [1, 0, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 1, 0, 1, 1, 1, 1, 1, 1, 0, 1, 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 1]\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Full valset pareto front score: 0.8301886792452831\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Updated valset pareto front programs: [{3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {8, 1, 7}, {4, 5, 6, 7, 9, 10}, {1, 2, 5, 6, 7, 8, 9, 10}, {8, 3, 6, 7}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 3, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {8, 7}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 4, 7, 8, 9}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {8, 3, 7}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 2, 3, 5, 6, 7, 8, 9, 10}, {8, 6, 7}, {8, 6}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {8, 10, 3, 5}, {1, 5, 9}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 4, 5, 6, 8, 10}, {3, 4, 6, 8, 9, 10}, {1, 3, 6, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {0, 1, 2, 3, 6, 7, 9, 10}, {10, 4, 6}, {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {2, 4, 5, 6, 7, 8, 9, 10}, {9, 5}, {10, 6}, {1, 2, 3, 4, 6, 8, 10}, {9, 3}, {10}, {8}, {0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10}, {3, 4, 5, 6, 8, 9, 10}]\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Best valset aggregate score so far: 0.660377358490566\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Best program as per aggregate score on train_val: 8\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Best program as per aggregate score on valset: 8\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Best score on valset: 0.660377358490566\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Best score on train_val: 0.660377358490566\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: Linear pareto front program index: 8\n", | |
| "2025/12/20 10:00:25 INFO dspy.teleprompt.gepa.gepa: Iteration 12: New program candidate index: 10\n", | |
| "GEPA Optimization: 100%|█████████▉| 590/592 [00:03<00:00, 184.39rollouts/s]\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 29.00 / 54 (53.7%): 100%|██████████| 54/54 [00:00<00:00, 216.75it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 INFO dspy.evaluate.evaluate: Average Metric: 29.0 / 54 (53.7%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "EvaluationResult(score=53.7, results=<list of 54 results>)\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# Optimise the vanilla module's instructions with GEPA, then score the result.\n", | |
| "# `gpt_5` is only the reflection LM that proposes rewritten instructions.\n", | |
| "# NOTE(review): temperature=1.0 plus a large max_tokens looks like the setup DSPy\n", | |
| "# expects for OpenAI reasoning models -- confirm against the dspy.LM docs.\n", | |
| "gpt_5 = dspy.LM(model=\"openai/gpt-5\", temperature=1.0, max_tokens=32000)\n", | |
| "gepa = dspy.GEPA(\n", | |
| "    metric=eval_metric,\n", | |
| "    auto=\"light\",\n", | |
| "    reflection_lm=gpt_5,\n", | |
| "    num_threads=NUM_THREADS,  # shared config constant instead of a repeated literal\n", | |
| ")\n", | |
| "\n", | |
| "# NOTE(review): no valset is passed and the recorded log reports 'train_val' scores,\n", | |
| "# so candidates appear to be selected on the training examples -- consider passing\n", | |
| "# a held-out valset if one is available.\n", | |
| "module_gepa = gepa.compile(module_vanilla, trainset=trainset)\n", | |
| "optimizer_program_score = evaluate(module_gepa)\n", | |
| "er.add_result(\"gepa_gemini_2.5_flash\", optimizer_program_score.score)\n", | |
| "print(optimizer_program_score)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 14, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "gepa_gemini_2.5_flash: 53.7\n", | |
| "optimized_gemini_2.5_flash: 44.44\n", | |
| "bootstrap_fewshot_random_search_gemini_2.5_flash: 31.48\n", | |
| "vanilla_gemini_2.5_flash: 29.63\n", | |
| "mipro_v2_gemini_2.5_flash: 29.63\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# List every score recorded so far via er.add_result, highest first\n", | |
| "# (presumably `er` is the EvalResults collector defined earlier -- verify).\n", | |
| "er.print_results()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 15, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Evaluating optimized for deepseek/deepseek-chat\n", | |
| "Average Metric: 31.00 / 54 (57.4%): 100%|██████████| 54/54 [00:00<00:00, 713.06it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 INFO dspy.evaluate.evaluate: Average Metric: 31.0 / 54 (57.4%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating gepa for deepseek/deepseek-chat\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For lunch, I picked up a 165-gram double cheeseburger from McDonald's.\", 'food_groups': ['grain', 'meat and alternatives', 'dairy'], 'total_calories': 465.3, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='FoodItem(name=\"McDonald\\... alternatives\", \"dairy\"', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 0%| | 0/54 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'pasta with pesto and tomatoes', 'food_groups': ['vegetable', 'grain', 'dairy'], 'total_calories': 339.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='pasta wit...etable', 'fat and oils'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 2%|▏ | 1/54 [00:00<00:01, 50.91it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'a couple of handfuls of chocolate granola to snack on', 'food_groups': ['grain'], 'total_calories': 280.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='chocolate...n', 'sweets and snacks'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 4%|▎ | 2/54 [00:00<00:00, 73.09it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For dinner, I'm having 25 grams of bread, 150 grams of chicken wings, and a 250-gram mixed vegetable salad.\", 'food_groups': ['grain', 'meat and alternatives', 'vegetable'], 'total_calories': 633.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='vegetable', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I snacked on 35 grams of dry pastry with 3 grams of margarine, plus a drink of 100 grams of apricot nectar and 30 grams of wheat bread.', 'food_groups': ['grain', 'dairy', 'fruit'], 'total_calories': 289.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='grain', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 6%|▌ | 3/54 [00:00<00:00, 81.83it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '100\\u202fg of peanut butter', 'food_groups': ['meat and alternatives'], 'total_calories': 588.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='peanut bu..._groups=['nut and seed'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 11%|█ | 6/54 [00:00<00:00, 98.03it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '3 buns with peanut butter and jelly (small)', 'food_groups': ['fruit', 'grain', 'meat and alternatives'], 'total_calories': 615.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='bun with ...'nut and seed', 'fruit'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'This morning, I had a 31g cereal bar and a 248g nutritional shake for breakfast.', 'food_groups': ['grain', 'dairy'], 'total_calories': 406.41, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 15%|█▍ | 8/54 [00:00<00:00, 118.76it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '200\\u202fg of pasta with pesto and tomatoes', 'food_groups': ['vegetable', 'grain', 'dairy'], 'total_calories': 430.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='pasta wit...etable', 'fat and oils'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'apple', 'food_groups': ['fruit'], 'total_calories': 95.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='apple', q...4, food_groups=['fruit'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 17%|█▋ | 9/54 [00:00<00:00, 131.16it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For my snack, I’m enjoying 14 grams of soft fruit treats, a 240-gram bottle of plain water, and a 372-gram serving of root beer.', 'food_groups': ['fruit'], 'total_calories': 207.12, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 19%|█▊ | 10/54 [00:00<00:00, 130.13it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'two slices of half a croissant toasted with butter and Flamengo cheese (1 slice split)', 'food_groups': ['dairy', 'grain'], 'total_calories': 336.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='croissant...roups=['grain', 'dairy'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 22%|██▏ | 12/54 [00:00<00:00, 143.89it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I made myself a breakfast of brewed coffee weighing 240 grams and a chocolate chip muffin that's 70 grams.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 281.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'panino mortadella', 'food_groups': ['grain', 'meat and alternatives'], 'total_calories': 430.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='panino mo...'meat and alternatives'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 26%|██▌ | 14/54 [00:00<00:00, 159.68it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I enjoyed 200 grams of tea with sugar along with 230 grams of coconut milk rice for breakfast.', 'food_groups': ['grain', 'fruit'], 'total_calories': 558.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='grain', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 28%|██▊ | 15/54 [00:00<00:00, 159.42it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'frango churrasco', 'food_groups': ['meat and alternatives'], 'total_calories': 250.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='grilled c...'meat and alternatives'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 31%|███▏ | 17/54 [00:00<00:00, 172.60it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '3 slices of pão bolo de ló de leite with goat cheese', 'food_groups': ['dairy', 'grain'], 'total_calories': 480.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='dairy', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 35%|███▌ | 19/54 [00:00<00:00, 190.15it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '2 spoons of peanut butter', 'food_groups': ['meat and alternatives'], 'total_calories': 180.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='peanut bu..._groups=['nut and seed'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '1\\u202fL Augustiner beer', 'food_groups': [], 'total_calories': 430.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='Augustine...food_groups=['beverage'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 37%|███▋ | 20/54 [00:00<00:00, 199.44it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For lunch, I'm having an English muffin (58.0g) with American cheese (21.0g), a cooked egg omelet (55.0g), some jelly (20.0g), and a serving of pork bacon (12.0g).\", 'food_groups': ['grain', 'dairy', 'meat and alternatives', 'fruit'], 'total_calories': 406.69, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='meat and alternatives', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 39%|███▉ | 21/54 [00:00<00:00, 199.44it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I'm snacking on a regular bag of cheese popcorn alongside a can of cola for lunch.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 611.84, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 41%|████ | 22/54 [00:00<00:00, 199.44it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I'm enjoying a peanut butter and jelly sandwich on wheat bread and a cup of unsweetened water for lunch.\", 'food_groups': ['meat and alternatives', 'fruit', 'grain'], 'total_calories': 402.08, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 0 (0%): 43%|████▎ | 23/54 [00:00<00:00, 224.33it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'tramezzino vegano 1/2', 'food_groups': ['vegetable', 'grain', 'meat and alternatives'], 'total_calories': 145.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='tramezzin...s=['grain', 'vegetable'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I’m treating myself to a medium latte for a little snack.', 'food_groups': ['dairy'], 'total_calories': 206.4, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='medium la...ps=['dairy', 'beverage'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For a snack, I'm having a small single serving bag of Cheetos and a medium frosted cinnamon bun.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 563.6, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For my snack, I prepared 150g of fresh peeled apple, 33g of beans, and 60g of white bread. I also have 9.4g of raw onion, 35.5g of potato, and 11.5g of salad tomato, plus 5.8g of olive oil and 320g of tap water.', 'food_groups': ['fruit', 'meat and alternatives', 'grain', 'vegetable'], 'total_calories': 370.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For dinner, I’m having 3 grams of nacho cheese flavored Doritos with about 41 grams of topping from a meat pizza.', 'food_groups': ['grain', 'dairy', 'meat and alternatives', 'vegetable'], 'total_calories': 147.18, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=[\"FoodItem(name='nacho ch...s=['sweets and snacks'\"], input_type=list]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type\n", | |
| "1\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=['meat and alternatives', 'grain', 'vegetable'], input_type=list]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '300\\u202fg of pasta with butter and olive oil', 'food_groups': ['dairy', 'grain'], 'total_calories': 618.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='pasta wit...'grain', 'fat and oils'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For breakfast, I enjoyed 11.2 grams of cornstarch Atole, along with 69 grams of fried eggs, 115.8 grams of cooked maize flour, and 113 grams of homemade tomato chirmol sauce.', 'food_groups': ['grain', 'meat and alternatives', 'vegetable'], 'total_calories': 687.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='vegetable', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I enjoyed a cup of Frosted Flakes cereal with a cup of 2% reduced fat milk for breakfast.', 'food_groups': ['grain', 'dairy'], 'total_calories': 273.29, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='dairy', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I just made some popcorn for a snack. It's 14 grams, popped in oil and has butter on it.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 73.22, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='buttered ...'grain', 'fat and oils'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'two bananas and a cup of coffee with sugar and milk', 'food_groups': ['fruit', 'dairy'], 'total_calories': 243.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=[\"FoodItem(name='banana',..., food_groups=['fruit'\"], input_type=list]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type\n", | |
| "1\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=['beverage', 'dairy'], input_type=list]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'avocado toast', 'food_groups': ['fruit', 'vegetable', 'grain'], 'total_calories': 250.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='avocado t...roups=['grain', 'fruit'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:25 WARNING dspy.utils.parallelizer: Execution cancelled due to errors or interruption.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Error evaluating gepa_deepseek_chat for deepseek/deepseek-chat: Execution cancelled due to errors or interruption.\n", | |
| "Evaluating mipro for deepseek/deepseek-chat\n", | |
| "Average Metric: 9.00 / 54 (16.7%): 100%|██████████| 54/54 [00:00<00:00, 657.49it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 INFO dspy.evaluate.evaluate: Average Metric: 9.0 / 54 (16.7%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating bootstrap_fewshot_random_search for deepseek/deepseek-chat\n", | |
| "Average Metric: 16.00 / 54 (29.6%): 100%|██████████| 54/54 [00:00<00:00, 579.56it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 INFO dspy.evaluate.evaluate: Average Metric: 16.0 / 54 (29.6%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating vanilla for deepseek/deepseek-chat\n", | |
| "Average Metric: 6.00 / 33 (18.2%): 59%|█████▉ | 32/54 [00:00<00:00, 381.76it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For a snack, I'm having a bottle of unsweetened bottled water and a juice box of 100% fruit juice.\", 'food_groups': ['fruit'], 'total_calories': 104.5, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 12.00 / 54 (22.2%): 100%|██████████| 54/54 [00:00<00:00, 607.48it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 INFO dspy.evaluate.evaluate: Average Metric: 12.0 / 54 (22.2%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating optimized for deepseek/deepseek-reasoner\n", | |
| "Average Metric: 29.00 / 54 (53.7%): 100%|██████████| 54/54 [00:00<00:00, 587.06it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 INFO dspy.evaluate.evaluate: Average Metric: 29.0 / 54 (53.7%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating gepa for deepseek/deepseek-reasoner\n", | |
| "Average Metric: 0.00 / 9 (0.0%): 15%|█▍ | 8/54 [00:00<00:00, 141.68it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For my snack, I’m enjoying 14 grams of soft fruit treats, a 240-gram bottle of plain water, and a 372-gram serving of root beer.', 'food_groups': ['fruit'], 'total_calories': 207.12, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "2.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 10 (0.0%): 19%|█▊ | 10/54 [00:00<00:00, 140.48it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '3 buns with peanut butter and jelly (small)', 'food_groups': ['fruit', 'grain', 'meat and alternatives'], 'total_calories': 615.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 1.00 / 12 (8.3%): 24%|██▍ | 13/54 [00:00<00:00, 143.36it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '300\\u202fg of pasta with butter and olive oil', 'food_groups': ['dairy', 'grain'], 'total_calories': 618.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.2\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 2.00 / 13 (15.4%): 26%|██▌ | 14/54 [00:00<00:00, 139.91it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I'm snacking on a raw banana along with a piece of hard candy.\", 'food_groups': ['fruit'], 'total_calories': 135.78, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=[\"FoodItem(name='banana',..., food_groups=['fruit'\"], input_type=list]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type\n", | |
| "1\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=[], input_type=list]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 2.00 / 14 (14.3%): 28%|██▊ | 15/54 [00:00<00:00, 139.91it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I just made some popcorn for a snack. It's 14 grams, popped in oil and has butter on it.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 73.22, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 2.00 / 14 (14.3%): 30%|██▉ | 16/54 [00:00<00:00, 139.91it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I enjoyed 200 grams of tea with sugar along with 230 grams of coconut milk rice for breakfast.', 'food_groups': ['grain', 'fruit'], 'total_calories': 558.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='grain', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 2.00 / 15 (13.3%): 31%|███▏ | 17/54 [00:00<00:00, 139.91it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I’m treating myself to a medium latte for a little snack.', 'food_groups': ['dairy'], 'total_calories': 206.4, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='medium la...ps=['dairy', 'beverage'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 2.00 / 15 (13.3%): 33%|███▎ | 18/54 [00:00<00:00, 138.28it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'a couple of handfuls of chocolate granola to snack on', 'food_groups': ['grain'], 'total_calories': 280.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I made myself a breakfast of brewed coffee weighing 240 grams and a chocolate chip muffin that's 70 grams.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 281.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '100\\u202fg of peanut butter', 'food_groups': ['meat and alternatives'], 'total_calories': 588.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='peanut bu...fat and oils', 'legume'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '2 spoons of peanut butter', 'food_groups': ['meat and alternatives'], 'total_calories': 180.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '1\\u202fL Augustiner beer', 'food_groups': [], 'total_calories': 430.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='Augustine...food_groups=['beverage'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For a snack, I'm having a small single serving bag of Cheetos and a medium frosted cinnamon bun.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 563.6, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For my snack, I prepared 150g of fresh peeled apple, 33g of beans, and 60g of white bread. I also have 9.4g of raw onion, 35.5g of potato, and 11.5g of salad tomato, plus 5.8g of olive oil and 320g of tap water.', 'food_groups': ['fruit', 'meat and alternatives', 'grain', 'vegetable'], 'total_calories': 370.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For lunch, I'm having an English muffin (58.0g) with American cheese (21.0g), a cooked egg omelet (55.0g), some jelly (20.0g), and a serving of pork bacon (12.0g).\", 'food_groups': ['grain', 'dairy', 'meat and alternatives', 'fruit'], 'total_calories': 406.69, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='meat and alternatives', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'fruit bowl with papaya, pineapple and orange', 'food_groups': ['fruit'], 'total_calories': 206.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='fruit', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I enjoyed a cup of Frosted Flakes cereal with a cup of 2% reduced fat milk for breakfast.', 'food_groups': ['grain', 'dairy'], 'total_calories': 273.29, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='dairy', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 WARNING dspy.utils.parallelizer: Execution cancelled due to errors or interruption.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'avocado toast', 'food_groups': ['fruit', 'vegetable', 'grain'], 'total_calories': 250.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='avocado t...roups=['grain', 'fruit'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '250 g egg whites', 'food_groups': ['meat and alternatives'], 'total_calories': 130.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='egg white...'meat and alternatives'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For lunch, I'm having a medium crust cheese pizza from a restaurant that weighs 119 grams.\", 'food_groups': ['dairy', 'grain', 'vegetable'], 'total_calories': 316.54, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='medium cr...roups=['grain', 'dairy'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'ice‑cream cone', 'food_groups': ['dairy', 'grain'], 'total_calories': 157.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value=\"FoodItem(name='ice cream...roups=['dairy', 'grain'\", input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For my lunch, I'm having a 480g energy drink that's sugar-free, from Monster.\", 'food_groups': [], 'total_calories': 24.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'bread 2 slices with cheese and scrambled eggs', 'food_groups': ['dairy', 'grain', 'meat and alternatives'], 'total_calories': 440.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='meat and alternatives', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Error evaluating gepa_deepseek_reasoner for deepseek/deepseek-reasoner: Execution cancelled due to errors or interruption.\n", | |
| "Evaluating mipro for deepseek/deepseek-reasoner\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I enjoyed a snack that included 216g of sorrel punch, 240g of peppermint tea, and a glass of 511g of tap water.', 'food_groups': ['fruit'], 'total_calories': 60.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 3 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "2.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:26 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'coffee‑flavoured corn flakes with plant‑based milk (one bowl)', 'food_groups': ['grain'], 'total_calories': 240.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 26.00 / 54 (48.1%): 100%|██████████| 54/54 [00:00<00:00, 610.56it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:26 INFO dspy.evaluate.evaluate: Average Metric: 26.0 / 54 (48.1%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating bootstrap_fewshot_random_search for deepseek/deepseek-reasoner\n", | |
| "Average Metric: 22.00 / 54 (40.7%): 100%|██████████| 54/54 [00:00<00:00, 585.99it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 INFO dspy.evaluate.evaluate: Average Metric: 22.0 / 54 (40.7%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating vanilla for deepseek/deepseek-reasoner\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For dinner, I’m having 3 grams of nacho cheese flavored Doritos with about 41 grams of topping from a meat pizza.', 'food_groups': ['grain', 'dairy', 'meat and alternatives', 'vegetable'], 'total_calories': 147.18, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0\n", | |
| " Input should be a valid dictionary or instance of FoodItem [type=model_type, input_value='meat and alternatives', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/model_type. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 25.00 / 54 (46.3%): 100%|██████████| 54/54 [00:00<00:00, 607.08it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 INFO dspy.evaluate.evaluate: Average Metric: 25.0 / 54 (46.3%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating optimized for openrouter/google/gemini-3-flash-preview\n", | |
| "Average Metric: 32.00 / 54 (59.3%): 100%|██████████| 54/54 [00:00<00:00, 472.64it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 INFO dspy.evaluate.evaluate: Average Metric: 32.0 / 54 (59.3%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating gepa for openrouter/google/gemini-3-flash-preview\n", | |
| " 0%| | 0/54 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'a couple of handfuls of chocolate granola to snack on', 'food_groups': ['grain'], 'total_calories': 280.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 1 (0.0%): 2%|▏ | 1/54 [00:00<00:03, 17.55it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'This morning, I had a 31g cereal bar and a 248g nutritional shake for breakfast.', 'food_groups': ['grain', 'dairy'], 'total_calories': 406.41, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 1.00 / 2 (50.0%): 4%|▎ | 2/54 [00:00<00:01, 32.25it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'pasta with pesto and tomatoes', 'food_groups': ['vegetable', 'grain', 'dairy'], 'total_calories': 339.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.2\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 1.00 / 2 (50.0%): 6%|▌ | 3/54 [00:00<00:01, 42.63it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For my snack, I’m enjoying 14 grams of soft fruit treats, a 240-gram bottle of plain water, and a 372-gram serving of root beer.', 'food_groups': ['fruit'], 'total_calories': 207.12, 'source': 'nutribench'}) (input_keys={'food_description'}): 3 validation errors for list[FoodItem]\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "2.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '200\\u202fg of pasta with pesto and tomatoes', 'food_groups': ['vegetable', 'grain', 'dairy'], 'total_calories': 430.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.2\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 WARNING dspy.utils.parallelizer: Execution cancelled due to errors or interruption.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'two slices of half a croissant toasted with butter and Flamengo cheese (1 slice split)', 'food_groups': ['dairy', 'grain'], 'total_calories': 336.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.2\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Error evaluating gepa_gemini_3_flash_preview for openrouter/google/gemini-3-flash-preview: Execution cancelled due to errors or interruption.\n", | |
| "Evaluating mipro for openrouter/google/gemini-3-flash-preview\n" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For a snack, I'm having a bottle of unsweetened bottled water and a juice box of 100% fruit juice.\", 'food_groups': ['fruit'], 'total_calories': 104.5, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I enjoyed a cup of Frosted Flakes cereal with a cup of 2% reduced fat milk for breakfast.', 'food_groups': ['grain', 'dairy'], 'total_calories': 273.29, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I enjoyed 200 grams of tea with sugar along with 230 grams of coconut milk rice for breakfast.', 'food_groups': ['grain', 'fruit'], 'total_calories': 558.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For dinner, I'm having 25 grams of bread, 150 grams of chicken wings, and a 250-gram mixed vegetable salad.\", 'food_groups': ['grain', 'meat and alternatives', 'vegetable'], 'total_calories': 633.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "2.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '300\\u202fg of pasta with butter and olive oil', 'food_groups': ['dairy', 'grain'], 'total_calories': 618.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '1\\u202fL Augustiner beer', 'food_groups': [], 'total_calories': 430.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I just made some popcorn for a snack. It's 14 grams, popped in oil and has butter on it.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 73.22, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "0.food_groups.2\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '100\\u202fg of peanut butter', 'food_groups': ['meat and alternatives'], 'total_calories': 588.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| " 0%| | 0/54 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I’m treating myself to a medium latte for a little snack.', 'food_groups': ['dairy'], 'total_calories': 206.4, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 0.00 / 1 (0.0%): 0%| | 0/54 [00:00<?, ?it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I'm snacking on a regular bag of cheese popcorn alongside a can of cola for lunch.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 611.84, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 1.00 / 2 (50.0%): 2%|▏ | 1/54 [00:00<00:00, 350.64it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'I snacked on 35 grams of dry pastry with 3 grams of margarine, plus a drink of 100 grams of apricot nectar and 30 grams of wheat bread.', 'food_groups': ['grain', 'dairy', 'fruit'], 'total_calories': 289.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 3 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "2.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For breakfast, I enjoyed 11.2 grams of cornstarch Atole, along with 69 grams of fried eggs, 115.8 grams of cooked maize flour, and 113 grams of homemade tomato chirmol sauce.', 'food_groups': ['grain', 'meat and alternatives', 'vegetable'], 'total_calories': 687.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 2.00 / 4 (50.0%): 6%|▌ | 3/54 [00:00<00:00, 114.18it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '2 spoons of peanut butter', 'food_groups': ['meat and alternatives'], 'total_calories': 180.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 3.00 / 5 (60.0%): 7%|▋ | 4/54 [00:00<00:00, 100.01it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': '3 buns with peanut butter and jelly (small)', 'food_groups': ['fruit', 'grain', 'meat and alternatives'], 'total_calories': 615.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 4.00 / 6 (66.7%): 9%|▉ | 5/54 [00:00<00:00, 98.44it/s] " | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I made myself a breakfast of brewed coffee weighing 240 grams and a chocolate chip muffin that's 70 grams.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 281.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For dinner, I’m having 3 grams of nacho cheese flavored Doritos with about 41 grams of topping from a meat pizza.', 'food_groups': ['grain', 'dairy', 'meat and alternatives', 'vegetable'], 'total_calories': 147.18, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 6.00 / 8 (75.0%): 13%|█▎ | 7/54 [00:00<00:00, 102.35it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'For my snack, I prepared 150g of fresh peeled apple, 33g of beans, and 60g of white bread. I also have 9.4g of raw onion, 35.5g of potato, and 11.5g of salad tomato, plus 5.8g of olive oil and 320g of tap water.', 'food_groups': ['fruit', 'meat and alternatives', 'grain', 'vegetable'], 'total_calories': 370.0, 'source': 'nutribench'}) (input_keys={'food_description'}): 3 validation errors for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='legume', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "6.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "7.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'two bananas and a cup of coffee with sugar and milk', 'food_groups': ['fruit', 'dairy'], 'total_calories': 243.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 7.00 / 10 (70.0%): 17%|█▋ | 9/54 [00:00<00:00, 113.91it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"For a snack, I'm having a small single serving bag of Cheetos and a medium frosted cinnamon bun.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 563.6, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.1\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 8.00 / 12 (66.7%): 20%|██ | 11/54 [00:00<00:00, 129.76it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I'm enjoying a peanut butter and jelly sandwich on wheat bread and a cup of unsweetened water for lunch.\", 'food_groups': ['meat and alternatives', 'fruit', 'grain'], 'total_calories': 402.08, 'source': 'nutribench'}) (input_keys={'food_description'}): 2 validation errors for list[FoodItem]\n", | |
| "0.food_groups.2\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='nut and seed', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='beverage', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 8.00 / 13 (61.5%): 22%|██▏ | 12/54 [00:00<00:00, 134.75it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': 'avocado toast', 'food_groups': ['fruit', 'vegetable', 'grain'], 'total_calories': 250.0, 'source': 'golden_dataset'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "0.food_groups.2\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='fat and oils', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n", | |
| "2025/12/20 10:00:27 ERROR dspy.utils.parallelizer: Error for Example({'food_description': \"I'm snacking on a raw banana along with a piece of hard candy.\", 'food_groups': ['fruit'], 'total_calories': 135.78, 'source': 'nutribench'}) (input_keys={'food_description'}): 1 validation error for list[FoodItem]\n", | |
| "1.food_groups.0\n", | |
| " Input should be 'dairy', 'meat and alternatives', 'grain', 'fruit' or 'vegetable' [type=literal_error, input_value='sweets and snacks', input_type=str]\n", | |
| " For further information visit https://errors.pydantic.dev/2.11/v/literal_error. Set `provide_traceback=True` for traceback.\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 31.00 / 54 (57.4%): 100%|██████████| 54/54 [00:00<00:00, 461.48it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 INFO dspy.evaluate.evaluate: Average Metric: 31.0 / 54 (57.4%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating bootstrap_fewshot_random_search for openrouter/google/gemini-3-flash-preview\n", | |
| "Average Metric: 31.00 / 54 (57.4%): 100%|██████████| 54/54 [00:00<00:00, 613.63it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:27 INFO dspy.evaluate.evaluate: Average Metric: 31.0 / 54 (57.4%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Evaluating vanilla for openrouter/google/gemini-3-flash-preview\n", | |
| "Average Metric: 32.00 / 54 (59.3%): 100%|██████████| 54/54 [00:00<00:00, 611.89it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:28 INFO dspy.evaluate.evaluate: Average Metric: 32.0 / 54 (59.3%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "deepseek_no_thinking = dspy.LM(\n", | |
| " \"deepseek/deepseek-chat\",\n", | |
| " temperature=0.00,\n", | |
| ")\n", | |
| "\n", | |
| "\n", | |
| "deepseek_thinking = dspy.LM(\n", | |
| " \"deepseek/deepseek-reasoner\",\n", | |
| " temperature=0.00,\n", | |
| ")\n", | |
| "\n", | |
| "\n", | |
| "gemini_3_flash = dspy.LM(\n", | |
| " \"openrouter/google/gemini-3-flash-preview\",\n", | |
| " temperature=0.0,\n", | |
| " extra_body={\"reasoning\": {\"enabled\": False}},\n", | |
| ")\n", | |
| "\n", | |
| "for lm in [\n", | |
| " deepseek_no_thinking,\n", | |
| " deepseek_thinking,\n", | |
| " gemini_3_flash,\n", | |
| "]:\n", | |
| " with dspy.context(lm=lm):\n", | |
| " model_name = lm.model.split(\"/\")[-1].replace(\"-\", \"_\")\n", | |
| " modules_and_names = [\n", | |
| " (module_current, f\"optimized_{model_name}\"),\n", | |
| " (module_gepa, f\"gepa_{model_name}\"),\n", | |
| " (module_mipro_v2, f\"mipro_{model_name}\"),\n", | |
| " (\n", | |
| " module_bootstrap_fewshot_random_search,\n", | |
| " f\"bootstrap_fewshot_random_search_{model_name}\",\n", | |
| " ),\n", | |
| " (module_vanilla, f\"vanilla_{model_name}\"),\n", | |
| " ]\n", | |
| " for module, name in modules_and_names:\n", | |
| " try:\n", | |
| " print(f\"Evaluating {name.replace('_'+model_name,'')} for {lm.model}\")\n", | |
| " result = evaluate(module)\n", | |
| " er.add_result(name, result.score)\n", | |
| " except Exception as e:\n", | |
| " print(f\"Error evaluating {name} for {lm.model}: {e}\")\n", | |
| " er.add_result(name, 0)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 16, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "image/png": "", | |
| "text/plain": [ | |
| "<Figure size 1000x600 with 1 Axes>" | |
| ] | |
| }, | |
| "metadata": {}, | |
| "output_type": "display_data" | |
| } | |
| ], | |
| "source": [ | |
| "# Chart the scores collected in `er`; plot_results drops entries whose score is not above 0.\n", | |
| "er.plot_results()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 17, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "optimized_gemini_3_flash_preview: 59.26\n", | |
| "vanilla_gemini_3_flash_preview: 59.26\n", | |
| "optimized_deepseek_chat: 57.41\n", | |
| "mipro_gemini_3_flash_preview: 57.41\n", | |
| "bootstrap_fewshot_random_search_gemini_3_flash_preview: 57.41\n", | |
| "gepa_gemini_2.5_flash: 53.7\n", | |
| "optimized_deepseek_reasoner: 53.7\n", | |
| "mipro_deepseek_reasoner: 48.15\n", | |
| "vanilla_deepseek_reasoner: 46.3\n", | |
| "optimized_gemini_2.5_flash: 44.44\n", | |
| "bootstrap_fewshot_random_search_deepseek_reasoner: 40.74\n", | |
| "bootstrap_fewshot_random_search_gemini_2.5_flash: 31.48\n", | |
| "vanilla_gemini_2.5_flash: 29.63\n", | |
| "mipro_v2_gemini_2.5_flash: 29.63\n", | |
| "bootstrap_fewshot_random_search_deepseek_chat: 29.63\n", | |
| "vanilla_deepseek_chat: 22.22\n", | |
| "mipro_deepseek_chat: 16.67\n", | |
| "gepa_deepseek_chat: 0\n", | |
| "gepa_deepseek_reasoner: 0\n", | |
| "gepa_gemini_3_flash_preview: 0\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "# List every recorded result as `name: score`, highest score first.\n", | |
| "er.print_results()" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 18, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "Average Metric: 32.00 / 54 (59.3%): 100%|██████████| 54/54 [00:00<00:00, 4761.95it/s]" | |
| ] | |
| }, | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "2025/12/20 10:00:28 INFO dspy.evaluate.evaluate: Average Metric: 32.0 / 54 (59.3%)\n" | |
| ] | |
| }, | |
| { | |
| "name": "stdout", | |
| "output_type": "stream", | |
| "text": [ | |
| "\n", | |
| "Example({'food_description': 'frango churrasco', 'food_groups': ['meat and alternatives'], 'total_calories': 250.0, 'source': 'golden_dataset'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Frango churrasco (grilled chicken, half)', quantity=1.0, calories=450.0, carbs=0.0, fat=28.0, protein=48.0, fiber=0.0, food_groups=['meat and alternatives'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 450.0, correct: 250.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'pasta with pesto and tomatoes', 'food_groups': ['vegetable', 'grain', 'dairy'], 'total_calories': 339.0, 'source': 'golden_dataset'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Pasta (cooked)', quantity=1.0, calories=220.0, carbs=43.0, fat=1.3, protein=8.1, fiber=2.5, food_groups=['grain']), FoodItem(name='Pesto sauce', quantity=2.0, calories=90.0, carbs=1.5, fat=9.0, protein=1.5, fiber=0.4, food_groups=['vegetable', 'dairy']), FoodItem(name='Cherry tomatoes', quantity=5.0, calories=3.0, carbs=0.7, fat=0.0, protein=0.1, fiber=0.2, food_groups=['vegetable'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 415.0, correct: 339.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': \"For dinner, I'm having 25 grams of bread, 150 grams of chicken wings, and a 250-gram mixed vegetable salad.\", 'food_groups': ['grain', 'meat and alternatives', 'vegetable'], 'total_calories': 633.0, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Bread', quantity=25.0, calories=66.0, carbs=12.5, fat=0.8, protein=2.3, fiber=0.6, food_groups=['grain']), FoodItem(name='Chicken wings', quantity=150.0, calories=304.5, carbs=0.0, fat=22.5, protein=25.5, fiber=0.0, food_groups=['meat and alternatives']), FoodItem(name='Mixed vegetable salad', quantity=250.0, calories=50.0, carbs=10.0, fat=0.5, protein=2.5, fiber=4.0, food_groups=['vegetable'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Total calories is too high (yours: 59825.0, correct: 633.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'two slices of half a croissant toasted with butter and Flamengo cheese (1 slice split)', 'food_groups': ['dairy', 'grain'], 'total_calories': 336.0, 'source': 'golden_dataset'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Croissant (half, toasted)', quantity=1.0, calories=115.0, carbs=13.0, fat=6.0, protein=2.5, fiber=0.5, food_groups=['grain']), FoodItem(name='Butter', quantity=1.0, calories=50.0, carbs=0.0, fat=5.5, protein=0.0, fiber=0.0, food_groups=['dairy']), FoodItem(name='Flamengo cheese (1 slice)', quantity=1.0, calories=90.0, carbs=0.5, fat=7.0, protein=6.5, fiber=0.0, food_groups=['dairy'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 255.0, correct: 336.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': '3 buns with peanut butter and jelly (small)', 'food_groups': ['fruit', 'grain', 'meat and alternatives'], 'total_calories': 615.0, 'source': 'golden_dataset'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Small bun', quantity=3.0, calories=120.0, carbs=22.0, fat=2.0, protein=4.0, fiber=1.0, food_groups=['grain']), FoodItem(name='Peanut butter (1 tbsp)', quantity=3.0, calories=95.0, carbs=3.0, fat=8.0, protein=4.0, fiber=1.0, food_groups=['meat and alternatives']), FoodItem(name='Jelly (1 tbsp)', quantity=3.0, calories=50.0, carbs=13.0, fat=0.0, protein=0.0, fiber=0.0, food_groups=['fruit'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 795.0, correct: 615.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': \"For a snack, I'm having a bottle of unsweetened bottled water and a juice box of 100% fruit juice.\", 'food_groups': ['fruit'], 'total_calories': 104.5, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Unsweetened bottled water', quantity=1.0, calories=0.0, carbs=0.0, fat=0.0, protein=0.0, fiber=0.0, food_groups=[]), FoodItem(name='100% fruit juice box (200ml)', quantity=1.0, calories=90.0, carbs=22.0, fat=0.1, protein=0.5, fiber=0.2, food_groups=['fruit'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 90.0, correct: 104.5)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'I snacked on 35 grams of dry pastry with 3 grams of margarine, plus a drink of 100 grams of apricot nectar and 30 grams of wheat bread.', 'food_groups': ['grain', 'dairy', 'fruit'], 'total_calories': 289.0, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Dry pastry', quantity=35.0, calories=4.8, carbs=0.6, fat=0.25, protein=0.06, fiber=0.02, food_groups=['grain']), FoodItem(name='Margarine', quantity=3.0, calories=7.1, carbs=0.0, fat=0.8, protein=0.0, fiber=0.0, food_groups=[]), FoodItem(name='Apricot nectar', quantity=100.0, calories=0.56, carbs=0.14, fat=0.0, protein=0.0, fiber=0.01, food_groups=['fruit']), FoodItem(name='Wheat bread', quantity=30.0, calories=2.6, carbs=0.48, fat=0.04, protein=0.09, fiber=0.03, food_groups=['grain'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 323.3, correct: 289.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'I enjoyed 200 grams of tea with sugar along with 230 grams of coconut milk rice for breakfast.', 'food_groups': ['grain', 'fruit'], 'total_calories': 558.0, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Tea with sugar (200g)', quantity=1.0, calories=32.0, carbs=8.0, fat=0.0, protein=0.0, fiber=0.0, food_groups=[]), FoodItem(name='Coconut milk rice (230g)', quantity=1.0, calories=450.0, carbs=65.0, fat=18.0, protein=7.0, fiber=1.5, food_groups=['grain', 'fruit'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 482.0, correct: 558.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': '3 slices of pão bolo de ló de leite with goat cheese', 'food_groups': ['dairy', 'grain'], 'total_calories': 480.0, 'source': 'golden_dataset'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Pão bolo de ló de leite (slice)', quantity=3.0, calories=150.0, carbs=22.0, fat=5.0, protein=4.0, fiber=0.5, food_groups=['grain', 'dairy']), FoodItem(name='Goat cheese (soft, 30g)', quantity=3.0, calories=80.0, carbs=0.3, fat=6.5, protein=5.5, fiber=0.0, food_groups=['dairy'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 690.0, correct: 480.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'For breakfast, I enjoyed 11.2 grams of cornstarch Atole, along with 69 grams of fried eggs, 115.8 grams of cooked maize flour, and 113 grams of homemade tomato chirmol sauce.', 'food_groups': ['grain', 'meat and alternatives', 'vegetable'], 'total_calories': 687.0, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Cornstarch Atole (powder)', quantity=11.2, calories=3.8, carbs=0.9, fat=0.0, protein=0.0, fiber=0.0, food_groups=['grain']), FoodItem(name='Fried eggs', quantity=69.0, calories=1.96, carbs=0.01, fat=0.15, protein=0.13, fiber=0.0, food_groups=['meat and alternatives']), FoodItem(name='Cooked maize flour (masa)', quantity=115.8, calories=1.26, carbs=0.27, fat=0.01, protein=0.03, fiber=0.02, food_groups=['grain']), FoodItem(name='Homemade tomato chirmol sauce', quantity=113.0, calories=0.35, carbs=0.06, fat=0.01, protein=0.01, fiber=0.01, food_groups=['vegetable'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 363.258, correct: 687.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': \"I'm snacking on a regular bag of cheese popcorn alongside a can of cola for lunch.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 611.84, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Cheese popcorn (regular bag, approx. 50g)', quantity=1.0, calories=250.0, carbs=26.0, fat=15.0, protein=4.0, fiber=4.0, food_groups=['grain', 'dairy']), FoodItem(name='Cola (can, 355ml)', quantity=1.0, calories=140.0, carbs=39.0, fat=0.0, protein=0.0, fiber=0.0, food_groups=[])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 390.0, correct: 611.84)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'I’m treating myself to a medium latte for a little snack.', 'food_groups': ['dairy'], 'total_calories': 206.4, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Medium Latte', quantity=1.0, calories=150.0, carbs=15.0, fat=6.0, protein=10.0, fiber=0.0, food_groups=['dairy'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 150.0, correct: 206.4)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'For dinner, I’m having 3 grams of nacho cheese flavored Doritos with about 41 grams of topping from a meat pizza.', 'food_groups': ['grain', 'dairy', 'meat and alternatives', 'vegetable'], 'total_calories': 147.18, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Nacho Cheese Doritos', quantity=3.0, calories=5.0, carbs=0.6, fat=0.25, protein=0.07, fiber=0.04, food_groups=['grain']), FoodItem(name='Meat Pizza Topping (Cheese and Meat)', quantity=41.0, calories=2.8, carbs=0.05, fat=0.22, protein=0.16, fiber=0.0, food_groups=['meat and alternatives', 'dairy'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 129.8, correct: 147.18)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': \"I made myself a breakfast of brewed coffee weighing 240 grams and a chocolate chip muffin that's 70 grams.\", 'food_groups': ['grain', 'dairy'], 'total_calories': 281.0, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Brewed coffee', quantity=240.0, calories=1.0, carbs=0.0, fat=0.0, protein=0.1, fiber=0.0, food_groups=[]), FoodItem(name='Chocolate chip muffin', quantity=70.0, calories=260.0, carbs=34.0, fat=13.0, protein=3.5, fiber=1.2, food_groups=['grain'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Total calories is too high (yours: 18440.0, correct: 281.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'I made myself a quick lunch with a cup of chicken noodle soup from a can and a medium slice of toasted multigrain bread.', 'food_groups': ['meat and alternatives', 'grain'], 'total_calories': 153.87, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Canned chicken noodle soup', quantity=1.0, calories=130.0, carbs=18.0, fat=3.0, protein=7.0, fiber=1.0, food_groups=['meat and alternatives', 'grain']), FoodItem(name='Multigrain bread (toasted)', quantity=1.0, calories=100.0, carbs=18.0, fat=1.5, protein=4.0, fiber=3.0, food_groups=['grain'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 230.0, correct: 153.87)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'I’m snacking on a small bag of flavored potato sticks along with a 500 ml bottle of diet fruit-flavored drink.', 'food_groups': ['vegetable'], 'total_calories': 144.76, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Flavored potato sticks (small bag, 30g)', quantity=1.0, calories=160.0, carbs=17.0, fat=10.0, protein=2.0, fiber=1.0, food_groups=['vegetable']), FoodItem(name='Diet fruit-flavored drink (500 ml)', quantity=1.0, calories=10.0, carbs=2.0, fat=0.0, protein=0.0, fiber=0.0, food_groups=[])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 170.0, correct: 144.76)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'stracciatella with confit grapes and three little focaccia pieces', 'food_groups': ['fruit', 'dairy', 'grain'], 'total_calories': 530.0, 'source': 'golden_dataset'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Stracciatella cheese (100g)', quantity=1.0, calories=250.0, carbs=3.5, fat=22.0, protein=10.0, fiber=0.0, food_groups=['dairy']), FoodItem(name='Confit grapes (serving)', quantity=1.0, calories=80.0, carbs=18.0, fat=1.5, protein=0.5, fiber=1.0, food_groups=['fruit']), FoodItem(name='Focaccia (small piece)', quantity=3.0, calories=90.0, carbs=13.0, fat=3.5, protein=2.0, fiber=0.5, food_groups=['grain'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 600.0, correct: 530.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': \"For my lunch, I'm having a 480g energy drink that's sugar-free, from Monster.\", 'food_groups': [], 'total_calories': 24.0, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Monster Energy Drink (Sugar-Free)', quantity=4.8, calories=2.0, carbs=1.0, fat=0.0, protein=0.0, fiber=0.0, food_groups=[])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 9.6, correct: 24.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'I’m starting my day with a cup of ready-to-eat cereal for breakfast.', 'food_groups': ['grain'], 'total_calories': 149.2, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Ready-to-eat cereal', quantity=1.0, calories=110.0, carbs=24.0, fat=1.0, protein=2.0, fiber=1.5, food_groups=['grain'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 110.0, correct: 149.2)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'I enjoyed a snack that included 216g of sorrel punch, 240g of peppermint tea, and a glass of 511g of tap water.', 'food_groups': ['fruit'], 'total_calories': 60.0, 'source': 'nutribench'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Sorrel punch', quantity=2.16, calories=45.0, carbs=11.5, fat=0.0, protein=0.1, fiber=0.0, food_groups=['fruit']), FoodItem(name='Peppermint tea', quantity=2.4, calories=1.0, carbs=0.2, fat=0.0, protein=0.0, fiber=0.0, food_groups=[]), FoodItem(name='Tap water', quantity=5.11, calories=0.0, carbs=0.0, fat=0.0, protein=0.0, fiber=0.0, food_groups=[])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 99.60000000000001, correct: 60.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'coffee‑flavoured corn flakes with plant‑based milk (one bowl)', 'food_groups': ['grain'], 'total_calories': 240.0, 'source': 'golden_dataset'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Corn flakes (coffee-flavored)', quantity=1.0, calories=110.0, carbs=26.0, fat=0.5, protein=2.0, fiber=1.0, food_groups=['grain']), FoodItem(name='Plant-based milk (unsweetened almond/soy)', quantity=1.0, calories=60.0, carbs=4.0, fat=3.0, protein=4.0, fiber=0.5, food_groups=['meat and alternatives'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 170.0, correct: 240.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n", | |
| "Example({'food_description': 'ice‑cream cone', 'food_groups': ['dairy', 'grain'], 'total_calories': 157.0, 'source': 'golden_dataset'}) (input_keys={'food_description'})\n", | |
| "Prediction(\n", | |
| " food_items=[FoodItem(name='Ice cream (scoop)', quantity=1.0, calories=137.0, carbs=15.0, fat=7.0, protein=2.0, fiber=0.0, food_groups=['dairy']), FoodItem(name='Sugar cone', quantity=1.0, calories=50.0, carbs=11.0, fat=0.5, protein=1.0, fiber=0.0, food_groups=['grain'])]\n", | |
| ")\n", | |
| "Prediction(\n", | |
| " score=0,\n", | |
| " feedback='INCORRECT: Your answer was not within 10% of the correct answer (yours: 187.0, correct: 157.0)'\n", | |
| ")\n", | |
| "****************************************************************************************************\n" | |
| ] | |
| } | |
| ], | |
| "source": [ | |
| "with dspy.context(lm=gemini_3_flash):\n", | |
| " result = evaluate(module_current)\n", | |
| "\n", | |
| "\n", | |
| "for example, prediction, feedback in result.results:\n", | |
| " if feedback.score == 1:\n", | |
| " continue\n", | |
| "\n", | |
| " print(example)\n", | |
| " print(prediction)\n", | |
| " print(feedback)\n", | |
| " print(\"*\" * 100)" | |
| ] | |
| }, | |
| { | |
| "cell_type": "markdown", | |
| "metadata": {}, | |
| "source": [ | |
| "## Sanity Check " | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "name": "stderr", | |
| "output_type": "stream", | |
| "text": [ | |
| "100%|██████████| 418/418 [00:01<00:00, 334.33it/s]\n" | |
| ] | |
| }, | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div><style>\n", | |
| ".dataframe > thead > tr,\n", | |
| ".dataframe > tbody > tr {\n", | |
| " text-align: right;\n", | |
| " white-space: pre-wrap;\n", | |
| "}\n", | |
| "</style>\n", | |
| "<small>shape: (418, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>started_at</th><th>food</th><th>model_prediction</th><th>total_calories_predicted</th></tr><tr><td>str</td><td>str</td><td>struct[2]</td><td>f64</td></tr></thead><tbody><tr><td>"2025-12-13T08:29:30.781696Z"</td><td>"Cappuccino groß"</td><td>{"- corn snack those white\n", | |
| "- avocado pesto toast joe and the joe and the juice \n", | |
| "- 1.5 plate of tomato rice with cecci and beans",[{"Corn snack (white)",1.0,150.0,25.0,5.0,2.0,1.0,["grain"]}, {"Avocado pesto toast (Joe & The Juice)",1.0,350.0,35.0,20.0,8.0,6.0,["grain", "fruit", "vegetable"]}, {"Tomato rice with chickpeas and beans (plate)",1.5,400.0,70.0,8.0,15.0,10.0,["grain", "vegetable", "meat and alternatives"]}]}</td><td>1100.0</td></tr><tr><td>"2025-12-12T19:44:31.387084Z"</td><td>"Kleines Bier"</td><td>{"Kleiner Milka-Nikolaus weiße Schokolade",[{"Milka Nikolaus (small, white chocolate)",1.0,150.0,15.0,9.0,2.0,0.0,["dairy"]}]}</td><td>150.0</td></tr><tr><td>"2025-12-12T19:44:17.451188Z"</td><td>"Kleiner Milka-Nikolaus weiße S…</td><td>{"one bun with cheese and butter \n", | |
| "half a pain au chocolat\n", | |
| "take out shai butter chicken with spicy cheese naan",[{"Bun",1.0,150.0,28.0,2.0,5.0,1.5,["grain"]}, {"Cheese slice",1.0,75.0,0.5,6.5,4.5,0.0,["dairy"]}, … {"Spicy cheese naan",1.0,350.0,45.0,15.0,12.0,4.0,["grain", "dairy"]}]}</td><td>1251.0</td></tr><tr><td>"2025-12-12T19:43:56.533952Z"</td><td>"Linsen-Spinat-Daal mit Kokosmi…</td><td>{"Kleines Bier",[{"Small beer (330ml)",1.0,130.0,10.0,0.0,1.0,0.0,[]}]}</td><td>130.0</td></tr><tr><td>"2025-12-12T18:11:36.026841Z"</td><td>"one bun with cheese and butter…</td><td>{"Hühnchen mit Tomate, Mandeln und Rosinen",[{"Chicken breast (cooked, 100g)",1.0,165.0,0.0,3.6,31.0,0.0,["meat and alternatives"]}, {"Tomato (medium)",1.0,22.0,4.8,0.2,1.1,1.5,["vegetable"]}, … {"Raisins (10g)",1.0,30.0,8.0,0.0,0.3,0.4,["fruit"]}]}</td><td>275.0</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>"2025-06-04T09:41:37.170147Z"</td><td>"two croissants with cream and …</td><td>{"- 4 toasts with cheese\n", | |
| "- 1/2 cardamom bun\n", | |
| "- 1/2 pain au chocolat\n", | |
| "- 2 plates of pasta al forno (pasta + tomato sauce)\n", | |
| "- some light snacking",[{"Toast (white bread)",4.0,77.0,15.0,1.0,2.5,1.0,["grain"]}, {"Cheese slice",4.0,75.0,0.5,6.5,4.5,0.0,["dairy"]}, … {"Light snack (generic)",1.0,100.0,15.0,4.0,2.0,1.0,[]}]}</td><td>1978.0</td></tr><tr><td>"2025-06-04T09:40:41.788553Z"</td><td>"fried eggs 2 with ham and whit…</td><td>{"fried eggs 2 with ham and white bread bun 4 slices of tomato and four slices of cucumber",[{"Fried egg",2.0,90.0,0.6,7.0,6.0,0.0,["meat and alternatives"]}, {"Ham slice",1.0,30.0,0.5,1.5,3.5,0.0,["meat and alternatives"]}, … {"Cucumber slice",4.0,1.0,0.3,0.01,0.05,0.1,["vegetable"]}]}</td><td>376.0</td></tr><tr><td>"2025-06-03T21:16:04.784519Z"</td><td>"- 1/5 psta pesto airplane menu…</td><td>{"- 1.3 portuon of rice, falafel and beans\n", | |
| "- 1 small ice cream from the supermarket ",[{"Rice (cooked)",1.3,169.0,37.0,0.3,3.3,0.4,["grain"]}, {"Falafel (portion)",1.3,190.0,20.0,10.0,8.0,5.0,["meat and alternatives"]}, … {"Small ice cream (supermarket)",1.0,150.0,20.0,7.0,2.0,0.5,["dairy"]}]}</td><td>759.7</td></tr><tr><td>"2025-06-02T20:06:15.236051Z"</td><td>"- leftover pasta from yesterda…</td><td>{"- 2 toasts with peanut butter and jelly\n", | |
| "- 1 half omelette eith cheese (4 eggs) + 1.5 toasted pices of bread + vegan butter \n", | |
| "- 400ml recovery drink (protein focus)",[{"Whole wheat toast",2.0,80.0,14.0,1.0,3.0,2.0,["grain"]}, {"Peanut butter (1 tbsp)",2.0,95.0,3.5,8.0,4.0,1.0,["meat and alternatives"]}, … {"Recovery drink (protein focus)",400.0,200.0,20.0,2.0,30.0,0.0,[]}]}</td><td>80920.0</td></tr><tr><td>"2025-06-01T20:16:08.341112Z"</td><td>"- 4 toasts with cheese\n", | |
| "- 1/2 c…</td><td>{"butter",[{"Butter (1 tbsp)",1.0,102.0,0.1,11.5,0.1,0.0,["dairy"]}]}</td><td>102.0</td></tr></tbody></table></div>" | |
| ], | |
| "text/plain": [ | |
| "shape: (418, 4)\n", | |
| "┌────────────────────────┬────────────────────────┬────────────────────────┬───────────────────────┐\n", | |
| "│ started_at ┆ food ┆ model_prediction ┆ total_calories_predic │\n", | |
| "│ --- ┆ --- ┆ --- ┆ ted │\n", | |
| "│ str ┆ str ┆ struct[2] ┆ --- │\n", | |
| "│ ┆ ┆ ┆ f64 │\n", | |
| "╞════════════════════════╪════════════════════════╪════════════════════════╪═══════════════════════╡\n", | |
| "│ 2025-12-13T08:29:30.78 ┆ Cappuccino groß ┆ {\"- corn snack those ┆ 1100.0 │\n", | |
| "│ 1696Z ┆ ┆ white ┆ │\n", | |
| "│ ┆ ┆ - a… ┆ │\n", | |
| "│ 2025-12-12T19:44:31.38 ┆ Kleines Bier ┆ {\"Kleiner ┆ 150.0 │\n", | |
| "│ 7084Z ┆ ┆ Milka-Nikolaus weiße… ┆ │\n", | |
| "│ 2025-12-12T19:44:17.45 ┆ Kleiner Milka-Nikolaus ┆ {\"one bun with cheese ┆ 1251.0 │\n", | |
| "│ 1188Z ┆ weiße S… ┆ and butt… ┆ │\n", | |
| "│ 2025-12-12T19:43:56.53 ┆ Linsen-Spinat-Daal mit ┆ {\"Kleines ┆ 130.0 │\n", | |
| "│ 3952Z ┆ Kokosmi… ┆ Bier\",[{\"Small beer … ┆ │\n", | |
| "│ 2025-12-12T18:11:36.02 ┆ one bun with cheese ┆ {\"Hühnchen mit Tomate, ┆ 275.0 │\n", | |
| "│ 6841Z ┆ and butter… ┆ Mandeln… ┆ │\n", | |
| "│ … ┆ … ┆ … ┆ … │\n", | |
| "│ 2025-06-04T09:41:37.17 ┆ two croissants with ┆ {\"- 4 toasts with ┆ 1978.0 │\n", | |
| "│ 0147Z ┆ cream and … ┆ cheese ┆ │\n", | |
| "│ ┆ ┆ - 1/2… ┆ │\n", | |
| "│ 2025-06-04T09:40:41.78 ┆ fried eggs 2 with ham ┆ {\"fried eggs 2 with ┆ 376.0 │\n", | |
| "│ 8553Z ┆ and whit… ┆ ham and wh… ┆ │\n", | |
| "│ 2025-06-03T21:16:04.78 ┆ - 1/5 psta pesto ┆ {\"- 1.3 portuon of ┆ 759.7 │\n", | |
| "│ 4519Z ┆ airplane menu… ┆ rice, falaf… ┆ │\n", | |
| "│ 2025-06-02T20:06:15.23 ┆ - leftover pasta from ┆ {\"- 2 toasts with ┆ 80920.0 │\n", | |
| "│ 6051Z ┆ yesterda… ┆ peanut butte… ┆ │\n", | |
| "│ 2025-06-01T20:16:08.34 ┆ - 4 toasts with cheese ┆ {\"butter\",[{\"Butter (1 ┆ 102.0 │\n", | |
| "│ 1112Z ┆ - 1/2 c… ┆ tbsp)\",… ┆ │\n", | |
| "└────────────────────────┴────────────────────────┴────────────────────────┴───────────────────────┘" | |
| ] | |
| }, | |
| "execution_count": 19, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "import json\n", | |
| "\n", | |
| "with open(\n", | |
| " \"../datasets/wandb/weave_export_taralli_api_2025-12-13.jsonl\", \"r\"\n", | |
| ") as json_file:\n", | |
| " json_list = list(json_file)\n", | |
| "records = []\n", | |
| "for json_str in json_list:\n", | |
| " if \"No endpoints found \" in json_str:\n", | |
| " continue\n", | |
| " result = json.loads(json_str)\n", | |
| " records.append(result)\n", | |
| "\n", | |
| "prod_data = polars.from_dicts(records)\n", | |
| "# all_inputs = [_.get(\"food\") for _ in prod_data[\"inputs\"]]\n", | |
| "prod_data = prod_data.with_columns(\n", | |
| " polars.col(\"inputs\").struct.field(\"food\").alias(\"food\")\n", | |
| ")\n", | |
| "\n", | |
| "prod_data = prod_data.filter(polars.col(\"food\").is_not_null())\n", | |
| "\n", | |
| "prod_data[[\"started_at\", \"food\"]]\n", | |
| "\n", | |
| "from concurrent.futures import ThreadPoolExecutor, as_completed\n", | |
| "from tqdm import tqdm\n", | |
| "\n", | |
| "\n", | |
| "total_cals = []\n", | |
| "model_prediction = []\n", | |
| "with dspy.context(lm=gemini_3_flash):\n", | |
| "\n", | |
| " def predict_total_calories(food):\n", | |
| " result = module_current(food_description=food)\n", | |
| " try:\n", | |
| " na = NutritionAnalysis(**result.toDict(), food_description=food)\n", | |
| " return na.total_calories(), na.model_dump()\n", | |
| " except Exception as e:\n", | |
| " print(f\"Error predicting {food}: {e}\")\n", | |
| " return -1, {}\n", | |
| "\n", | |
| " foods = prod_data[\"food\"]\n", | |
| " with ThreadPoolExecutor(max_workers=10) as executor:\n", | |
| " futures = [executor.submit(predict_total_calories, food) for food in foods]\n", | |
| " for future in tqdm(as_completed(futures), total=len(futures)):\n", | |
| " total_cals.append(future.result()[0])\n", | |
| " model_prediction.append(future.result()[1])\n", | |
| "\n", | |
| "\n", | |
| "prod_data = prod_data.with_columns(\n", | |
| " polars.Series(name=\"total_calories_predicted\", values=total_cals),\n", | |
| " polars.Series(name=\"model_prediction\", values=model_prediction),\n", | |
| ")\n", | |
| "\n", | |
| "cols = [\"started_at\", \"food\", \"model_prediction\", \"total_calories_predicted\"]\n", | |
| "prod_data[cols]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "outputs": [ | |
| { | |
| "data": { | |
| "text/html": [ | |
| "<div><style>\n", | |
| ".dataframe > thead > tr,\n", | |
| ".dataframe > tbody > tr {\n", | |
| " text-align: right;\n", | |
| " white-space: pre-wrap;\n", | |
| "}\n", | |
| "</style>\n", | |
| "<small>shape: (21, 4)</small><table border=\"1\" class=\"dataframe\"><thead><tr><th>started_at</th><th>food</th><th>model_prediction</th><th>total_calories_predicted</th></tr><tr><td>str</td><td>str</td><td>struct[2]</td><td>f64</td></tr></thead><tbody><tr><td>"2025-11-30T10:14:00.632469Z"</td><td>"zartbitterschokolade 30 g"</td><td>{"zartbitterschokoladeblättchen (50g)",[{"Dark chocolate leaves",50.0,270.0,25.0,18.0,3.0,5.0,[]}]}</td><td>13500.0</td></tr><tr><td>"2025-11-29T06:52:50.389569Z"</td><td>"Vollkornsemmmel mit Butter und…</td><td>{"\n", | |
| "mehlige Kartoffeln 165 g\n", | |
| "150 ml Milch\n", | |
| "125 g Lachsfilet\n", | |
| "20 g Butter\n", | |
| "12 g Mehl\n", | |
| "1 EL mittelscharfer Senf\n", | |
| "Dill\n", | |
| "Salz\n", | |
| "Pfeffer",[{"Potatoes (floury)",165.0,127.0,29.0,0.1,2.5,2.0,["vegetable"]}, {"Milk",150.0,90.0,7.2,4.5,4.8,0.0,["dairy"]}, … {"Pepper",1.0,0.0,0.0,0.0,0.0,0.0,[]}]}</td><td>70076.0</td></tr><tr><td>"2025-11-28T17:42:18.138704Z"</td><td>"\n", | |
| "mehlige Kartoffeln 0.165 kg\n", | |
| "1…</td><td>{"For breakfast, I ate a plain bun weighing 126 grams and sprinkled on 27 grams of raw sugar.",[{"Plain bun",126.0,370.0,70.0,5.0,12.0,3.0,["grain"]}, {"Raw sugar",27.0,108.0,27.0,0.0,0.0,0.0,[]}]}</td><td>49536.0</td></tr><tr><td>"2025-11-28T17:41:28.547643Z"</td><td>"\n", | |
| "165 g mehlige Kartoffeln\n", | |
| "150 …</td><td>{"150 mL Pinot noir",[{"Pinot Noir",150.0,120.0,3.6,0.0,0.1,0.0,[]}]}</td><td>18000.0</td></tr><tr><td>"2025-11-28T12:27:47.265307Z"</td><td>"3 Löffel Kartoffelsalat mit Es…</td><td>{"Hühnchen mit Gemüse und Chop-Suey Soße",[{"Chicken breast (cooked)",150.0,240.0,0.0,6.0,45.0,0.0,["meat and alternatives"]}, {"Mixed vegetables (stir-fry)",150.0,60.0,12.0,0.5,3.0,4.0,["vegetable"]}, {"Chop Suey Sauce",60.0,40.0,8.0,0.5,1.0,0.5,[]}]}</td><td>47400.0</td></tr><tr><td>…</td><td>…</td><td>…</td><td>…</td></tr><tr><td>"2025-11-17T18:33:07.357719Z"</td><td>"small bit of baguette bread"</td><td>{"2 angebratene Kartoffelknödel\n", | |
| "100 g Creme fraiche\n", | |
| "1/4 Gurke\n", | |
| "1/2 Karotte",[{"Pan-fried potato dumpling",2.0,250.0,35.0,10.0,4.0,3.0,["grain", "vegetable"]}, {"Creme fraiche",100.0,290.0,3.0,30.0,2.0,0.0,["dairy"]}, … {"Carrot",0.5,25.0,5.8,0.1,0.6,1.7,["vegetable"]}]}</td><td>29516.25</td></tr><tr><td>"2025-11-16T10:45:38.218648Z"</td><td>"running gel"</td><td>{"oats 26g with almond milk 180ml and 30g of protein powder",[{"Oats",26.0,98.8,17.7,2.0,4.4,2.6,["grain"]}, {"Almond milk (unsweetened)",180.0,27.0,1.0,2.2,0.7,0.4,["dairy"]}, {"Protein powder",30.0,120.0,3.0,2.0,24.0,1.0,["meat and alternatives"]}]}</td><td>11028.8</td></tr><tr><td>"2025-11-15T20:36:30.045973Z"</td><td>"300g pasta ai fagioli\n", | |
| "velutate…</td><td>{"50 g Schokolade Ritter Sport Cunchy Cappuccino ",[{"Ritter Sport Crunchy Cappuccino chocolate",50.0,270.0,27.0,17.0,3.0,1.5,[]}]}</td><td>13500.0</td></tr><tr><td>"2025-09-11T18:07:31.869962Z"</td><td>"Una ensalada de pollo, huevo d…</td><td>{"2 toasts with peanut butter and cheese \n", | |
| "for dinner: fish (one small ~200g with roasted potatoes and pomodoroni)\n", | |
| "3 camembert slices with small toasts snacks",[{"Toast (whole wheat)",2.0,80.0,14.0,1.0,3.0,2.0,["grain"]}, {"Peanut butter (1 tbsp)",2.0,95.0,3.5,8.0,4.0,1.0,["meat and alternatives"]}, … {"Small toast (for cheese)",3.0,40.0,7.0,0.5,1.5,0.5,["grain"]}]}</td><td>36786.0</td></tr><tr><td>"2025-06-02T20:06:15.236051Z"</td><td>"- leftover pasta from yesterda…</td><td>{"- 2 toasts with peanut butter and jelly\n", | |
| "- 1 half omelette eith cheese (4 eggs) + 1.5 toasted pices of bread + vegan butter \n", | |
| "- 400ml recovery drink (protein focus)",[{"Whole wheat toast",2.0,80.0,14.0,1.0,3.0,2.0,["grain"]}, {"Peanut butter (1 tbsp)",2.0,95.0,3.5,8.0,4.0,1.0,["meat and alternatives"]}, … {"Recovery drink (protein focus)",400.0,200.0,20.0,2.0,30.0,0.0,[]}]}</td><td>80920.0</td></tr></tbody></table></div>" | |
| ], | |
| "text/plain": [ | |
| "shape: (21, 4)\n", | |
| "┌────────────────────────┬────────────────────────┬────────────────────────┬───────────────────────┐\n", | |
| "│ started_at ┆ food ┆ model_prediction ┆ total_calories_predic │\n", | |
| "│ --- ┆ --- ┆ --- ┆ ted │\n", | |
| "│ str ┆ str ┆ struct[2] ┆ --- │\n", | |
| "│ ┆ ┆ ┆ f64 │\n", | |
| "╞════════════════════════╪════════════════════════╪════════════════════════╪═══════════════════════╡\n", | |
| "│ 2025-11-30T10:14:00.63 ┆ zartbitterschokolade ┆ {\"zartbitterschokolade ┆ 13500.0 │\n", | |
| "│ 2469Z ┆ 30 g ┆ blättche… ┆ │\n", | |
| "│ 2025-11-29T06:52:50.38 ┆ Vollkornsemmmel mit ┆ {\" ┆ 70076.0 │\n", | |
| "│ 9569Z ┆ Butter und… ┆ mehlige Kartoffeln 165 ┆ │\n", | |
| "│ ┆ ┆ g ┆ │\n", | |
| "│ ┆ ┆ 15… ┆ │\n", | |
| "│ 2025-11-28T17:42:18.13 ┆ ┆ {\"For breakfast, I ate ┆ 49536.0 │\n", | |
| "│ 8704Z ┆ mehlige Kartoffeln ┆ a plain… ┆ │\n", | |
| "│ ┆ 0.165 kg ┆ ┆ │\n", | |
| "│ ┆ 1… ┆ ┆ │\n", | |
| "│ 2025-11-28T17:41:28.54 ┆ ┆ {\"150 mL Pinot ┆ 18000.0 │\n", | |
| "│ 7643Z ┆ 165 g mehlige ┆ noir\",[{\"Pinot … ┆ │\n", | |
| "│ ┆ Kartoffeln ┆ ┆ │\n", | |
| "│ ┆ 150 … ┆ ┆ │\n", | |
| "│ 2025-11-28T12:27:47.26 ┆ 3 Löffel ┆ {\"Hühnchen mit Gemüse ┆ 47400.0 │\n", | |
| "│ 5307Z ┆ Kartoffelsalat mit Es… ┆ und Chop… ┆ │\n", | |
| "│ … ┆ … ┆ … ┆ … │\n", | |
| "│ 2025-11-17T18:33:07.35 ┆ small bit of baguette ┆ {\"2 angebratene ┆ 29516.25 │\n", | |
| "│ 7719Z ┆ bread ┆ Kartoffelknöde… ┆ │\n", | |
| "│ 2025-11-16T10:45:38.21 ┆ running gel ┆ {\"oats 26g with almond ┆ 11028.8 │\n", | |
| "│ 8648Z ┆ ┆ milk 18… ┆ │\n", | |
| "│ 2025-11-15T20:36:30.04 ┆ 300g pasta ai fagioli ┆ {\"50 g Schokolade ┆ 13500.0 │\n", | |
| "│ 5973Z ┆ velutate… ┆ Ritter Sport… ┆ │\n", | |
| "│ 2025-09-11T18:07:31.86 ┆ Una ensalada de pollo, ┆ {\"2 toasts with peanut ┆ 36786.0 │\n", | |
| "│ 9962Z ┆ huevo d… ┆ butter … ┆ │\n", | |
| "│ 2025-06-02T20:06:15.23 ┆ - leftover pasta from ┆ {\"- 2 toasts with ┆ 80920.0 │\n", | |
| "│ 6051Z ┆ yesterda… ┆ peanut butte… ┆ │\n", | |
| "└────────────────────────┴────────────────────────┴────────────────────────┴───────────────────────┘" | |
| ] | |
| }, | |
| "execution_count": 20, | |
| "metadata": {}, | |
| "output_type": "execute_result" | |
| } | |
| ], | |
| "source": [ | |
| "prod_data.filter(polars.col(\"total_calories_predicted\") > 10_000)[cols]" | |
| ] | |
| }, | |
| { | |
| "cell_type": "code", | |
| "execution_count": null, | |
| "metadata": {}, | |
| "outputs": [], | |
| "source": [] | |
| } | |
| ], | |
| "metadata": { | |
| "kernelspec": { | |
| "display_name": ".env", | |
| "language": "python", | |
| "name": "python3" | |
| }, | |
| "language_info": { | |
| "codemirror_mode": { | |
| "name": "ipython", | |
| "version": 3 | |
| }, | |
| "file_extension": ".py", | |
| "mimetype": "text/x-python", | |
| "name": "python", | |
| "nbconvert_exporter": "python", | |
| "pygments_lexer": "ipython3", | |
| "version": "3.12.6" | |
| } | |
| }, | |
| "nbformat": 4, | |
| "nbformat_minor": 2 | |
| } |
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment