Created
February 12, 2026 17:40
-
-
Save realark/50833d07746ca3be9ac36559f6db5418 to your computer and use it in GitHub Desktop.
spawn one project log per eval case
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| using System.Diagnostics; | |
| using Braintrust.Sdk.Eval; | |
| using Braintrust.Sdk.Instrumentation.OpenAI; | |
| using Braintrust.Sdk.Trace; | |
| using OpenAI; | |
| using OpenAI.Chat; | |
| namespace Braintrust.Sdk.Examples.EvalExample; | |
/// <summary>
/// Example program that runs a Braintrust eval in which every eval case starts its own
/// independent root trace instead of nesting its spans under the ambient activity.
/// </summary>
class Program
{
    /// <summary>
    /// Creates an independent trace that is not parented under any current activity or Braintrust context.
    /// Captures the current experiment ID (if any) and stores it as an attribute on the new trace.
    /// Returns a disposable scope that restores the original context when disposed.
    /// </summary>
    /// <param name="activitySource">Source used to start the new root span.</param>
    /// <param name="spanName">Name given to the new root span.</param>
    /// <returns>
    /// A scope exposing the started span (null when the source produced none) that must be
    /// disposed to end the span and restore the previous ambient state.
    /// </returns>
    /// <exception cref="ArgumentNullException"><paramref name="activitySource"/> is null.</exception>
    static IndependentTraceScope CreateIndependentTrace(ActivitySource activitySource, string spanName)
    {
        // Validate before touching any ambient state so a bad argument cannot leave it modified.
        ArgumentNullException.ThrowIfNull(activitySource);

        // Read the experiment ID before the Braintrust context is replaced below.
        var experimentId = BraintrustContext.Current?.ExperimentId;
        var previousActivity = Activity.Current;
        IDisposable? contextScope = null;
        Activity? rootSpan = null;

        // With no ambient activity, the span started below has no parent.
        Activity.Current = null;
        try
        {
            // NOTE(review): OfProject(null!) presumably yields a context with no project/experiment
            // parent — confirm against the Braintrust SDK.
            contextScope = BraintrustContext.OfProject(null!).MakeCurrent();
            rootSpan = activitySource.StartActivity(spanName);
            if (rootSpan != null && experimentId != null)
            {
                rootSpan.SetTag("original-experiment-id", experimentId);
            }

            return new IndependentTraceScope(rootSpan, contextScope, previousActivity);
        }
        catch
        {
            // Roll back everything changed so far; otherwise the caller would be left with a
            // cleared Activity.Current and a leaked context scope.
            rootSpan?.Dispose();
            contextScope?.Dispose();
            Activity.Current = previousActivity;
            throw;
        }
    }

    /// <summary>
    /// Owns an independent root span together with the context scope that was made current for it,
    /// and restores the previously current activity when disposed.
    /// </summary>
    private sealed class IndependentTraceScope : IDisposable
    {
        /// <summary>The root span of the independent trace, or null if none was started.</summary>
        public Activity? Activity { get; }

        private readonly IDisposable _contextScope;
        private readonly Activity? _previousActivity;
        private bool _disposed;

        /// <summary>Stores the span, the context scope to release, and the activity to restore.</summary>
        /// <param name="activity">The started root span, if any.</param>
        /// <param name="contextScope">Scope returned when the replacement context was made current.</param>
        /// <param name="previousActivity">The activity that was current before the trace was created.</param>
        public IndependentTraceScope(Activity? activity, IDisposable contextScope, Activity? previousActivity)
        {
            Activity = activity;
            _contextScope = contextScope;
            _previousActivity = previousActivity;
        }

        /// <summary>
        /// Ends the span, releases the context scope, and restores the previous activity.
        /// Calling it more than once has no further effect.
        /// </summary>
        public void Dispose()
        {
            if (_disposed)
            {
                return;
            }

            _disposed = true;

            // Same order as before (span, context scope, ambient activity), but each later step
            // now runs even if an earlier one throws.
            try
            {
                Activity?.Dispose();
            }
            finally
            {
                try
                {
                    _contextScope.Dispose();
                }
                finally
                {
                    System.Diagnostics.Activity.Current = _previousActivity;
                }
            }
        }
    }

    /// <summary>
    /// Entry point: builds an eval with four food cases and two scorers, runs it, and prints the report.
    /// Requires the OPENAI_API_KEY environment variable; exits with a non-zero code when it is missing.
    /// </summary>
    /// <param name="args">Command-line arguments (unused).</param>
    static async Task Main(string[] args)
    {
        var openAIApiKey = Environment.GetEnvironmentVariable("OPENAI_API_KEY");
        if (string.IsNullOrEmpty(openAIApiKey))
        {
            // Report the failure on stderr and through the exit code so scripts can detect it.
            Console.Error.WriteLine("ERROR: OPENAI_API_KEY environment variable not set. Bailing.");
            Environment.ExitCode = 1;
            return;
        }

        var braintrust = Braintrust.Get();
        var activitySource = braintrust.GetActivitySource();

        // NOTE: the task function below is a stub and never calls this client; the wrap call is
        // kept in case it has side effects.
        OpenAIClient openAIClient = BraintrustOpenAI.WrapOpenAI(activitySource, openAIApiKey);

        // Stub task function: opens one independent root trace per eval case, tags it with the
        // input, and always answers "fruit". It does not call OpenAI.
        string GetFoodType(string food)
        {
            using (var trace = CreateIndependentTrace(activitySource, "my-root-span"))
            {
                trace.Activity?.SetTag("food.input", food);
                trace.Activity?.SetTag("custom.attribute", "hello from independent trace");
            }

            return "fruit";
        }

        // Create and run the evaluation
        var eval = await braintrust
            .EvalBuilder<string, string>()
            .Name($"dotnet-eval-x-{DateTimeOffset.UtcNow.ToUnixTimeMilliseconds()}")
            // Experiment-level tags and metadata (shown in the Braintrust UI for the experiment)
            .Tags("food-classifier", "dotnet-sdk", "gpt-4o-mini")
            .Metadata(new Dictionary<string, object>
            {
                { "model", "gpt-4o-mini" },
                { "description", "Classifies food items as fruit or vegetable" }
            })
            .Cases(
                DatasetCase.Of("strawberry", "fruit"),
                DatasetCase.Of("asparagus", "vegetable"),
                DatasetCase.Of("apple", "fruit"),
                // Case-level tags and metadata (shown for individual eval cases)
                DatasetCase.Of(
                    "banana",
                    "fruit",
                    new List<string> { "tropical", "yellow" },
                    new Dictionary<string, object> { { "category", "tropical-fruit" }, { "ripeness", "ripe" } }
                )
            )
            .TaskFunction(GetFoodType)
            .Scorers(
                new FunctionScorer<string, string>("exact_match", (expected, actual) => expected == actual ? 1.0 : 0.0),
                // Whitespace- and case-insensitive comparison, done without building lowered copies.
                new FunctionScorer<string, string>(
                    "close_enough_match",
                    (expected, actual) =>
                        string.Equals(expected.Trim(), actual.Trim(), StringComparison.OrdinalIgnoreCase) ? 1.0 : 0.0)
            )
            .BuildAsync();

        var result = await eval.RunAsync();
        Console.WriteLine($"\n\n{result.CreateReportString()}");
    }
}
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment