Skip to content

Instantly share code, notes, and snippets.

@Theoistic
Created March 4, 2025 17:38
Show Gist options
  • Select an option

  • Save Theoistic/79b80d6bcf6cc9e3a3b8c74c18f12072 to your computer and use it in GitHub Desktop.

Select an option

Save Theoistic/79b80d6bcf6cc9e3a3b8c74c18f12072 to your computer and use it in GitHub Desktop.
GBNF
using System;
using System.Collections;
using System.Collections.Generic;
using System.Reflection;
using System.Text;
/// <summary>
/// Marker attribute for properties that must always appear in the generated grammar.
/// The grammar generator in this file treats every property that does not carry
/// this attribute as optional.
/// </summary>
[AttributeUsage(AttributeTargets.Property, AllowMultiple = false, Inherited = true)]
public class GBNFSchemaRequiredAttribute : Attribute
{
}
/// <summary>
/// Extension methods that describe the JSON shape of a .NET type as a GBNF grammar.
/// </summary>
/// <remarks>
/// Objects become one rule per type, built from their public instance properties.
/// Arrays and generic enumerables become <c>ArrayOf…</c> rules, generic dictionaries
/// become <c>MapOf…</c> rules, and simple types map onto a fixed set of base rules
/// (<c>string</c>, <c>boolean</c>, <c>integer</c>, <c>uint</c>, <c>float</c>, <c>double</c>).
/// Enum types are not special-cased and are handled like any other non-simple type.
/// </remarks>
public static class GBNFSchemaExtensions
{
    /// <summary>Separator emitted between two members of a JSON object.</summary>
    private const string Comma = " ws? \",\" ws? ";

    /// <summary>JSON number body shared by the <c>float</c> and <c>double</c> base rules.</summary>
    private const string JsonNumber = "\"-\"? [0-9]+ ( \".\" [0-9]+ )? ( [eE] [-+]? [0-9]+ )?";

    /// <summary>
    /// Generates a GBNF grammar string for the given object instance,
    /// supporting reusable definitions for complex types and simple type definitions.
    /// </summary>
    /// <typeparam name="T">Compile-time type of <paramref name="obj"/>.</typeparam>
    /// <param name="obj">Instance whose type is described; may be <c>null</c>.</param>
    /// <returns>The complete grammar text, starting with the <c>root</c> rule.</returns>
    public static string ToGbnfGrammar<T>(this T obj)
    {
        // typeof(T) is only the static type (possibly object or a base class); prefer
        // the runtime type and fall back to T when there is no instance to inspect.
        Type type = obj != null ? obj.GetType() : typeof(T);
        return type.ToGbnfGrammar();
    }

    /// <summary>
    /// Generates a GBNF grammar string for the given type,
    /// supporting reusable definitions for complex types and simple type definitions.
    /// </summary>
    /// <param name="type">Type to describe.</param>
    /// <returns>The complete grammar text, starting with the <c>root</c> rule.</returns>
    /// <exception cref="ArgumentNullException"><paramref name="type"/> is <c>null</c>.</exception>
    public static string ToGbnfGrammar(this Type type)
    {
        if (type == null)
        {
            throw new ArgumentNullException(nameof(type));
        }

        var definitions = new Dictionary<string, string>();
        var processedTypes = new HashSet<Type>();

        // The returned name (not type.Name) is what root must reference: for simple
        // types and collections the rule name differs from the CLR type name.
        string rootRule = GenerateGbnfForType(type, definitions, processedTypes);

        AddBaseDefinitions(definitions);

        var sb = new StringBuilder();
        sb.AppendLine($"root ::= {rootRule}");
        sb.AppendLine();
        foreach (var def in definitions)
        {
            sb.AppendLine(def.Value);
            sb.AppendLine();
        }

        return sb.ToString();
    }

    /// <summary>
    /// Adds the base rules (whitespace and simple JSON values) that generated rules
    /// reference, without overwriting a rule of the same name that already exists.
    /// </summary>
    private static void AddBaseDefinitions(Dictionary<string, string> definitions)
    {
        AddIfMissing(definitions, "ws", "ws ::= [ \\t\\n]*");

        // A JSON string: plain characters (no quote, backslash or control characters)
        // or a backslash escape, including \uXXXX.
        AddIfMissing(
            definitions,
            "string",
            "string ::= \"\\\"\" ( [^\\\"\\\\\\x7F\\x00-\\x1F] | \"\\\\\" ( [\"\\\\/bfnrt] | \"u\" [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] [0-9a-fA-F] ) )* \"\\\"\"");

        AddIfMissing(definitions, "boolean", "boolean ::= \"true\" | \"false\"");
        AddIfMissing(definitions, "integer", "integer ::= \"-\"?[0-9]+");
        AddIfMissing(definitions, "uint", "uint ::= [0-9]+");

        // Both floating-point rules accept plain JSON numbers only; C# literal
        // suffixes (f/d) and a dangling "." are not valid JSON.
        AddIfMissing(definitions, "float", "float ::= " + JsonNumber);
        AddIfMissing(definitions, "double", "double ::= " + JsonNumber);
    }

    /// <summary>Stores <paramref name="rule"/> under <paramref name="name"/> unless that name is already defined.</summary>
    private static void AddIfMissing(Dictionary<string, string> definitions, string name, string rule)
    {
        if (!definitions.ContainsKey(name))
        {
            definitions[name] = rule;
        }
    }

    /// <summary>
    /// Recursively generates a GBNF grammar rule for a given type and adds it to definitions.
    /// Returns the name of the production rule for that type.
    /// </summary>
    /// <param name="type">Type to generate a rule for.</param>
    /// <param name="definitions">Rule name to rule text; new rules are added here.</param>
    /// <param name="processedTypes">Types whose rule is already generated or in progress.</param>
    private static string GenerateGbnfForType(Type type, Dictionary<string, string> definitions, HashSet<Type> processedTypes)
    {
        // Nullable<T> is described by its underlying value type.
        type = Nullable.GetUnderlyingType(type) ?? type;

        // Simple types map straight to a base rule; they never get their own definition.
        if (IsSimpleType(type))
        {
            return GetGbnfForSimpleType(type);
        }

        string ruleName = GetRuleName(type);

        // Registering the type before descending into its members is what stops
        // self-referencing types from recursing forever. The rule name is computed
        // from the type alone, so a repeated visit returns the same name.
        if (!processedTypes.Add(type))
        {
            return ruleName;
        }

        // Generic dictionaries are JSON objects with arbitrary string keys.
        if (IsDictionary(type, out Type valueType))
        {
            string valueRule = GenerateGbnfForType(valueType, definitions, processedTypes);
            definitions[ruleName] =
                $"{ruleName} ::= \"{{\" ws? ( string \":\" ws? {valueRule} ( ws? \",\" ws? string \":\" ws? {valueRule} )* )? ws? \"}}\"";
            return ruleName;
        }

        // Arrays and generic enumerables are JSON arrays of the element rule.
        if (IsArray(type, out Type elementType))
        {
            string elementRule = GenerateGbnfForType(elementType, definitions, processedTypes);
            definitions[ruleName] =
                $"{ruleName} ::= \"[\" ws? ( {elementRule} ( ws? \",\" ws? {elementRule} )* )? ws? \"]\"";
            return ruleName;
        }

        // Otherwise, assume a complex object (class).
        definitions[ruleName] = BuildObjectRule(type, ruleName, definitions, processedTypes);
        return ruleName;
    }

    /// <summary>
    /// Builds the rule text for an object type from its public instance properties.
    /// Properties marked with <see cref="GBNFSchemaRequiredAttribute"/> are emitted first
    /// and are mandatory; the remaining properties follow, in declaration order, and any
    /// subset of them may be omitted.
    /// </summary>
    private static string BuildObjectRule(Type type, string ruleName, Dictionary<string, string> definitions, HashSet<Type> processedTypes)
    {
        var required = new List<string>();
        var optional = new List<string>();

        foreach (PropertyInfo prop in type.GetProperties(BindingFlags.Public | BindingFlags.Instance))
        {
            // Indexers show up as a property named "Item" but are not object members.
            if (prop.GetIndexParameters().Length > 0)
            {
                continue;
            }

            // Quoted property name, a colon, optional whitespace, then the value rule.
            string valueRule = GenerateGbnfForType(prop.PropertyType, definitions, processedTypes);
            string pair = $"\"\\\"{prop.Name}\\\"\" \":\" ws? {valueRule}";

            if (prop.GetCustomAttribute<GBNFSchemaRequiredAttribute>() == null)
            {
                optional.Add(pair);
            }
            else
            {
                required.Add(pair);
            }
        }

        var rule = new StringBuilder();
        rule.Append($"{ruleName} ::= \"{{\" ws? ");
        rule.Append(string.Join(Comma, required));

        if (optional.Count > 0)
        {
            // A comma may only appear between two members that are both present.
            // Each alternative starts at the first optional member that is present
            // and lets every later one follow with its own leading comma.
            var alternatives = new List<string>();
            for (int first = 0; first < optional.Count; first++)
            {
                var alternative = new StringBuilder(optional[first]);
                for (int next = first + 1; next < optional.Count; next++)
                {
                    alternative.Append($" ( ws? \",\" ws? {optional[next]} )?");
                }

                alternatives.Add(alternative.ToString());
            }

            string optionalPart = string.Join(" | ", alternatives);
            if (required.Count > 0)
            {
                // The comma after the last required member belongs to the optional group.
                rule.Append($" ( ws? \",\" ws? ( {optionalPart} ) )?");
            }
            else
            {
                rule.Append($"( {optionalPart} )?");
            }
        }

        rule.Append(" ws? \"}\"");
        return rule.ToString();
    }

    /// <summary>
    /// Computes the rule name for a non-simple type from the type alone, so the same
    /// type always yields the same name. The result contains only letters, digits
    /// and dashes.
    /// </summary>
    private static string GetRuleName(Type type)
    {
        type = Nullable.GetUnderlyingType(type) ?? type;

        if (IsDictionary(type, out Type valueType))
        {
            // Only recurse into the value type when it is one of this type's own
            // generic arguments; otherwise a type such as
            // "class Tree : Dictionary<string, Tree>" would recurse without end.
            if (type.IsGenericType && Array.IndexOf(type.GetGenericArguments(), valueType) >= 0)
            {
                return "MapOf" + GetRuleName(valueType);
            }
        }
        else if (IsArray(type, out Type elementType))
        {
            return "ArrayOf" + GetRuleName(elementType);
        }

        string name = type.Name;
        if (type.IsGenericType)
        {
            // "KeyValuePair`2" -> "KeyValuePair-String-Int32": drop the arity marker
            // and append the arguments so different instantiations get distinct rules.
            int tick = name.IndexOf('`');
            if (tick >= 0)
            {
                name = name.Substring(0, tick);
            }

            var generic = new StringBuilder(name);
            foreach (Type argument in type.GetGenericArguments())
            {
                generic.Append('-').Append(GetRuleName(argument));
            }

            name = generic.ToString();
        }

        return SanitizeRuleName(name);
    }

    /// <summary>
    /// Replaces every character that is not an ASCII letter, digit or dash with a dash.
    /// </summary>
    private static string SanitizeRuleName(string name)
    {
        var sanitized = new StringBuilder(name.Length);
        foreach (char c in name)
        {
            bool allowed = (c >= 'a' && c <= 'z')
                || (c >= 'A' && c <= 'Z')
                || (c >= '0' && c <= '9')
                || c == '-';
            sanitized.Append(allowed ? c : '-');
        }

        return sanitized.ToString();
    }

    /// <summary>
    /// Checks if the given type is considered "simple": a primitive, string, decimal,
    /// DateTime or Guid.
    /// </summary>
    private static bool IsSimpleType(Type type)
    {
        return type.IsPrimitive ||
               type == typeof(string) ||
               type == typeof(decimal) ||
               type == typeof(DateTime) ||
               type == typeof(Guid);
    }

    /// <summary>
    /// Determines whether a type is, or implements, a generic dictionary interface
    /// (<c>IDictionary&lt;,&gt;</c> or <c>IReadOnlyDictionary&lt;,&gt;</c>) and returns its value type.
    /// </summary>
    private static bool IsDictionary(Type type, out Type valueType)
    {
        if (IsDictionaryInterface(type))
        {
            valueType = type.GetGenericArguments()[1];
            return true;
        }

        foreach (Type implemented in type.GetInterfaces())
        {
            if (IsDictionaryInterface(implemented))
            {
                valueType = implemented.GetGenericArguments()[1];
                return true;
            }
        }

        valueType = null;
        return false;
    }

    /// <summary>Returns true when <paramref name="type"/> is a constructed or open generic dictionary interface.</summary>
    private static bool IsDictionaryInterface(Type type)
    {
        if (!type.IsGenericType)
        {
            return false;
        }

        Type definition = type.GetGenericTypeDefinition();
        return definition == typeof(IDictionary<,>) || definition == typeof(IReadOnlyDictionary<,>);
    }

    /// <summary>
    /// Determines if a type is an array or a generic IEnumerable (excluding string and
    /// dictionaries) and returns its element type.
    /// </summary>
    private static bool IsArray(Type type, out Type elementType)
    {
        if (type.IsArray)
        {
            elementType = type.GetElementType();
            return true;
        }

        if (type != typeof(string) &&
            type.IsGenericType &&
            typeof(IEnumerable).IsAssignableFrom(type) &&
            !IsDictionary(type, out _))
        {
            elementType = type.GetGenericArguments()[0];
            return true;
        }

        elementType = null;
        return false;
    }

    /// <summary>
    /// Returns the base GBNF rule name for a simple type.
    /// For instance, string maps to the nonterminal "string", bool to "boolean", etc.
    /// </summary>
    /// <exception cref="NotSupportedException">
    /// The type has no base rule; this indicates it should not have passed
    /// <see cref="IsSimpleType"/>.
    /// </exception>
    private static string GetGbnfForSimpleType(Type type)
    {
        // char, DateTime and Guid are emitted as quoted JSON strings.
        if (type == typeof(string) || type == typeof(char) ||
            type == typeof(DateTime) || type == typeof(Guid))
        {
            return "string";
        }

        if (type == typeof(bool))
        {
            return "boolean";
        }

        if (type == typeof(int) || type == typeof(long) || type == typeof(short) ||
            type == typeof(sbyte) || type == typeof(IntPtr))
        {
            return "integer";
        }

        if (type == typeof(uint) || type == typeof(ulong) || type == typeof(ushort) ||
            type == typeof(byte) || type == typeof(UIntPtr))
        {
            return "uint";
        }

        if (type == typeof(float))
        {
            return "float";
        }

        if (type == typeof(double) || type == typeof(decimal))
        {
            return "double";
        }

        // Returning a rule name that is never defined would produce a broken grammar.
        throw new NotSupportedException($"No GBNF base rule is defined for simple type '{type.FullName}'.");
    }
}
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment