Skip to content

Instantly share code, notes, and snippets.

@truevis
Created March 6, 2025 09:39
Show Gist options
  • Select an option

  • Save truevis/971ce24625cb1cf5c7933907dc6b1031 to your computer and use it in GitHub Desktop.

Select an option

Save truevis/971ce24625cb1cf5c7933907dc6b1031 to your computer and use it in GitHub Desktop.
Validate text using Gemini and output true or false in JSON
import os

# Gemini model identifier passed to generate_content() by looks_like_nec_page().
model = "gemini-2.0-pro-exp-02-05"
# Set API key: prefer the GEMINI_API_KEY environment variable so a real secret
# never has to live in source control; "abc" is kept only as the original
# placeholder fallback (also used when the variable is set but empty).
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or "abc"
def log_failed_validation(pdf_path):
    """Append a timestamped failure entry for *pdf_path* to ``failed.log``.

    The entry names the Markdown file that corresponds to the PDF: the PDF's
    base name with its extension replaced by ``.md``. The log is opened
    relative to the current working directory in append mode.

    Args:
        pdf_path: Path of the source PDF. When falsy (``None`` or ``""``)
            nothing is written.
    """
    if not pdf_path:
        return

    base_name = os.path.splitext(os.path.basename(pdf_path))[0]
    timestamp = datetime.now().isoformat()
    # Explicit UTF-8 so file names with non-ASCII characters can always be
    # written, regardless of the platform's default locale encoding.
    with open('failed.log', 'a', encoding='utf-8') as f:
        f.write(f"[{timestamp}] {base_name}.md\n")
_VALIDATION_PROMPT = """You are a validation expert who ONLY responds with a JSON object in the format {"is_valid": true/false}. No other text or explanation.
Analyze this text and determine if it appears to be a National Electrical Code (NEC) page in Markdown format.
A valid NEC page should have AT LEAST ONE of these:
1. Technical electrical specifications or requirements
2. References to NEC codes or standards
3. Electrical measurements or dimensions
Respond ONLY with {"is_valid": true} if valid, or {"is_valid": false} if invalid.
Do not include any other text or explanation.
TEXT TO ANALYZE:
"""


def _coerce_is_valid(value):
    """Convert the ``is_valid`` field of the reply into a real boolean.

    A plain ``bool()`` would turn the quoted string ``"false"`` into ``True``,
    so strings are parsed explicitly. Any string other than "true"/"false"
    (case-insensitive, surrounding whitespace ignored) raises ``ValueError``.
    Non-string values keep their ordinary truthiness.
    """
    if isinstance(value, str):
        normalized = value.strip().lower()
        if normalized in ("true", "false"):
            return normalized == "true"
        raise ValueError(f"Unrecognized is_valid value: {value!r}")
    return bool(value)


def looks_like_nec_page(llm_output, pdf_path=None):
    """Ask Gemini whether *llm_output* looks like an NEC page in Markdown.

    Sends the validation prompt followed by *llm_output* to the configured
    model, extracts the span from the first ``{`` to the last ``}`` of the
    reply text, parses it as JSON and reads its ``is_valid`` field.

    Args:
        llm_output: Text to validate; it is concatenated onto the prompt.
        pdf_path: Optional path of the source PDF, forwarded to
            ``log_failed_validation`` whenever validation does not succeed.

    Returns:
        ``True`` only when the reply contains a JSON object whose
        ``is_valid`` value is true. Every other outcome -- an explicit
        ``false``, an empty reply, missing or malformed JSON, or any
        exception raised along the way -- is logged through
        ``log_failed_validation`` and yields ``False``.
    """
    try:
        # Initialize the client
        client = genai.Client(api_key=GEMINI_API_KEY)

        # Create the contents for validation
        contents = types.Content(
            role="user",
            parts=[{"text": _VALIDATION_PROMPT + "\n\n" + llm_output}],
        )

        # Configure generation parameters
        generate_content_config = types.GenerateContentConfig(
            temperature=0.1,  # Lower temperature for more consistent JSON output
            top_p=0.1,
            top_k=1,
            response_mime_type="text/plain",
        )

        # Generate validation response
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )

        if not (response and response.text):
            print("\nNo response text from Gemini")
            print("No response from Gemini: BAD OUTPUT!!!!!!!!")
            log_failed_validation(pdf_path)
            return False

        validation_response = response.text.strip()
        try:
            # The reply is plain text, so the JSON object may be surrounded
            # by other characters; slice from the first '{' to the last '}'.
            start_idx = validation_response.find('{')
            end_idx = validation_response.rfind('}') + 1
            if start_idx >= 0 and end_idx > start_idx:
                validation_json = json.loads(validation_response[start_idx:end_idx])
                # Verify the expected structure
                if 'is_valid' in validation_json:
                    is_valid = _coerce_is_valid(validation_json['is_valid'])
                    print(f"\n~~~~~~~~~~~~~~~~~~ Validation result: {'VALID' if is_valid else 'INVALID'}")
                    if not is_valid:
                        log_failed_validation(pdf_path)
                        print("BAD OUTPUT!!!!!!!!")
                    return is_valid
                print("\nMissing 'is_valid' key in JSON response")

            # Reached when no JSON object was found or the key was missing.
            print("\nInvalid JSON structure: BAD OUTPUT!!!!!!!!")
            log_failed_validation(pdf_path)
            return False
        except (json.JSONDecodeError, ValueError) as e:
            print(f"\nJSON parsing error: {str(e)}")
            print(f"Invalid JSON response: BAD OUTPUT!!!!!!!! Error: {str(e)}")
            log_failed_validation(pdf_path)
            return False
    except Exception as e:
        # Deliberate best-effort boundary: any client/network/type error is
        # reported as a failed validation rather than propagated to callers.
        print(f"\nValidation function error: {str(e)}")
        print("Exception: BAD OUTPUT!!!!!!!!")
        log_failed_validation(pdf_path)
        return False
@truevis
Copy link
Author

truevis commented Mar 6, 2025

This script babysits Markdown files, begging Google’s Gemini AI to confirm if they resemble NEC pages. If Gemini disapproves, it sulks and logs the failure with a timestamp. AI-powered nitpicking at its finest.

Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment