Created
March 6, 2025 09:39
-
-
Save truevis/971ce24625cb1cf5c7933907dc6b1031 to your computer and use it in GitHub Desktop.
Validate text using Gemini and output true or false in JSON
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
import os

# Gemini model identifier handed to the generate_content call below.
model = "gemini-2.0-pro-exp-02-05"

# Set API key
# Prefer the GEMINI_API_KEY environment variable so a real secret never has to
# live in source control; "abc" is only the original placeholder fallback.
GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or "abc"
def log_failed_validation(pdf_path):
    """Log failed validation to failed.log with timestamp.

    Appends one line of the form ``[<ISO timestamp>] <name>.md`` to
    ``failed.log`` in the current working directory, where ``<name>`` is the
    file name of *pdf_path* without its directory or extension.

    Args:
        pdf_path: Path of the source PDF. A falsy value (``None`` or ``""``)
            makes the call a no-op.
    """
    if not pdf_path:
        return

    base_name = os.path.splitext(os.path.basename(pdf_path))[0]
    timestamp = datetime.now().isoformat()
    # Explicit UTF-8: with the platform default encoding a non-ASCII file name
    # can raise UnicodeEncodeError on narrow locales.
    with open('failed.log', 'a', encoding='utf-8') as f:
        f.write(f"[{timestamp}] {base_name}.md\n")
# Instruction text sent ahead of the page content; the model is asked to reply
# with nothing but {"is_valid": true/false}.
_NEC_VALIDATION_PROMPT = """You are a validation expert who ONLY responds with a JSON object in the format {"is_valid": true/false}. No other text or explanation.
Analyze this text and determine if it appears to be a National Electrical Code (NEC) page in Markdown format.
A valid NEC page should have AT LEAST ONE of these:
1. Technical electrical specifications or requirements
2. References to NEC codes or standards
3. Electrical measurements or dimensions
Respond ONLY with {"is_valid": true} if valid, or {"is_valid": false} if invalid.
Do not include any other text or explanation.
TEXT TO ANALYZE:
"""


def _extract_is_valid(response_text):
    """Pull the ``is_valid`` verdict out of the model's raw reply.

    The reply may wrap the JSON object in extra text, so everything from the
    first ``{`` to the last ``}`` is parsed.

    Returns:
        ``True`` or ``False`` for a usable verdict, or ``None`` when the JSON
        object has no ``is_valid`` key.

    Raises:
        ValueError: If no ``{...}`` span exists, the span is not valid JSON
            (``json.JSONDecodeError`` is a ``ValueError``), or the verdict is
            neither a boolean nor the string "true"/"false".
    """
    start_idx = response_text.find('{')
    end_idx = response_text.rfind('}') + 1
    if start_idx < 0 or end_idx <= start_idx:
        raise ValueError("no JSON object found in response")

    validation_json = json.loads(response_text[start_idx:end_idx])
    if 'is_valid' not in validation_json:
        return None

    verdict = validation_json['is_valid']
    if isinstance(verdict, bool):
        return verdict
    # bool("false") is True, so string verdicts must be interpreted explicitly.
    if isinstance(verdict, str) and verdict.strip().lower() in ('true', 'false'):
        return verdict.strip().lower() == 'true'
    raise ValueError(f"'is_valid' is not a boolean: {verdict!r}")


def looks_like_nec_page(llm_output, pdf_path=None):
    """Helper function to check if the LLM output looks like a NEC page.

    Sends *llm_output* to Gemini together with a validation prompt and reads
    back a ``{"is_valid": true/false}`` verdict. Every failure path (no reply,
    unparseable reply, missing key, negative verdict, unexpected exception)
    is recorded through ``log_failed_validation(pdf_path)``.

    Args:
        llm_output: Markdown text to validate.
        pdf_path: Optional path of the originating PDF, used only for the
            failure log.

    Returns:
        ``True`` when Gemini judges the text to be a NEC page, ``False`` in
        every other case. The function never raises for validation problems.
    """
    try:
        # Initialize the client
        client = genai.Client(api_key=GEMINI_API_KEY)

        # Create the contents for validation
        contents = types.Content(
            role="user",
            parts=[{"text": _NEC_VALIDATION_PROMPT + "\n\n" + llm_output}]
        )

        # Configure generation parameters
        generate_content_config = types.GenerateContentConfig(
            temperature=0.1,  # Lower temperature for more consistent JSON output
            top_p=0.1,
            top_k=1,
            response_mime_type="text/plain",
        )

        # Generate validation response
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )

        if not (response and response.text):
            print("\nNo response text from Gemini")
            print("No response from Gemini: BAD OUTPUT!!!!!!!!")
            log_failed_validation(pdf_path)
            return False

        try:
            is_valid = _extract_is_valid(response.text.strip())
        except ValueError as e:
            # Covers malformed JSON as well as a reply with no JSON object at
            # all, which previously fell through and returned None unlogged.
            print(f"\nJSON parsing error: {str(e)}")
            print(f"Invalid JSON response: BAD OUTPUT!!!!!!!! Error: {str(e)}")
            log_failed_validation(pdf_path)
            return False

        if is_valid is None:
            print("\nMissing 'is_valid' key in JSON response")
            print("\nInvalid JSON structure: BAD OUTPUT!!!!!!!!")
            log_failed_validation(pdf_path)
            return False

        print(f"\n~~~~~~~~~~~~~~~~~~ Validation result: {'VALID' if is_valid else 'INVALID'}")
        if not is_valid:
            log_failed_validation(pdf_path)
            print("BAD OUTPUT!!!!!!!!")
        return is_valid

    except Exception as e:
        # Deliberate catch-all boundary: a validation failure of any kind must
        # be reported as "not valid" rather than abort the caller.
        print(f"\nValidation function error: {str(e)}")
        print("Exception: BAD OUTPUT!!!!!!!!")
        log_failed_validation(pdf_path)
        return False
Author
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment
This script babysits Markdown files, begging Google’s Gemini AI to confirm if they resemble NEC pages. If Gemini disapproves, it sulks and logs the failure with a timestamp. AI-powered nitpicking at its finest.