truevis · March 6, 2025 09:39 · truevis · Mar 6, 2025
diff --git a/looks_like_nec_page b/looks_like_nec_page
 import os

 # Gemini model identifier passed to generate_content() by the validator.
 model = "gemini-2.0-pro-exp-02-05"
 # API key: prefer the GEMINI_API_KEY environment variable so a real credential
 # never has to live in source control; otherwise keep the original placeholder.
 GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or "abc"

 def log_failed_validation(pdf_path):
    """Append a timestamped entry for a failed validation to ``failed.log``.

    The entry has the form ``[<ISO timestamp>] <name>.md`` where ``<name>`` is
    the file name of *pdf_path* without its directory or extension.

    Args:
        pdf_path: Path of the PDF whose output failed validation. When it is
            falsy (``None`` or empty) nothing is written.
    """
    if not pdf_path:
        return
    base_name = os.path.splitext(os.path.basename(pdf_path))[0]
    timestamp = datetime.now().isoformat()
    # Explicit UTF-8 so non-ASCII file names are written identically on every
    # platform instead of depending on the locale's default encoding.
    with open('failed.log', 'a', encoding='utf-8') as f:
        f.write(f"[{timestamp}] {base_name}.md\n")

 def _coerce_is_valid(raw_value):
    """Turn the JSON ``is_valid`` value into a bool, or None if unusable.

    String verdicts are parsed explicitly because ``bool("false")`` is True;
    every non-string value keeps plain ``bool()`` truthiness.
    """
    if isinstance(raw_value, str):
        return {"true": True, "false": False}.get(raw_value.strip().lower())
    return bool(raw_value)


 def looks_like_nec_page(llm_output, pdf_path=None):
    """Ask Gemini whether *llm_output* looks like an NEC page in Markdown.

    The text is sent to the configured model together with a prompt that
    requests a bare ``{"is_valid": true/false}`` JSON reply, and the first
    JSON object found in that reply is interpreted.

    Args:
        llm_output: Text to classify; it is appended to the validation prompt.
        pdf_path: Optional path of the source PDF. It is only forwarded to
            ``log_failed_validation`` when the check does not pass.

    Returns:
        True only when the reply carries a truthy ``is_valid``. Every other
        outcome (explicit false, missing or unrecognised value, unparsable or
        empty reply, or an exception while calling the model) is passed to
        ``log_failed_validation`` and reported as False.
    """
    try:
        # Initialize the client
        client = genai.Client(api_key=GEMINI_API_KEY)

        validation_prompt = """You are a validation expert who ONLY responds with a JSON object in the format {"is_valid": true/false}. No other text or explanation.

 Analyze this text and determine if it appears to be a National Electrical Code (NEC) page in Markdown format.

 A valid NEC page should have AT LEAST ONE of these:
 1. Technical electrical specifications or requirements
 2. References to NEC codes or standards
 3. Electrical measurements or dimensions

 Respond ONLY with {"is_valid": true} if valid, or {"is_valid": false} if invalid.
 Do not include any other text or explanation.

 TEXT TO ANALYZE:
 """

        # Single user turn: the prompt followed by the text under review
        contents = types.Content(
            role="user",
            parts=[{"text": validation_prompt + "\n\n" + llm_output}]
        )

        # Configure generation parameters
        generate_content_config = types.GenerateContentConfig(
            temperature=0.1,  # Lower temperature for more consistent JSON output
            top_p=0.1,
            top_k=1,
            response_mime_type="text/plain",
        )

        # Generate validation response
        response = client.models.generate_content(
            model=model,
            contents=contents,
            config=generate_content_config,
        )

        if not (response and response.text):
            print("\nNo response text from Gemini")
            print("No response from Gemini: BAD OUTPUT!!!!!!!!")
            log_failed_validation(pdf_path)
            return False

        validation_response = response.text.strip()

        try:
            # Locate the JSON object; the reply may surround it with other text
            start_idx = validation_response.find('{')
            if start_idx >= 0 and validation_response.rfind('}') > start_idx:
                # raw_decode stops at the end of the first complete JSON value,
                # so trailing text (even text containing braces) no longer
                # breaks parsing the way a first-'{'-to-last-'}' slice did.
                validation_json, _ = json.JSONDecoder().raw_decode(
                    validation_response[start_idx:]
                )

                # Verify the expected structure
                if 'is_valid' in validation_json:
                    is_valid = _coerce_is_valid(validation_json['is_valid'])
                    if is_valid is not None:
                        print(f"\n~~~~~~~~~~~~~~~~~~ Validation result: {'VALID' if is_valid else 'INVALID'}")
                        if not is_valid:
                            log_failed_validation(pdf_path)
                            print("BAD OUTPUT!!!!!!!!")
                        return is_valid
                    print("\nUnrecognised 'is_valid' value in JSON response")
                else:
                    print("\nMissing 'is_valid' key in JSON response")

            print("\nInvalid JSON structure: BAD OUTPUT!!!!!!!!")
            log_failed_validation(pdf_path)
            return False

        except (json.JSONDecodeError, ValueError) as e:
            print(f"\nJSON parsing error: {str(e)}")
            print(f"Invalid JSON response: BAD OUTPUT!!!!!!!! Error: {str(e)}")
            log_failed_validation(pdf_path)
            return False

    except Exception as e:
        # Top-level boundary: any failure while talking to the model is
        # reported as a failed validation rather than propagated.
        print(f"\nValidation function error: {str(e)}")
        print("Exception: BAD OUTPUT!!!!!!!!")
        log_failed_validation(pdf_path)
        return False
	import os

	# Gemini model identifier passed to generate_content() by the validator.
	model = "gemini-2.0-pro-exp-02-05"
	# API key: prefer the GEMINI_API_KEY environment variable so a real credential
	# never has to live in source control; otherwise keep the original placeholder.
	GEMINI_API_KEY = os.environ.get("GEMINI_API_KEY") or "abc"

	def log_failed_validation(pdf_path):
		"""Append a timestamped entry for a failed validation to ``failed.log``.

		The entry has the form ``[<ISO timestamp>] <name>.md`` where ``<name>``
		is the file name of *pdf_path* without its directory or extension.

		Args:
			pdf_path: Path of the PDF whose output failed validation. When it
				is falsy (``None`` or empty) nothing is written.
		"""
		if not pdf_path:
			return
		base_name = os.path.splitext(os.path.basename(pdf_path))[0]
		timestamp = datetime.now().isoformat()
		# Explicit UTF-8 so non-ASCII file names are written identically on
		# every platform instead of depending on the locale's default encoding.
		with open('failed.log', 'a', encoding='utf-8') as f:
			f.write(f"[{timestamp}] {base_name}.md\n")

	def _coerce_is_valid(raw_value):
		"""Turn the JSON ``is_valid`` value into a bool, or None if unusable.

		String verdicts are parsed explicitly because ``bool("false")`` is True;
		every non-string value keeps plain ``bool()`` truthiness.
		"""
		if isinstance(raw_value, str):
			return {"true": True, "false": False}.get(raw_value.strip().lower())
		return bool(raw_value)


	def looks_like_nec_page(llm_output, pdf_path=None):
		"""Ask Gemini whether *llm_output* looks like an NEC page in Markdown.

		The text is sent to the configured model together with a prompt that
		requests a bare ``{"is_valid": true/false}`` JSON reply, and the first
		JSON object found in that reply is interpreted.

		Args:
			llm_output: Text to classify; it is appended to the validation prompt.
			pdf_path: Optional path of the source PDF. It is only forwarded to
				``log_failed_validation`` when the check does not pass.

		Returns:
			True only when the reply carries a truthy ``is_valid``. Every other
			outcome (explicit false, missing or unrecognised value, unparsable
			or empty reply, or an exception while calling the model) is passed
			to ``log_failed_validation`` and reported as False.
		"""
		try:
			# Initialize the client
			client = genai.Client(api_key=GEMINI_API_KEY)

			validation_prompt = """You are a validation expert who ONLY responds with a JSON object in the format {"is_valid": true/false}. No other text or explanation.

	Analyze this text and determine if it appears to be a National Electrical Code (NEC) page in Markdown format.

	A valid NEC page should have AT LEAST ONE of these:
	1. Technical electrical specifications or requirements
	2. References to NEC codes or standards
	3. Electrical measurements or dimensions

	Respond ONLY with {"is_valid": true} if valid, or {"is_valid": false} if invalid.
	Do not include any other text or explanation.

	TEXT TO ANALYZE:
	"""

			# Single user turn: the prompt followed by the text under review
			contents = types.Content(
				role="user",
				parts=[{"text": validation_prompt + "\n\n" + llm_output}]
			)

			# Configure generation parameters
			generate_content_config = types.GenerateContentConfig(
				temperature=0.1, # Lower temperature for more consistent JSON output
				top_p=0.1,
				top_k=1,
				response_mime_type="text/plain",
			)

			# Generate validation response
			response = client.models.generate_content(
				model=model,
				contents=contents,
				config=generate_content_config,
			)

			if not (response and response.text):
				print("\nNo response text from Gemini")
				print("No response from Gemini: BAD OUTPUT!!!!!!!!")
				log_failed_validation(pdf_path)
				return False

			validation_response = response.text.strip()

			try:
				# Locate the JSON object; the reply may surround it with other text
				start_idx = validation_response.find('{')
				if start_idx >= 0 and validation_response.rfind('}') > start_idx:
					# raw_decode stops at the end of the first complete JSON
					# value, so trailing text (even text containing braces) no
					# longer breaks parsing the way a first-'{'-to-last-'}'
					# slice did.
					validation_json, _ = json.JSONDecoder().raw_decode(
						validation_response[start_idx:]
					)

					# Verify the expected structure
					if 'is_valid' in validation_json:
						is_valid = _coerce_is_valid(validation_json['is_valid'])
						if is_valid is not None:
							print(f"\n~~~~~~~~~~~~~~~~~~ Validation result: {'VALID' if is_valid else 'INVALID'}")
							if not is_valid:
								log_failed_validation(pdf_path)
								print("BAD OUTPUT!!!!!!!!")
							return is_valid
						print("\nUnrecognised 'is_valid' value in JSON response")
					else:
						print("\nMissing 'is_valid' key in JSON response")

				print("\nInvalid JSON structure: BAD OUTPUT!!!!!!!!")
				log_failed_validation(pdf_path)
				return False

			except (json.JSONDecodeError, ValueError) as e:
				print(f"\nJSON parsing error: {str(e)}")
				print(f"Invalid JSON response: BAD OUTPUT!!!!!!!! Error: {str(e)}")
				log_failed_validation(pdf_path)
				return False

		except Exception as e:
			# Top-level boundary: any failure while talking to the model is
			# reported as a failed validation rather than propagated.
			print(f"\nValidation function error: {str(e)}")
			print("Exception: BAD OUTPUT!!!!!!!!")
			log_failed_validation(pdf_path)
			return False
No results found