#!/usr/bin/env python3
"""
Website Lead Generation Script using Agno Framework
==================================================

This script creates an AI agent that generates structured website leads
using Pydantic models and saves them to JSON files.

Features:
- Pydantic data models for structured lead generation
- Agno agent with OpenRouter Gemini model integration
- Customizable user input system
- JSON output to website_leads.json
- Comprehensive lead information capture
"""

import json
import os
import re
from datetime import datetime
from typing import List, Optional, Dict, Any
from pathlib import Path

from pydantic import BaseModel, Field, EmailStr, field_validator
from agno.agent import Agent
from agno.models.openrouter import OpenRouter
from dotenv import load_dotenv

# Load environment variables (e.g. OPENROUTER_API_KEY) from a .env file
# before anything below reads them via os.getenv().
load_dotenv()


class ContactInfo(BaseModel):
    """How to reach a lead; only the e-mail address is mandatory."""

    # Required and validated as an e-mail address.
    email: EmailStr = Field(description="Primary email address")

    # Optional channels default to None when not supplied.
    phone: Optional[str] = Field(default=None, description="Phone number")
    linkedin: Optional[str] = Field(default=None, description="LinkedIn profile URL")
    website: Optional[str] = Field(default=None, description="Personal or company website")


class CompanyInfo(BaseModel):
    """Facts about the organisation a lead works for."""

    # Required descriptors.
    name: str = Field(description="Company name")
    industry: str = Field(description="Industry sector")
    size: str = Field(description="Company size (e.g., '1-10', '11-50', '51-200', '201-500', '500+')")
    location: str = Field(description="Company location (city, state/country)")

    # Optional extras default to None when not supplied.
    website: Optional[str] = Field(default=None, description="Company website URL")
    description: Optional[str] = Field(default=None, description="Brief company description")


class LeadQualification(BaseModel):
    """Scoring and qualification attributes attached to a lead."""

    interest_level: str = Field(description="Interest level: 'high', 'medium', 'low'")
    budget_range: Optional[str] = Field(default=None, description="Estimated budget range")
    timeline: Optional[str] = Field(default=None, description="Expected timeline for decision")
    decision_maker: bool = Field(default=False, description="Whether the lead is a decision maker")
    pain_points: List[str] = Field(default_factory=list, description="Identified pain points")
    lead_score: int = Field(0, description="Lead score from 0-100")

    @field_validator('lead_score')
    @classmethod
    def validate_lead_score(cls, v):
        """Reject scores outside the inclusive 0-100 range."""
        if v < 0 or v > 100:
            raise ValueError('Lead score must be between 0 and 100')
        return v

    @field_validator('interest_level')
    @classmethod
    def validate_interest_level(cls, v):
        """Normalise the interest level to lower case and check it is allowed."""
        allowed_levels = ['high', 'medium', 'low']
        normalised = v.lower()
        if normalised in allowed_levels:
            return normalised
        raise ValueError(f'Interest level must be one of: {allowed_levels}')


class WebsiteLead(BaseModel):
    """Top-level record describing one website lead.

    Combines who the person is, how to reach them, the company they work for,
    their qualification data, attribution/engagement tracking and free-form
    notes. Both timestamps default to the moment the model is instantiated.
    """

    # --- Identity ---------------------------------------------------------
    first_name: str = Field(description="First name")
    last_name: str = Field(description="Last name")
    job_title: str = Field(description="Job title or position")

    # --- Nested sections --------------------------------------------------
    contact_info: ContactInfo = Field(description="Contact details")
    company_info: CompanyInfo = Field(description="Company details")
    qualification: LeadQualification = Field(description="Lead qualification data")

    # --- Attribution ------------------------------------------------------
    lead_source: str = Field(description="Source of the lead (e.g., 'website', 'social media', 'referral')")
    utm_source: Optional[str] = Field(default=None, description="UTM source parameter")
    utm_medium: Optional[str] = Field(default=None, description="UTM medium parameter")
    utm_campaign: Optional[str] = Field(default=None, description="UTM campaign parameter")

    # --- Engagement -------------------------------------------------------
    pages_visited: List[str] = Field(default_factory=list, description="Website pages visited")
    content_downloaded: List[str] = Field(default_factory=list, description="Content downloaded")
    form_submissions: int = Field(0, description="Number of form submissions")

    # --- Free-form --------------------------------------------------------
    notes: Optional[str] = Field(default=None, description="Additional notes about the lead")
    tags: List[str] = Field(default_factory=list, description="Tags for categorization")

    # --- Metadata ---------------------------------------------------------
    created_at: datetime = Field(default_factory=datetime.now, description="Lead creation timestamp")
    updated_at: datetime = Field(default_factory=datetime.now, description="Last update timestamp")


class WebsiteLeadGenerator:
    """Turn free-form lead descriptions into validated ``WebsiteLead`` records.

    An Agno ``Agent`` backed by the OpenRouter model
    ``google/gemini-2.0-flash-001`` is prompted to emit a JSON lead profile.
    The JSON is extracted from the reply, validated through ``WebsiteLead``
    and accumulated as a JSON array in ``website_leads.json`` (a path relative
    to the current working directory).
    """

    def __init__(self, api_key: Optional[str] = None):
        """Create the agent and set the output file path.

        Args:
            api_key: OpenRouter API key. When omitted, the
                ``OPENROUTER_API_KEY`` environment variable is used.

        Raises:
            ValueError: If no API key is passed or found in the environment.
        """
        self.api_key = api_key or os.getenv("OPENROUTER_API_KEY")
        if not self.api_key:
            raise ValueError("OpenRouter API key is required. Set OPENROUTER_API_KEY environment variable.")

        # Initialize the Agno agent with Gemini model
        self.agent = Agent(
            model=OpenRouter(
                id="google/gemini-2.0-flash-001",
                api_key=self.api_key
            ),
            description="""You are an expert lead generation specialist and data analyst.
            Your role is to analyze user input and generate comprehensive, structured website lead information.
            You excel at extracting meaningful insights from partial information and creating complete lead profiles.""",
            instructions="""
            As a lead generation specialist, follow these guidelines:

            1. ACCURACY: Always prioritize accuracy over completeness. If information is not provided, use null/empty values rather than making assumptions.

            2. LEAD QUALIFICATION: Assess leads based on:
               - Job title and seniority level
               - Company size and industry
               - Expressed interest and engagement level
               - Budget indicators and timeline

            3. LEAD SCORING: Calculate lead scores (0-100) based on:
               - Decision-making authority (0-25 points)
               - Company fit and size (0-25 points)
               - Interest level and engagement (0-25 points)
               - Budget and timeline alignment (0-25 points)

            4. PAIN POINTS: Identify potential pain points based on:
               - Industry common challenges
               - Company size typical issues
               - Job role responsibilities

            5. CATEGORIZATION: Suggest relevant tags for lead categorization and follow-up strategies.

            6. STRUCTURED OUTPUT: Always return data in the exact JSON format specified by the WebsiteLead model.

            7. COMPLETENESS: Fill in as much information as possible while maintaining accuracy.

            Remember: Quality leads are better than quantity. Focus on providing actionable insights.
            """,
            show_tool_calls=True,
            markdown=True
        )

        # Output file path
        self.output_file = Path("website_leads.json")

    def generate_lead(self, user_input: str, **kwargs) -> "WebsiteLead":
        """Ask the agent for a lead profile and validate it.

        Args:
            user_input: Free-form description of the lead.
            **kwargs: Extra context (e.g. ``lead_source``, ``utm_source``,
                ``pages_visited``). It is serialized to JSON and embedded in
                the prompt; it is not merged into the result directly.

        Returns:
            The validated ``WebsiteLead``.

        Raises:
            ValueError: If the reply contains no parseable JSON or the JSON
                does not satisfy the ``WebsiteLead`` schema. The original
                error is chained as ``__cause__``.
        """
        # default=str keeps prompt construction from crashing on context
        # values that are not JSON-native (datetimes, Paths, ...).
        context_text = (
            json.dumps(kwargs, indent=2, default=str)
            if kwargs
            else "No additional context provided"
        )

        # Doubled braces are literal braces inside the f-string.
        prompt = f"""
        Based on the following information, generate a comprehensive website lead profile:

        USER INPUT: {user_input}

        ADDITIONAL CONTEXT:
        {context_text}

        TASK: Create a complete WebsiteLead JSON object with the following structure:

        {{
            "first_name": "string",
            "last_name": "string",
            "job_title": "string",
            "contact_info": {{
                "email": "email@example.com",
                "phone": "string or null",
                "linkedin": "string or null",
                "website": "string or null"
            }},
            "company_info": {{
                "name": "string",
                "industry": "string",
                "size": "string",
                "location": "string",
                "website": "string or null",
                "description": "string or null"
            }},
            "qualification": {{
                "interest_level": "high/medium/low",
                "budget_range": "string or null",
                "timeline": "string or null",
                "decision_maker": true/false,
                "pain_points": ["string1", "string2"],
                "lead_score": 0-100
            }},
            "lead_source": "string",
            "utm_source": "string or null",
            "utm_medium": "string or null",
            "utm_campaign": "string or null",
            "pages_visited": ["string1", "string2"],
            "content_downloaded": ["string1", "string2"],
            "form_submissions": 0,
            "notes": "string or null",
            "tags": ["string1", "string2"],
            "created_at": "2024-01-01T00:00:00",
            "updated_at": "2024-01-01T00:00:00"
        }}

        REQUIREMENTS:
        1. Return ONLY valid JSON that matches the WebsiteLead schema
        2. Use realistic, professional data
        3. Calculate an appropriate lead score (0-100)
        4. Include relevant pain points for the industry/role
        5. Add appropriate tags for categorization
        6. Set timestamps to current date/time
        7. If information is missing, use null values (not empty strings)

        Generate the JSON now:
        """

        # Get response from the agent
        response = self.agent.run(prompt)

        # Extract JSON from response
        json_str = self._extract_json_from_response(response.content)

        # Parse and validate the JSON. model_validate (rather than
        # WebsiteLead(**data)) also reports non-object JSON such as a list as
        # a validation error instead of an uncaught TypeError.
        try:
            lead_data = json.loads(json_str)
            return WebsiteLead.model_validate(lead_data)
        except ValueError as e:
            # json.JSONDecodeError and pydantic's ValidationError are both
            # ValueError subclasses.
            raise ValueError(f"Failed to parse lead data: {e}\nResponse: {json_str}") from e

    def _extract_json_from_response(self, response: str) -> str:
        """Pull the JSON payload out of the agent's reply.

        Tried in order:
          1. a fenced block tagged ``json`` (case-insensitive; tolerates
             trailing spaces and CRLF line endings after the tag);
          2. an untagged fenced block whose content starts with ``{`` or ``[``;
          3. the span from the first ``{`` to the last ``}``;
          4. the whole reply, stripped.

        Args:
            response: Raw reply text. ``None`` is treated as an empty reply and
                other non-string values are converted with ``str()``.

        Returns:
            The candidate JSON text (not yet parsed or validated).
        """
        if not isinstance(response, str):
            response = "" if response is None else str(response)

        # 1. ```json ... ``` block
        tagged = re.search(
            r"```json[ \t]*\r?\n(.*?)```", response, re.DOTALL | re.IGNORECASE
        )
        if tagged:
            return tagged.group(1).strip()

        # 2. Untagged ``` ... ``` block, accepted only if it looks like JSON so
        #    that an unrelated code sample is not mistaken for the payload.
        untagged = re.search(r"```[ \t]*\r?\n(.*?)```", response, re.DOTALL)
        if untagged:
            candidate = untagged.group(1).strip()
            if candidate.startswith(("{", "[")):
                return candidate

        # 3. Bare JSON object embedded in prose (greedy: first '{' to last '}')
        braces = re.search(r"\{.*\}", response, re.DOTALL)
        if braces:
            return braces.group(0).strip()

        # 4. Nothing recognisable; let the caller's json.loads report it.
        return response.strip()

    def _load_existing_leads(self) -> list:
        """Read the leads already stored in ``self.output_file``.

        Returns an empty list when the file is missing or blank. A file that
        is not valid JSON is moved aside to a timestamped ``.corrupt-*`` backup
        (instead of being silently overwritten) and an empty list is returned.
        A top-level value that is not a list is wrapped in a one-element list.
        """
        try:
            raw = self.output_file.read_text(encoding="utf-8")
        except FileNotFoundError:
            return []

        if not raw.strip():
            return []

        try:
            existing_data = json.loads(raw)
        except json.JSONDecodeError as exc:
            backup = self.output_file.with_name(
                f"{self.output_file.name}.corrupt-{datetime.now():%Y%m%d%H%M%S}"
            )
            self.output_file.replace(backup)
            print(f"⚠️ Existing {self.output_file} is not valid JSON ({exc}); moved to {backup}")
            return []

        return existing_data if isinstance(existing_data, list) else [existing_data]

    def save_lead_to_file(self, lead: "WebsiteLead", append: bool = True) -> None:
        """Write ``lead`` to the JSON output file.

        Args:
            lead: The lead to store.
            append: When True (default) the lead is added to the leads already
                in the file; when False the file is replaced with just this
                lead.
        """
        leads = self._load_existing_leads() if append else []

        # mode="json" makes pydantic emit JSON-native values (ISO 8601
        # timestamps) rather than relying on a blanket default=str fallback.
        leads.append(lead.model_dump(mode="json"))

        with open(self.output_file, 'w', encoding='utf-8') as f:
            json.dump(leads, f, indent=2, ensure_ascii=False)

        print(f"✅ Lead saved to {self.output_file}")

    def generate_and_save_lead(self, user_input: str, **kwargs) -> "WebsiteLead":
        """Generate a lead from ``user_input`` and append it to the output file.

        Accepts the same arguments, and raises the same errors, as
        ``generate_lead``; returns the generated lead.
        """
        lead = self.generate_lead(user_input, **kwargs)
        self.save_lead_to_file(lead)
        return lead

    def get_lead_summary(self, lead: "WebsiteLead") -> str:
        """Return a multi-line, human-readable summary of ``lead``.

        Missing optional values are rendered with placeholders such as
        'Not specified', 'None identified', 'No tags' and
        'No additional notes'.
        """
        company = lead.company_info
        qualification = lead.qualification

        decision_maker = 'Yes' if qualification.decision_maker else 'No'
        pain_points = ', '.join(qualification.pain_points) if qualification.pain_points else 'None identified'
        tags = ', '.join(lead.tags) if lead.tags else 'No tags'

        lines = [
            "",
            "🎯 LEAD SUMMARY",
            "================",
            f"Name: {lead.first_name} {lead.last_name}",
            f"Title: {lead.job_title}",
            f"Company: {company.name} ({company.industry})",
            f"Email: {lead.contact_info.email}",
            f"Lead Score: {qualification.lead_score}/100",
            f"Interest Level: {qualification.interest_level.upper()}",
            f"Decision Maker: {decision_maker}",
            "",
            "🏢 COMPANY INFO",
            "===============",
            f"Industry: {company.industry}",
            f"Size: {company.size}",
            f"Location: {company.location}",
            "",
            "🎯 QUALIFICATION",
            "================",
            f"Budget Range: {qualification.budget_range or 'Not specified'}",
            f"Timeline: {qualification.timeline or 'Not specified'}",
            f"Pain Points: {pain_points}",
            "",
            "🏷️ TAGS",
            "========",
            tags,
            "",
            "📝 NOTES",
            "========",
            f"{lead.notes or 'No additional notes'}",
            "",
        ]
        return "\n".join(lines)


def main():
    """Run the demo: two canned example leads, then an interactive prompt.

    Returns early, after printing a hint, when the generator cannot be
    created (no OpenRouter API key). In interactive mode each non-empty line
    is turned into a lead and saved; 'quit', 'exit', 'q', Ctrl-C or
    end-of-input (Ctrl-D / closed stdin) ends the session.
    """
    print("🚀 Website Lead Generation System")
    print("=" * 50)

    # Initialize the lead generator
    try:
        generator = WebsiteLeadGenerator()
        print("✅ Lead generator initialized successfully!")
    except ValueError as e:
        print(f"❌ Error: {e}")
        print("Please set your OPENROUTER_API_KEY environment variable.")
        return

    # Example usage scenarios
    examples = [
        {
            "description": "SaaS Company CTO",
            "input": """
            John Smith, CTO at TechCorp, a 150-person SaaS company in San Francisco.
            He's looking for AI solutions to improve their customer support automation.
            Email: john.smith@techcorp.com
            Budget: $50k-100k annually
            Timeline: Q2 2024 implementation
            """,
            "context": {
                "lead_source": "website",
                "utm_source": "google",
                "utm_medium": "cpc",
                "utm_campaign": "ai-solutions",
                "pages_visited": ["/ai-solutions", "/pricing", "/case-studies"],
                "content_downloaded": ["AI Implementation Guide"]
            }
        },
        {
            "description": "Marketing Director",
            "input": """
            Sarah Johnson, Marketing Director at GrowthCo, a 50-person marketing agency.
            Interested in lead generation tools and marketing automation.
            Contact: sarah.j@growthco.com
            Company website: growthco.com
            Looking to improve client acquisition by 30%
            """,
            "context": {
                "lead_source": "referral",
                "pages_visited": ["/marketing-automation", "/lead-generation"],
                "form_submissions": 2
            }
        }
    ]

    print("\n🎯 Generating Example Leads")
    print("=" * 30)

    for i, example in enumerate(examples, 1):
        print(f"\n📋 Example {i}: {example['description']}")
        print("-" * 40)

        try:
            # Generate lead
            lead = generator.generate_and_save_lead(
                example["input"],
                **example["context"]
            )

            # Display summary
            print(generator.get_lead_summary(lead))

        except Exception as e:
            # Demo boundary: report the failure and move on to the next example.
            print(f"❌ Error generating lead: {e}")

    # Interactive mode
    print("\n🔄 Interactive Mode")
    print("=" * 20)
    print("Enter lead information (or 'quit' to exit):")

    while True:
        try:
            user_input = input("\n📝 Enter lead details: ").strip()

            if user_input.lower() in ['quit', 'exit', 'q']:
                break

            if not user_input:
                print("Please enter some lead information.")
                continue

            # Generate and save lead
            lead = generator.generate_and_save_lead(user_input)
            print(generator.get_lead_summary(lead))

        except (KeyboardInterrupt, EOFError):
            # EOFError must end the loop too: once stdin is exhausted every
            # further input() raises it again, and the generic handler below
            # would otherwise print errors forever.
            print("\n\n👋 Goodbye!")
            break
        except Exception as e:
            print(f"❌ Error: {e}")

    print(f"\n✅ All leads saved to: {generator.output_file}")
    print("🎉 Lead generation complete!")


# Run the demo only when the file is executed as a script, not when imported.
if __name__ == "__main__":
    main()