@litwicki
Created February 4, 2026 05:19
# Rate Limiting and 429 Handling
## 1. Application-Level Rate Limiting
### RateLimiter Class (`rate-limiter.ts`)
```typescript
import path from 'node:path';
import { promises as fs } from 'node:fs';

interface RateLimitState {
  requestsInCurrentMinute: number;
  requestsIn24Hours: number;
  currentMinuteStart: number;
  last24HoursStart: number;
  requestTimestamps: number[];
}

export class RateLimiter {
  private stateFile: string;
  private state: RateLimitState;
  private maxRequestsPerMinute: number;
  private maxRequestsPer24Hours: number;

  constructor(
    outputDir: string,
    maxRequestsPerMinute = 30,
    maxRequestsPer24Hours = 10000
  ) {
    this.stateFile = path.join(outputDir, 'rate-limit-state.json');
    this.maxRequestsPerMinute = maxRequestsPerMinute;
    this.maxRequestsPer24Hours = maxRequestsPer24Hours;
    this.state = {
      requestsInCurrentMinute: 0,
      requestsIn24Hours: 0,
      currentMinuteStart: Date.now(),
      last24HoursStart: Date.now(),
      requestTimestamps: [],
    };
  }

  async waitIfNeeded(): Promise<void> {
    const now = Date.now();

    // Clean up old timestamps
    this.cleanupOldTimestamps();

    // Check 24-hour limit
    if (this.state.requestsIn24Hours >= this.maxRequestsPer24Hours) {
      const oldestTimestamp = this.state.requestTimestamps[0];
      const timeUntilReset = oldestTimestamp + 24 * 60 * 60 * 1000 - now;
      if (timeUntilReset > 0) {
        const hoursToWait = Math.ceil(timeUntilReset / (60 * 60 * 1000));
        console.log(
          `⚠️ 24-hour limit reached (${this.maxRequestsPer24Hours} requests). Waiting ${hoursToWait} hour(s)...`
        );
        await this.sleep(timeUntilReset);
        this.cleanupOldTimestamps();
      }
    }

    // Check per-minute limit
    const timeSinceMinuteStart = now - this.state.currentMinuteStart;
    if (timeSinceMinuteStart >= 60000) {
      // Reset minute counter
      this.state.currentMinuteStart = now;
      this.state.requestsInCurrentMinute = 0;
    } else if (this.state.requestsInCurrentMinute >= this.maxRequestsPerMinute) {
      // Wait until the minute is over
      const timeToWait = 60000 - timeSinceMinuteStart;
      console.log(
        `⚠️ Rate limit: ${this.maxRequestsPerMinute} requests/minute reached. Waiting ${Math.ceil(timeToWait / 1000)}s...`
      );
      await this.sleep(timeToWait);
      this.state.currentMinuteStart = Date.now();
      this.state.requestsInCurrentMinute = 0;
    }
  }

  async recordRequest(): Promise<void> {
    const now = Date.now();
    this.state.requestsInCurrentMinute++;
    this.state.requestsIn24Hours++;
    this.state.requestTimestamps.push(now);

    // Save state every 10 requests
    if (this.state.requestsIn24Hours % 10 === 0) {
      await this.save();
    }
  }

  getStats() {
    // Exposes current counters alongside the configured limits
    // (used by the scraper's progress logging below).
    return {
      requestsThisMinute: this.state.requestsInCurrentMinute,
      minuteLimit: this.maxRequestsPerMinute,
      requestsToday: this.state.requestsIn24Hours,
      dailyLimit: this.maxRequestsPer24Hours,
    };
  }

  private async save(): Promise<void> {
    // Persist counters so a restarted run keeps honoring the 24-hour window.
    await fs.writeFile(this.stateFile, JSON.stringify(this.state), 'utf8');
  }

  private cleanupOldTimestamps(): void {
    const now = Date.now();
    const oneDayAgo = now - 24 * 60 * 60 * 1000;

    // Remove timestamps older than 24 hours
    this.state.requestTimestamps = this.state.requestTimestamps.filter(
      (timestamp) => timestamp > oneDayAgo
    );
    this.state.requestsIn24Hours = this.state.requestTimestamps.length;
  }

  private sleep(ms: number): Promise<void> {
    return new Promise((resolve) => setTimeout(resolve, ms));
  }
}
```
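Note that the two windows behave differently: the minute counter is a fixed window that resets, while the 24-hour count is a sliding window recomputed from stored timestamps. A minimal standalone sketch of the sliding-window logic (`countRecent` is a hypothetical helper mirroring `cleanupOldTimestamps`, not part of the class):

```typescript
// Sliding-window count: keep only timestamps inside the window, then count them.
function countRecent(timestamps: number[], now: number, windowMs: number): number {
  return timestamps.filter((t) => t > now - windowMs).length;
}

const DAY_MS = 24 * 60 * 60 * 1000;
const now = Date.now();
// Two stamps inside the last 24 hours, two at/beyond the boundary.
const stamps = [now - DAY_MS - 1, now - DAY_MS, now - 1000, now];
console.log(countRecent(stamps, now, DAY_MS)); // 2
```

Because the count is derived from the timestamps themselves, persisting the timestamp array is enough to make the daily limit survive restarts.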
## 2. API-Level 429 Handling
### API Client with Exponential Backoff (`api-client.ts`)
```typescript
// Base URL and response shape are assumptions inferred from usage elsewhere
// in this document; adjust to the actual Metron API if they differ.
const METRON_BASE_URL = 'https://metron.cloud/api';

interface MetronCharacterDetail {
  id: number;
  name: string;
  gcd_id?: number | null;
  image?: string | null;
}

export class MetronCharacterApiClient {
  private authHeader: string;
  private lastRequestTime = 0;

  constructor(username: string, password: string) {
    // HTTP Basic auth header built from the supplied credentials.
    this.authHeader =
      'Basic ' + Buffer.from(`${username}:${password}`).toString('base64');
  }

  private async retryWithBackoff<T>(
    operation: () => Promise<T>,
    context: string
  ): Promise<T> {
    const MAX_RETRIES = 5;
    const INITIAL_BACKOFF = 2000; // 2 seconds
    let lastError: Error | null = null;

    for (let attempt = 0; attempt < MAX_RETRIES; attempt++) {
      try {
        return await operation();
      } catch (error) {
        lastError = error as Error;

        // Check if it's a 429 error
        if (error instanceof Error && error.message.includes('429')) {
          const backoffTime = INITIAL_BACKOFF * Math.pow(2, attempt);
          console.log(
            `⚠️ Rate limited (429) on ${context}. Waiting ${backoffTime / 1000}s before retry ${attempt + 1}/${MAX_RETRIES}...`
          );
          await new Promise((resolve) => setTimeout(resolve, backoffTime));
          continue;
        }

        // For other errors, throw immediately
        throw error;
      }
    }

    throw new Error(
      `Failed after ${MAX_RETRIES} retries for ${context}: ${lastError?.message}`
    );
  }

  async fetchCharacterDetail(id: number): Promise<MetronCharacterDetail> {
    return this.retryWithBackoff(async () => {
      const url = `${METRON_BASE_URL}/character/${id}/`;
      const response = await fetch(url, {
        headers: {
          Authorization: this.authHeader,
          Accept: 'application/json',
        },
      });
      if (!response.ok) {
        throw new Error(
          `Failed to fetch character detail (id ${id}): ${response.status} ${response.statusText}`
        );
      }
      return response.json() as Promise<MetronCharacterDetail>;
    }, `character detail ${id}`);
  }
}
```
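One refinement worth noting, though it is not in the client above: 429 responses may carry a standard `Retry-After` header giving the exact wait in seconds, which is more accurate than a guessed schedule. A hedged sketch that prefers the header and falls back to the exponential schedule (`backoffMs` is a hypothetical helper, not part of the original client):

```typescript
// Prefer the server's Retry-After header (delay in seconds) when present and
// numeric; otherwise fall back to the exponential schedule used above.
function backoffMs(
  attempt: number,
  retryAfterHeader: string | null,
  initialMs = 2000
): number {
  const retryAfter = retryAfterHeader === null ? NaN : Number(retryAfterHeader);
  if (Number.isFinite(retryAfter) && retryAfter >= 0) {
    return retryAfter * 1000;
  }
  return initialMs * 2 ** attempt;
}

console.log(backoffMs(3, null)); // 16000 — falls back to 2000 * 2^3
console.log(backoffMs(0, '30')); // 30000 — honors the header
```

The header is available from the Fetch API as `response.headers.get('Retry-After')`; `Retry-After` may also be an HTTP date rather than seconds, which this sketch does not handle.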
## 3. Integration in Scraper
### How Both Layers Work Together (`scraper.ts`)
```typescript
export class MetronCharacterScraper {
  private client: MetronCharacterApiClient;
  private rateLimiter: RateLimiter;
  // ID list and progress bookkeeping live in a tracker defined elsewhere
  // in the project; typed loosely here for brevity.
  private tracker: any;

  constructor(username: string, password: string, outputDir: string) {
    this.client = new MetronCharacterApiClient(username, password);
    this.rateLimiter = new RateLimiter(outputDir, 30, 10000);
  }

  async fetchCharacterDetails(): Promise<void> {
    const allIds: number[] = this.tracker.getAllCharacterIds();
    // Accumulates processed results (persisted elsewhere in the project).
    const characters: Array<{
      id: number;
      name: string;
      gcd_id: number | null;
      image: string | null;
    }> = [];

    for (let i = 0; i < allIds.length; i++) {
      const id = allIds[i];
      try {
        // STEP 1: Application-level rate limiting
        // Waits if we've hit 30 req/min or 10,000 req/24h
        await this.rateLimiter.waitIfNeeded();

        // STEP 2: Make API request
        // If API returns 429, retries with exponential backoff (2s, 4s, 8s, 16s, 32s)
        const detail = await this.client.fetchCharacterDetail(id);

        // STEP 3: Record the successful request
        await this.rateLimiter.recordRequest();

        // Process the data
        characters.push({
          id: detail.id,
          name: detail.name,
          gcd_id: detail.gcd_id ?? null,
          image: detail.image ?? null,
        });

        // Show progress with rate limit stats
        const progress = this.tracker.getProgress();
        const stats = this.rateLimiter.getStats();
        console.log(
          `[${progress.processed}/${progress.total}] (${progress.percentage.toFixed(1)}%) ` +
            `Processed character ${id}: ${detail.name} | ` +
            `Rate: ${stats.requestsThisMinute}/${stats.minuteLimit}/min, ` +
            `${stats.requestsToday}/${stats.dailyLimit}/day`
        );
      } catch (error) {
        console.error(`Error fetching character ${id}:`, error);
        // Continue with next character
      }
    }
  }
}
```
## 4. Example Console Output
### Normal Operation
```
[1/16938] (0.0%) Processed character 1: Spider-Man | Rate: 1/30/min, 1/10000/day
[2/16938] (0.0%) Processed character 2: Batman | Rate: 2/30/min, 2/10000/day
[3/16938] (0.0%) Processed character 3: Superman | Rate: 3/30/min, 3/10000/day
...
[28/16938] (0.2%) Processed character 28: Iron Man | Rate: 28/30/min, 28/10000/day
[29/16938] (0.2%) Processed character 29: Thor | Rate: 29/30/min, 29/10000/day
[30/16938] (0.2%) Processed character 30: Hulk | Rate: 30/30/min, 30/10000/day
```
### Per-Minute Limit Reached
```
[30/16938] (0.2%) Processed character 30: Hulk | Rate: 30/30/min, 30/10000/day
⚠️ Rate limit: 30 requests/minute reached. Waiting 15s...
[31/16938] (0.2%) Processed character 31: Captain America | Rate: 1/30/min, 31/10000/day
```
### API Returns 429 Error
```
[150/16938] (0.9%) Processing character 150: Wolverine...
⚠️ Rate limited (429) on character detail 150. Waiting 2s before retry 1/5...
⚠️ Rate limited (429) on character detail 150. Waiting 4s before retry 2/5...
[150/16938] (0.9%) Processed character 150: Wolverine | Rate: 15/30/min, 150/10000/day
```
### Daily Limit Reached
```
[10000/16938] (59.0%) Processed character 10000: Green Lantern | Rate: 25/30/min, 10000/10000/day
⚠️ 24-hour limit reached (10000 requests). Waiting 8 hour(s)...
[10001/16938] (59.0%) Processed character 10001: Flash | Rate: 1/30/min, 1/10000/day
```
## 5. Key Differences
### Application-Level Rate Limiting (RateLimiter)
- **Purpose**: Prevent hitting API limits in the first place
- **Proactive**: Waits BEFORE making requests
- **Limits**: 30 req/min, 10,000 req/24h
- **Persistent**: State saved to disk, survives restarts
- **Scope**: Controls all requests from the scraper
### API-Level 429 Handling (API Client)
- **Purpose**: Handle cases where API still returns 429
- **Reactive**: Retries AFTER getting 429 error
- **Strategy**: Exponential backoff (2s, 4s, 8s, 16s, 32s)
- **Transient**: No persistent state
- **Scope**: Per-request error handling
## 6. Why Both?
Having both layers provides:
1. **Efficiency**: Application-level limiting avoids sending requests that would only come back as 429s
2. **Reliability**: API-level handling catches edge cases (clock skew, other clients, etc.)
3. **Resilience**: If one layer fails, the other provides backup
4. **Transparency**: Clear logging shows which layer is active
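The two layers reduce to one guarded-request pattern: wait proactively, attempt, and back off on failure. A self-contained sketch under stated assumptions (`guardedFetch`, `throttle`, and `fetchOnce` are hypothetical stand-ins for the real `RateLimiter` and API client):

```typescript
// Layer 1 (proactive): throttle() waits before every attempt.
// Layer 2 (reactive): a 429-style failure triggers exponential backoff + retry.
async function guardedFetch<T>(
  throttle: () => Promise<void>,
  fetchOnce: () => Promise<T>,
  maxRetries = 5,
  initialBackoffMs = 2000
): Promise<T> {
  let lastError: Error | null = null;
  for (let attempt = 0; attempt < maxRetries; attempt++) {
    await throttle();
    try {
      return await fetchOnce();
    } catch (err) {
      lastError = err as Error;
      // Non-429 errors are not retried, matching the client above.
      if (!(err instanceof Error) || !err.message.includes('429')) throw err;
      await new Promise((r) => setTimeout(r, initialBackoffMs * 2 ** attempt));
    }
  }
  throw new Error(`gave up after ${maxRetries} attempts: ${lastError?.message}`);
}

// Demo: fail twice with 429, then succeed (tiny backoff so it runs fast).
(async () => {
  let calls = 0;
  const result = await guardedFetch(
    async () => {}, // no-op throttle for the demo
    async () => {
      calls++;
      if (calls < 3) throw new Error('HTTP 429');
      return 'ok';
    },
    5,
    1
  );
  console.log(result, calls); // ok 3
})();
```

Keeping the throttle and the retry as separate parameters is what lets either layer be changed (or fail) without touching the other.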
## 7. Backoff Calculation
```typescript
// Exponential backoff formula
const backoffTime = INITIAL_BACKOFF * Math.pow(2, attempt);
// Results:
// Attempt 0: 2000 * 2^0 = 2000ms (2 seconds)
// Attempt 1: 2000 * 2^1 = 4000ms (4 seconds)
// Attempt 2: 2000 * 2^2 = 8000ms (8 seconds)
// Attempt 3: 2000 * 2^3 = 16000ms (16 seconds)
// Attempt 4: 2000 * 2^4 = 32000ms (32 seconds)
```
This exponential backoff gives the API time to recover while being respectful of rate limits.
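The attempt-by-attempt values in the comments above can be reproduced directly:

```typescript
// Compute the full backoff schedule for 5 attempts.
const INITIAL_BACKOFF = 2000;
const delays = Array.from({ length: 5 }, (_, attempt) => INITIAL_BACKOFF * 2 ** attempt);
console.log(delays); // [ 2000, 4000, 8000, 16000, 32000 ]
```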