A reusable technique for intercepting, analyzing, and replaying private API calls from any web app using playwriter.
Instead of scraping the DOM or taking screenshots, this approach:
- Sets up network interception to capture outgoing API requests
- Triggers the UI action that fires the request
- Inspects the response to understand the schema and pagination
- Replays the API call directly with modified parameters (larger page size, cursor pagination)
This is faster, more reliable, and gets structured data instead of parsed HTML.
playwriter session new
# => 1
playwriter -s 1 -e "
// Open a fresh tab, keep it on state so later commands can reuse it, then load the target.
const tab = await context.newPage();
state.page = tab;
await tab.goto('https://example.com');
await waitForPageLoad({ page: tab });
console.log('Loaded:', tab.url());
" --timeout 30000

If the site requires login, either:
- Log in manually in the browser window, or
- Use `state.page.fill()` and `state.page.click()` to automate login
Install request/response listeners before triggering the action you want to capture. Filter by URL patterns relevant to the target (e.g. /api/, /graphql, subdomains like api.example.com).
playwriter -s 1 -e "
// Capture buffers live on state so later commands can read and reset them.
state.requests = [];
state.responses = [];

// Single place that decides which URLs count as API traffic.
const isApiTraffic = (target) => ['/api/', '/graphql'].some((marker) => target.includes(marker));

// Record URL, method, headers and body of every matching outgoing request.
state.page.on('request', (request) => {
  const url = request.url();
  if (isApiTraffic(url)) {
    const method = request.method();
    const headers = request.headers();
    const postData = request.postData();
    state.requests.push({ url, method, headers, postData });
  }
});

// Record URL, status and parsed JSON body of every matching response.
state.page.on('response', async (response) => {
  const url = response.url();
  if (isApiTraffic(url)) {
    try {
      // The await stays inside the push call on purpose: the target array and the
      // status are resolved before the body is awaited.
      state.responses.push({ url, status: response.status(), body: await response.json() });
    } catch {
      // Best effort: a response whose body cannot be read as JSON is dropped.
    }
  }
});

console.log('Interception ready');
"

Click the button, open the modal, scroll the page -- whatever triggers the API call.
playwriter -s 1 -e "
// Start from empty buffers so only traffic caused by this action is listed
state.requests = [];
state.responses = [];
// Fire the UI action under investigation (e.g. click a link, open a modal)
await state.page.click('a[href*=\"/following\"]');
// Give the resulting network calls a fixed 3 seconds to complete
await new Promise((resolve) => {
  setTimeout(resolve, 3000);
});
// Summarise the captured traffic, URLs cut to 120 characters
console.log('Captured', state.requests.length, 'requests');
for (const [index, request] of state.requests.entries()) {
  console.log('REQ', index, request.method, request.url.slice(0, 120));
}
for (const [index, response] of state.responses.entries()) {
  console.log('RES', index, response.status, response.url.slice(0, 120));
}
"

Understand the shape of the data: what fields are returned, how pagination works.
playwriter -s 1 -e "
// Print the shape of the first captured response: top-level keys, the usual
// pagination fields, and one sample record.
const res = state.responses?.[0];
if (res == null) {
  // Fail with an actionable message instead of a TypeError on res.body.
  throw new Error('No responses captured - trigger the action again or widen the URL filter');
}
const body = res.body;
if (body === null || typeof body !== 'object') {
  throw new Error('First captured response is not a JSON object or array: ' + JSON.stringify(body));
}
console.log('Top-level keys:', Object.keys(body));
console.log('Pagination fields:', {
  has_more: body.has_more,
  // ?? instead of || so a legitimate falsy cursor such as 0 is still shown.
  next_cursor: body.next_max_id ?? body.next_cursor ?? body.end_cursor,
  page_size: body.page_size
});
// Sample one record. Some APIs return the list itself as the top-level body.
const items = Array.isArray(body) ? body : (body.users || body.data || body.items || body.results);
if (Array.isArray(items) && items.length > 0) {
  console.log('Items count:', items.length);
  console.log('Item keys:', Object.keys(items[0]));
  console.log('Sample:', JSON.stringify(items[0], null, 2).slice(0, 500));
} else {
  console.log('No item array found under users/data/items/results');
}
"

Identify which headers are needed to replay the request. Common ones: CSRF tokens, API keys, session claims.
playwriter -s 1 -e "
// List the headers of the first captured request whose names look auth-related,
// so you can decide which ones the replay has to send. Values longer than
// 80 characters are truncated with a trailing '...'.
// NOTE: headers captured with Playwright request.headers() do not include
// cookie headers; request.allHeaders() would be needed to see those.
const first = state.requests?.[0];
if (first == null) {
  // Fail with an actionable message instead of a TypeError on .headers.
  throw new Error('No requests captured - run the interception and trigger steps first');
}
// api[-_.]?key also matches a bare apikey header; a plain api.key pattern requires
// exactly one character between the two words and would miss it.
const AUTH_HEADER = /cookie|csrf|token|auth|api[-_.]?key|claim|session|x-ig|x-requested/i;
const auth = {};
for (const [name, value] of Object.entries(first.headers)) {
  if (AUTH_HEADER.test(name)) {
    // Coerce defensively so a non-string header value cannot break slice().
    const text = String(value);
    auth[name] = text.length > 80 ? text.slice(0, 80) + '...' : text;
  }
}
console.log(JSON.stringify(auth, null, 2));
"

Use `page.evaluate(fetch(...))` to replay the API from the browser context (preserves cookies). Increase the page size and loop through cursors.
playwriter -s 1 -e "
// Replay the captured endpoint from inside the page (so cookies are sent automatically),
// asking for 100 items per page and following the cursor until the API reports the end.
// Results accumulate on state.allItems, so pages already fetched survive a failure.
const baseUrl = 'https://example.com/api/v1/endpoint/';
const captured = state.requests[0].headers;
// Forward only the auth headers that were actually captured: a missing header passed
// as undefined would be sent by fetch as the literal string 'undefined'.
const replayHeaders = {};
for (const name of ['x-csrftoken', 'x-ig-app-id', 'x-ig-www-claim', 'x-requested-with']) {
  if (captured[name] !== undefined) {
    replayHeaders[name] = captured[name];
  }
}
state.allItems = [];
let cursor = null;
let hasMore = true;
let page = 0;
while (hasMore) {
  // Build the query through URL.searchParams so opaque cursors (base64 with + / =)
  // are percent-encoded instead of corrupting the query string.
  const target = new URL(baseUrl);
  target.searchParams.set('count', '100');
  if (cursor !== null) {
    target.searchParams.set('max_id', String(cursor));
  }
  const url = target.toString();
  const data = await state.page.evaluate(async ({ url, headers }) => {
    const res = await fetch(url, { headers });
    // Surface 429/403/5xx as a clear error instead of a JSON parse failure.
    if (res.ok === false) {
      throw new Error('Replay failed with HTTP ' + res.status + ' for ' + url);
    }
    return res.json();
  }, { url, headers: replayHeaders });
  const items = data.users || data.items || data.results || [];
  state.allItems.push(...items);
  const nextCursor = data.next_max_id || data.next_cursor || null;
  // Stop when the API says so, and also when it claims more data but hands back no
  // new cursor - otherwise the same page would be requested forever.
  const stalled = nextCursor === null || nextCursor === cursor;
  if (data.has_more && stalled) {
    console.log('Stopping: has_more is set but no new cursor was returned');
  }
  hasMore = Boolean(data.has_more) && stalled === false;
  cursor = nextCursor;
  page++;
  console.log('Page', page, '- got', items.length, '- total:', state.allItems.length);
  // Rate limit protection (not needed after the final page)
  if (hasMore) {
    await new Promise(r => setTimeout(r, 500));
  }
}
console.log('Done:', state.allItems.length, 'items');
" --timeout 300000

playwriter -s 1 -e "
// Persist the collected records twice: a structured JSON file and a
// one-line-per-user text list.
const { writeFileSync } = require('node:fs');
const rows = [];
for (const [position, user] of state.allItems.entries()) {
  const { username, full_name, pk, is_verified, is_private } = user;
  // index is 1-based; pk is exported under the name id.
  rows.push({ index: position + 1, username, full_name, id: pk, is_verified, is_private });
}
writeFileSync('results.json', JSON.stringify(rows, null, 2));
const lines = rows.map(({ username, full_name }) => {
  // The display name is appended in parentheses only when it is non-empty.
  const suffix = full_name ? ' (' + full_name + ')' : '';
  return username + suffix;
});
writeFileSync('results.txt', lines.join('\n'));
console.log('Saved', rows.length, 'items');
"

playwriter -s 1 -e "
// Detach the request and response listeners from the page once capturing is finished.
// Note: removeAllListeners drops every handler for these events, not only the ones
// registered by this workflow.
for (const eventName of ['request', 'response']) {
  state.page.removeAllListeners(eventName);
}
console.log('Listeners removed');
"

- Increase `count`/`limit` param: APIs often accept larger page sizes than the UI uses. Try `100` or `200` instead of the default `12`.
- Watch for rate limits: Add `500-1000ms` delays between requests. If you get 429s, back off.
- GraphQL APIs: Check `postData` for the query/variables instead of URL params. Replay with `method: 'POST'` and the same body, modifying pagination variables.
- Use `page.evaluate(fetch(...))`: Replaying from the browser context inherits cookies automatically -- no need to manually pass cookie headers.
- Multiple endpoints: Some UIs fire several API calls. Inspect all captured requests to find the one carrying the data you want.
- Cursor formats vary: Common patterns are `max_id` (numeric offset), `end_cursor` (opaque base64 string), `page` (page number), `offset` (numeric offset).
Endpoint: GET /api/v1/friendships/{user_id}/following/?count=100&max_id={cursor}
Auth: x-csrftoken, x-ig-app-id, x-ig-www-claim, x-requested-with
Paginate: has_more (bool) + next_max_id (numeric cursor)
Response: { users: [{ username, full_name, pk, is_verified, is_private, ... }], has_more, next_max_id }
Rate: ~500ms between requests, 100 per page -> 1969 accounts in 20 requests / ~15 seconds