Puppeteer 网络请求处理 #
网络请求基础 #
请求生命周期 #
text
┌─────────────────────────────────────────────────────────────┐
│ 网络请求生命周期 │
├─────────────────────────────────────────────────────────────┤
│ │
│ Request Response RequestFinished │
│ ┌──────┐ ┌──────┐ ┌──────┐ │
│ │ 发起 │ ──────► │ 响应 │ ────────► │ 完成 │ │
│ └──────┘ └──────┘ └──────┘ │
│ │ │ │ │
│ ▼ ▼ ▼ │
│ request response requestfinished │
│ 事件触发 事件触发 事件触发 │
│ │
│ 失败时触发 requestfailed 事件 │
│ │
└─────────────────────────────────────────────────────────────┘
监听网络事件 #
javascript
// Handlers for the four network lifecycle events, registered below.
const logRequest = (req) => {
  console.log('Request:', req.url());
};
const logResponse = (res) => {
  console.log('Response:', res.url(), res.status());
};
const logFinished = (req) => {
  console.log('Finished:', req.url());
};
const logFailed = (req) => {
  console.log('Failed:', req.url(), req.failure().errorText);
};

// A request was issued
page.on('request', logRequest);
// A response was received
page.on('response', logResponse);
// The request completed successfully
page.on('requestfinished', logFinished);
// The request failed
page.on('requestfailed', logFailed);
请求对象 #
Request 属性 #
javascript
page.on('request', (req) => {
  // Each entry pairs a log label with a lazy accessor, so every value is
  // read immediately before it is printed.
  const fields = [
    ['URL:', () => req.url()], // request URL
    ['Method:', () => req.method()], // HTTP method
    ['ResourceType:', () => req.resourceType()], // resource type
    ['Headers:', () => req.headers()], // request headers
    ['PostData:', () => req.postData()], // POST body
    ['IsNavigation:', () => req.isNavigationRequest()], // whether this is a navigation request
    ['Frame:', () => req.frame()], // owning frame
    ['RedirectChain:', () => req.redirectChain()], // redirect chain
  ];
  for (const [label, read] of fields) {
    console.log(label, read());
  }
});
资源类型 #
javascript
page.on('request', (req) => {
  const resourceType = req.resourceType();
  // Common resource types:
  //   document    - HTML document
  //   stylesheet  - CSS stylesheet
  //   image       - image
  //   media       - audio / video
  //   font        - font
  //   script      - JavaScript
  //   texttrack   - subtitles
  //   xhr         - XMLHttpRequest
  //   fetch       - Fetch API
  //   eventsource - Server-Sent Events
  //   websocket   - WebSocket
  //   manifest    - Web App Manifest
  //   other       - anything else
});
响应对象 #
Response 属性 #
javascript
page.on('response', (res) => {
  // Each entry pairs a log label with a lazy accessor, so every value is
  // read immediately before it is printed.
  const fields = [
    ['URL:', () => res.url()], // response URL
    ['Status:', () => res.status()], // status code
    ['StatusText:', () => res.statusText()], // status text
    ['Headers:', () => res.headers()], // response headers
    ['Ok:', () => res.ok()], // success flag (status 200-299)
    ['FromCache:', () => res.fromCache()], // served from cache
    ['FromServiceWorker:', () => res.fromServiceWorker()], // served by a Service Worker
    ['SecurityDetails:', () => res.securityDetails()], // security details
  ];
  for (const [label, read] of fields) {
    console.log(label, read());
  }
});
获取响应内容 #
javascript
page.on('response', async (res) => {
  // Only XHR responses are inspected
  if (res.request().resourceType() !== 'xhr') {
    return;
  }
  try {
    // Body parsed as JSON
    const parsed = await res.json();
    console.log('JSON:', parsed);
    // Body as text
    const raw = await res.text();
    console.log('Text:', raw);
    // Body as a Buffer
    const bytes = await res.buffer();
    console.log('Buffer size:', bytes.length);
  } catch (err) {
    console.error('Error reading response:', err);
  }
});
请求拦截 #
启用拦截 #
javascript
// Turn on request interception
await page.setRequestInterception(true);

// Let every intercepted request proceed unchanged
const passThrough = (req) => {
  req.continue();
};
page.on('request', passThrough);
阻止请求 #
javascript
// An intercepted request may be resolved (abort / continue / respond) only
// once. Registering two handlers that both resolve every request makes the
// second one throw "Request is already handled!", so the two variants below
// are defined as alternatives and exactly ONE of them is registered.

// Variant A: block images only
const blockImages = (request) => {
  if (request.resourceType() === 'image') {
    request.abort();
  } else {
    request.continue();
  }
};

// Variant B: block several resource types
const blockedTypes = ['image', 'stylesheet', 'font', 'media'];
const blockByType = (request) => {
  if (blockedTypes.includes(request.resourceType())) {
    request.abort();
  } else {
    request.continue();
  }
};

await page.setRequestInterception(true);
// Register one variant only; swap in blockImages to block images alone.
page.on('request', blockByType);
阻止特定 URL #
javascript
await page.setRequestInterception(true);

// URL fragments to block: the first two are ad networks, the last two are
// tracking scripts.
const blockedFragments = [
  'doubleclick.net',
  'googlesyndication.com',
  'analytics',
  'tracking',
];

page.on('request', (req) => {
  const target = req.url();
  const isBlocked = blockedFragments.some((fragment) => target.includes(fragment));
  if (isBlocked) {
    req.abort();
  } else {
    req.continue();
  }
});
修改请求 #
javascript
await page.setRequestInterception(true);

// A single handler applies both modifications. Each intercepted request may
// be continued only once, so two handlers that both call continue() would
// throw "Request is already handled!".
page.on('request', (request) => {
  // Override / add request headers
  const overrides = {
    headers: {
      ...request.headers(),
      'X-Custom-Header': 'Custom Value',
      'User-Agent': 'Mozilla/5.0 Custom UA',
    },
  };

  // Rewrite the POST body of the submit endpoint
  const isSubmit =
    request.method() === 'POST' && request.url().includes('/api/submit');
  if (isSubmit) {
    try {
      // postData() is undefined when there is no body, and the body may not
      // be JSON; in either case the request is sent with its original body
      // instead of being left stalled by an uncaught exception.
      const payload = JSON.parse(request.postData() ?? '');
      payload.timestamp = Date.now();
      overrides.postData = JSON.stringify(payload);
    } catch (error) {
      console.warn('POST body left unchanged (not a JSON object):', error.message);
    }
  }

  request.continue(overrides);
});
重定向请求 #
javascript
await page.setRequestInterception(true);
page.on('request', (req) => {
  // Anything that is not a .js URL goes through untouched
  if (!req.url().endsWith('.js')) {
    req.continue();
    return;
  }
  // Point .js requests at a local file
  req.continue({
    url: 'file:///path/to/local/file.js'
  });
});
响应模拟 #
使用请求拦截模拟响应 #
javascript
// Puppeteer has no page.route() / route.fulfill() (those are Playwright
// APIs). Responses are mocked by enabling request interception and calling
// request.respond() on the matching request.
await page.setRequestInterception(true);

page.on('request', (request) => {
  const url = request.url();
  const { pathname } = new URL(url);

  // Mock one specific endpoint
  if (pathname.endsWith('/api/users')) {
    request.respond({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify([
        { id: 1, name: 'John' },
        { id: 2, name: 'Jane' },
      ]),
    });
    return;
  }

  // Mock every other URL under /api/
  if (pathname.includes('/api/')) {
    request.respond({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify({ mocked: true, url }),
    });
    return;
  }

  // Everything else must be continued explicitly, otherwise it stalls
  request.continue();
});
模拟不同状态码 #
javascript
// Puppeteer has no page.route(); status codes are mocked through request
// interception and request.respond().
await page.setRequestInterception(true);

page.on('request', (request) => {
  const { pathname } = new URL(request.url());

  // Simulate a 404 error
  if (pathname.endsWith('/api/not-found')) {
    request.respond({
      status: 404,
      contentType: 'application/json',
      body: JSON.stringify({ error: 'Not found' }),
    });
    return;
  }

  // Simulate a 500 error
  if (pathname.endsWith('/api/error')) {
    request.respond({
      status: 500,
      contentType: 'application/json',
      body: JSON.stringify({ error: 'Internal server error' }),
    });
    return;
  }

  // Simulate a network-level failure
  if (pathname.endsWith('/api/network-error')) {
    request.abort('failed');
    return;
  }

  // Every unmatched request must be continued explicitly
  request.continue();
});
模拟延迟响应 #
javascript
// Puppeteer has no page.route(); a slow endpoint is simulated by delaying
// request.respond() inside a request-interception handler.
await page.setRequestInterception(true);

page.on('request', async (request) => {
  if (!new URL(request.url()).pathname.endsWith('/api/slow')) {
    request.continue();
    return;
  }
  // Hold the response back for 3 seconds
  await new Promise((resolve) => setTimeout(resolve, 3000));
  await request.respond({
    status: 200,
    contentType: 'application/json',
    body: JSON.stringify({ data: 'delayed response' }),
  });
});
使用 Fixture 文件 #
javascript
const fs = require('fs');

// Puppeteer has no page.route(); fixture data is served through request
// interception and request.respond().
await page.setRequestInterception(true);

page.on('request', (request) => {
  if (!new URL(request.url()).pathname.endsWith('/api/products')) {
    request.continue();
    return;
  }
  // Load the mock payload from a fixture file
  const data = fs.readFileSync('./fixtures/products.json', 'utf8');
  request.respond({
    status: 200,
    contentType: 'application/json',
    body: data,
  });
});
高级网络操作 #
请求认证 #
javascript
// Credentials used to answer HTTP authentication challenges
const credentials = {
  username: 'user',
  password: 'pass'
};
await page.authenticate(credentials);

await page.goto('https://protected.example.com');
设置额外 HTTP 头 #
javascript
// Extra headers passed to page.setExtraHTTPHeaders()
const extraHeaders = {
  'X-Custom-Header': 'value',
  'Authorization': 'Bearer token123'
};
await page.setExtraHTTPHeaders(extraHeaders);
处理 WebSocket #
javascript
// Puppeteer's Page does not emit a 'websocket' event (that is a Playwright
// API). WebSocket traffic is observed through the Chrome DevTools Protocol
// Network domain instead.
const wsClient = await page.createCDPSession();
await wsClient.send('Network.enable');

// requestId -> URL, so later frame events can be tied back to their socket
const socketUrls = new Map();

wsClient.on('Network.webSocketCreated', ({ requestId, url }) => {
  socketUrls.set(requestId, url);
  console.log('WebSocket opened:', url);
});

wsClient.on('Network.webSocketFrameReceived', ({ requestId, response }) => {
  console.log('Frames received:', socketUrls.get(requestId), response.payloadData);
});

wsClient.on('Network.webSocketFrameSent', ({ requestId, response }) => {
  console.log('Frames sent:', socketUrls.get(requestId), response.payloadData);
});

wsClient.on('Network.webSocketClosed', ({ requestId }) => {
  socketUrls.delete(requestId);
  console.log('WebSocket closed');
});
网络监控 #
javascript
// Collect information about every request
const requests = [];
// Index records by the request object itself. Matching by URL is wrong when
// the same URL is requested more than once: the response would always update
// the first record.
const recordByRequest = new Map();

page.on('request', (request) => {
  const record = {
    url: request.url(),
    method: request.method(),
    type: request.resourceType(),
    startTime: Date.now(),
  };
  requests.push(record);
  recordByRequest.set(request, record);
});

page.on('response', (response) => {
  const record = recordByRequest.get(response.request());
  if (!record) {
    return;
  }
  record.status = response.status();
  record.endTime = Date.now();
  record.duration = record.endTime - record.startTime;
  // content-length may be absent (e.g. chunked responses); count it as 0
  record.size = Number.parseInt(response.headers()['content-length'] ?? '0', 10) || 0;
});

await page.goto('https://example.com');

// Print request statistics
const totalSize = requests.reduce((sum, r) => sum + (r.size || 0), 0);
const totalDuration = requests.reduce((sum, r) => sum + (r.duration || 0), 0);
console.log('Total requests:', requests.length);
console.log('Total size:', totalSize);
// Guard against division by zero when nothing was requested
console.log('Average duration:', requests.length > 0 ? totalDuration / requests.length : 0);
网络限速 #
javascript
// Network.emulateNetworkConditions expects throughput in BYTES per second.
// This helper converts from kilobits per second, so 500 Kbit/s becomes
// 64000 bytes/s (62.5 KiB/s) -- not 500 KB/s.
const kbitToBytesPerSecond = (kbit) => (kbit * 1024) / 8;

// Emulate network conditions through CDP
const client = await page.createCDPSession();
await client.send('Network.emulateNetworkConditions', {
  offline: false,
  downloadThroughput: kbitToBytesPerSecond(500), // 500 Kbit/s
  uploadThroughput: kbitToBytesPerSecond(500), // 500 Kbit/s
  latency: 100, // 100 ms of added latency
});

// Preset network conditions
const networkConditions = {
  offline: {
    offline: true,
    downloadThroughput: 0,
    uploadThroughput: 0,
    latency: 0,
  },
  slow3G: {
    offline: false,
    downloadThroughput: kbitToBytesPerSecond(500), // 500 Kbit/s
    uploadThroughput: kbitToBytesPerSecond(500), // 500 Kbit/s
    latency: 300,
  },
  fast3G: {
    offline: false,
    downloadThroughput: kbitToBytesPerSecond(1.6 * 1024), // 1.6 Mbit/s
    uploadThroughput: kbitToBytesPerSecond(750), // 750 Kbit/s
    latency: 150,
  },
};
离线模式 #
javascript
// Network condition presets used to toggle connectivity
const OFFLINE = {
  offline: true,
  downloadThroughput: 0,
  uploadThroughput: 0,
  latency: 0
};
const ONLINE = {
  offline: false,
  downloadThroughput: -1,
  uploadThroughput: -1,
  latency: 0
};

// Go offline
const client = await page.createCDPSession();
await client.send('Network.emulateNetworkConditions', OFFLINE);

// Come back online
await client.send('Network.emulateNetworkConditions', ONLINE);
等待网络请求 #
waitForRequest #
javascript
// Wait for a specific request
const request = await page.waitForRequest(
  (req) => req.url().includes('/api/data')
);
console.log('Request URL:', request.url());

// Wait for a request triggered by a click. The wait is started together with
// the click so the request cannot slip by before the listener exists.
// (Named submitRequest: redeclaring `const request` is a SyntaxError.)
const [submitRequest] = await Promise.all([
  page.waitForRequest((req) => req.url().includes('/api/submit')),
  page.click('#submit-button'),
]);
waitForResponse #
javascript
// Wait for a specific response
const response = await page.waitForResponse(
  (res) => res.url().includes('/api/data')
);
const data = await response.json();
console.log('Response data:', data);

// Wait for a response triggered by a click. The wait is started together
// with the click so the response cannot arrive before the listener exists.
// (Named submitResponse: redeclaring `const response` is a SyntaxError.)
const [submitResponse] = await Promise.all([
  page.waitForResponse((res) => res.url().includes('/api/submit')),
  page.click('#submit-button'),
]);
const result = await submitResponse.json();
等待所有请求完成 #
javascript
// Wait until the network is idle: no network requests for 500 ms
const fullyIdle = { waitUntil: 'networkidle0' };
await page.goto('https://example.com', fullyIdle);

// Alternative: no more than 2 network requests for 500 ms
const mostlyIdle = { waitUntil: 'networkidle2' };
await page.goto('https://example.com', mostlyIdle);
完整示例 #
API 测试 #
javascript
const puppeteer = require('puppeteer');
const assert = require('assert');
/**
 * Example API test: mocks `/api/users`, triggers a request to it from the
 * page and asserts on the mocked response.
 *
 * @returns {Promise<void>} Resolves when all assertions pass.
 * @throws {Error} If an assertion fails or the browser cannot be driven.
 */
async function testAPI() {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Mock the API response. Puppeteer has no page.route() (that is a
    // Playwright API); request interception + request.respond() is used.
    await page.setRequestInterception(true);
    page.on('request', (request) => {
      if (new URL(request.url()).pathname.endsWith('/api/users')) {
        request.respond({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify([
            { id: 1, name: 'John' },
            { id: 2, name: 'Jane' },
          ]),
        });
      } else {
        request.continue();
      }
    });

    await page.goto('https://example.com');

    // Trigger the API request. The wait is registered in the same
    // Promise.all as the trigger so the response cannot be missed.
    const [response] = await Promise.all([
      page.waitForResponse((res) => res.url().includes('/api/users')),
      page.evaluate(() => fetch('/api/users').then((res) => res.status)),
    ]);

    // Verify the response
    assert.strictEqual(response.status(), 200);
    const users = await response.json();
    assert.strictEqual(users.length, 2);
    console.log('API test passed!');
  } finally {
    // Always release the browser, even when an assertion fails
    await browser.close();
  }
}

testAPI().catch((error) => {
  console.error('API test failed:', error);
  process.exitCode = 1;
});
性能监控 #
javascript
const puppeteer = require('puppeteer');
/**
 * Loads `url`, records every network request made by the page and prints a
 * summary report (counts and sizes per resource type, slow requests, failed
 * requests).
 *
 * @param {string} url - Page to load and monitor.
 * @returns {Promise<object>} The generated report (also logged to the console).
 */
async function monitorPerformance(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();
    // Keyed by the request object rather than its URL: the same URL can be
    // requested several times and URL keys would overwrite earlier entries.
    const requests = new Map();

    // Track requests
    page.on('request', (request) => {
      requests.set(request, {
        url: request.url(),
        method: request.method(),
        type: request.resourceType(),
        startTime: Date.now(),
      });
    });

    // Track responses
    page.on('response', (response) => {
      const record = requests.get(response.request());
      if (!record) {
        return;
      }
      record.status = response.status();
      record.endTime = Date.now();
      record.duration = record.endTime - record.startTime;
      // content-length may be missing (e.g. chunked encoding); count it as 0
      record.size = Number.parseInt(response.headers()['content-length'] ?? '0', 10) || 0;
    });

    // Track network-level failures, which never produce a response
    page.on('requestfailed', (request) => {
      const record = requests.get(request);
      if (!record) {
        return;
      }
      record.endTime = Date.now();
      record.duration = record.endTime - record.startTime;
      record.errorText = request.failure()?.errorText ?? 'unknown error';
    });

    await page.goto(url, { waitUntil: 'networkidle0' });

    // Build the report
    const report = {
      totalRequests: requests.size,
      byType: {},
      slowRequests: [],
      failedRequests: [],
    };
    for (const record of requests.values()) {
      // Per-type statistics
      if (!report.byType[record.type]) {
        report.byType[record.type] = { count: 0, totalSize: 0 };
      }
      report.byType[record.type].count++;
      report.byType[record.type].totalSize += record.size || 0;
      // Slow requests (over one second)
      if (record.duration > 1000) {
        report.slowRequests.push(record);
      }
      // Failed requests: HTTP error status or a network-level failure
      if (record.status >= 400 || record.errorText !== undefined) {
        report.failedRequests.push(record);
      }
    }

    console.log('Performance Report:', JSON.stringify(report, null, 2));
    return report;
  } finally {
    // Always release the browser, even if navigation fails
    await browser.close();
  }
}

monitorPerformance('https://example.com').catch((error) => {
  console.error('Performance monitoring failed:', error);
  process.exitCode = 1;
});
爬虫优化 #
javascript
const puppeteer = require('puppeteer');
/**
 * Scrapes the title and `.content` text of `url`, skipping resources that
 * are not needed for data extraction.
 *
 * @param {string} url - Page to scrape.
 * @returns {Promise<{title: string, content: (string|undefined)}>} The page
 *   title and the text of the first `.content` element (undefined if absent).
 */
async function optimizedScraper(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Enable request interception
    await page.setRequestInterception(true);

    // Block resources that are not needed for scraping
    const blockedTypes = ['image', 'stylesheet', 'font', 'media'];
    page.on('request', (request) => {
      const requestUrl = request.url();
      const isBlocked =
        blockedTypes.includes(request.resourceType()) ||
        requestUrl.includes('analytics') ||
        requestUrl.includes('tracking');
      if (isBlocked) {
        request.abort();
      } else {
        request.continue();
      }
    });

    // Use a reasonable timeout
    page.setDefaultTimeout(30000);
    await page.goto(url, { waitUntil: 'domcontentloaded' });

    // Extract the data
    return await page.evaluate(() => {
      return {
        title: document.title,
        content: document.querySelector('.content')?.textContent
      };
    });
  } finally {
    // Close the browser even when navigation or extraction throws;
    // otherwise the Chromium process is leaked.
    await browser.close();
  }
}
下一步 #
现在你已经掌握了 Puppeteer 的网络请求处理功能,接下来学习 高级功能 了解更多高级技巧!
最后更新:2026-03-28