Puppeteer 网络请求处理 #

网络请求基础 #

请求生命周期 #

text
┌─────────────────────────────────────────────────────────────┐
│                    网络请求生命周期                           │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│   Request          Response          RequestFinished        │
│   ┌──────┐         ┌──────┐          ┌──────┐              │
│   │ 发起 │ ──────► │ 响应 │ ────────► │ 完成 │              │
│   └──────┘         └──────┘          └──────┘              │
│      │                │                  │                  │
│      ▼                ▼                  ▼                  │
│   request          response          requestfinished        │
│   事件触发          事件触发            事件触发              │
│                                                             │
│   失败时触发 requestfailed 事件                               │
│                                                             │
└─────────────────────────────────────────────────────────────┘

监听网络事件 #

javascript
// One named logger per network lifecycle event.

// Fired when the page issues a request.
const logRequest = (req) => {
  console.log('Request:', req.url());
};

// Fired when a response is received; logs the URL and HTTP status.
const logResponse = (res) => {
  console.log('Response:', res.url(), res.status());
};

// Fired when a request finished successfully.
const logFinished = (req) => {
  console.log('Finished:', req.url());
};

// Fired when a request fails; failure() carries the error text.
const logFailed = (req) => {
  const failedUrl = req.url();
  const { errorText } = req.failure();
  console.log('Failed:', failedUrl, errorText);
};

page.on('request', logRequest);
page.on('response', logResponse);
page.on('requestfinished', logFinished);
page.on('requestfailed', logFailed);

请求对象 #

Request 属性 #

javascript
// Print every accessor of an intercepted request, one line per property.
page.on('request', (request) => {
  // [label, accessor] pairs; accessors are invoked lazily, right before
  // their value is logged.
  const accessors = [
    ['URL:', () => request.url()],                           // request URL
    ['Method:', () => request.method()],                     // HTTP method
    ['ResourceType:', () => request.resourceType()],         // resource type
    ['Headers:', () => request.headers()],                   // request headers
    ['PostData:', () => request.postData()],                 // POST body
    ['IsNavigation:', () => request.isNavigationRequest()],  // navigation request?
    ['Frame:', () => request.frame()],                       // owning frame
    ['RedirectChain:', () => request.redirectChain()],       // redirect chain
  ];

  for (const [label, read] of accessors) {
    console.log(label, read());
  }
});

资源类型 #

javascript
page.on('request', (request) => {
  // resourceType() returns one of the strings listed below.
  const resourceType = request.resourceType();

  // Common resource types:
  //   document    - HTML document
  //   stylesheet  - CSS stylesheet
  //   image       - image
  //   media       - audio / video
  //   font        - font
  //   script      - JavaScript
  //   texttrack   - subtitles
  //   xhr         - XMLHttpRequest
  //   fetch       - Fetch API
  //   eventsource - Server-Sent Events
  //   websocket   - WebSocket
  //   manifest    - Web App Manifest
  //   other       - anything else
});

响应对象 #

Response 属性 #

javascript
// Print every accessor of a received response, one line per property.
page.on('response', (response) => {
  // [label, accessor] pairs; accessors are invoked lazily, right before
  // their value is logged.
  const accessors = [
    ['URL:', () => response.url()],                              // response URL
    ['Status:', () => response.status()],                        // status code
    ['StatusText:', () => response.statusText()],                // status text
    ['Headers:', () => response.headers()],                      // response headers
    ['Ok:', () => response.ok()],                                // success (200-299)
    ['FromCache:', () => response.fromCache()],                  // served from cache?
    ['FromServiceWorker:', () => response.fromServiceWorker()],  // served by a service worker?
    ['SecurityDetails:', () => response.securityDetails()],      // security details
  ];

  for (const [label, read] of accessors) {
    console.log(label, read());
  }
});

获取响应内容 #

javascript
// Read the body of every XHR response as JSON, text and Buffer.
page.on('response', async (response) => {
  if (response.request().resourceType() !== 'xhr') {
    return;
  }

  try {
    // json() rejects when the body is not valid JSON. Handle that locally so
    // a plain-text or HTML payload does not skip the text/buffer reads below.
    try {
      const json = await response.json();
      console.log('JSON:', json);
    } catch (parseError) {
      console.warn('Could not parse response as JSON:', parseError.message);
    }

    // Body as text.
    const text = await response.text();
    console.log('Text:', text);

    // Body as a Buffer.
    const buffer = await response.buffer();
    console.log('Buffer size:', buffer.length);
  } catch (error) {
    // Reached when the body itself cannot be loaded.
    console.error('Error reading response:', error);
  }
});

请求拦截 #

启用拦截 #

javascript
// Turn on request interception: from now on every request is paused until
// a handler resolves it.
await page.setRequestInterception(true);

// Pass-through handler: let each intercepted request proceed unchanged.
const passThrough = (interceptedRequest) => {
  interceptedRequest.continue();
};

page.on('request', passThrough);

阻止请求 #

javascript
await page.setRequestInterception(true);

// Resource types to drop. Use ['image'] to block images only.
const blockedTypes = ['image', 'stylesheet', 'font', 'media'];

// A single handler decides the fate of every request. An intercepted request
// can be resolved only once, so a second handler that also calls
// abort()/continue() would throw "Request is already handled!".
page.on('request', (request) => {
  if (blockedTypes.includes(request.resourceType())) {
    request.abort();
  } else {
    request.continue();
  }
});

阻止特定 URL #

javascript
await page.setRequestInterception(true);

// URL fragments that identify ad hosts.
const adHosts = ['doubleclick.net', 'googlesyndication.com'];
// URL fragments that identify tracking scripts.
const trackerKeywords = ['analytics', 'tracking'];

page.on('request', (request) => {
  const target = request.url();
  const matches = (fragment) => target.includes(fragment);

  // Abort ads and trackers; everything else goes through.
  if (adHosts.some(matches) || trackerKeywords.some(matches)) {
    request.abort();
  } else {
    request.continue();
  }
});

修改请求 #

javascript
await page.setRequestInterception(true);

// One handler applies both kinds of override. An intercepted request can be
// resolved only once, so header and body changes must be passed to the same
// continue() call rather than to two separate handlers.
page.on('request', (request) => {
  const overrides = {
    headers: {
      ...request.headers(),
      'X-Custom-Header': 'Custom Value',
      // request.headers() uses lower-case names; reuse the same key so the
      // original user-agent is replaced instead of sent alongside the new one.
      'user-agent': 'Mozilla/5.0 Custom UA',
    },
  };

  // Add a timestamp to the JSON body of submissions.
  if (request.method() === 'POST' && request.url().includes('/api/submit')) {
    try {
      const payload = JSON.parse(request.postData());
      payload.timestamp = Date.now();
      overrides.postData = JSON.stringify(payload);
    } catch (error) {
      // Missing or non-JSON body: forward it untouched instead of leaving
      // the request paused forever.
      console.warn('Leaving POST body unchanged:', error.message);
    }
  }

  request.continue(overrides);
});

重定向请求 #

javascript
const { readFile } = require('fs/promises');

await page.setRequestInterception(true);

// Path of the local script served in place of remote .js files.
const LOCAL_SCRIPT_PATH = '/path/to/local/file.js';

// Answer .js requests with the content of a local file. The file is served
// through respond() because continue({ url }) only forwards the request to
// another URL and is not meant for switching to the file:// scheme.
page.on('request', async (request) => {
  if (!request.url().endsWith('.js')) {
    request.continue();
    return;
  }

  let body;
  try {
    body = await readFile(LOCAL_SCRIPT_PATH);
  } catch (error) {
    // Local override unavailable: fall back to the real resource.
    console.error('Cannot read local override:', error);
    request.continue();
    return;
  }

  request.respond({
    status: 200,
    contentType: 'application/javascript',
    body,
  });
});

响应模拟 #

使用请求拦截模拟响应 #

javascript
// Mock API responses. Puppeteer has no page.route(); responses are mocked by
// enabling request interception and answering with request.respond().
await page.setRequestInterception(true);

page.on('request', (request) => {
  const url = request.url();
  const { pathname } = new URL(url);

  // Specific endpoint: return a fixed user list.
  if (pathname.endsWith('/api/users')) {
    request.respond({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify([
        { id: 1, name: 'John' },
        { id: 2, name: 'Jane' },
      ]),
    });
    return;
  }

  // Any other API URL: return a generic mocked payload.
  if (pathname.includes('/api/')) {
    request.respond({
      status: 200,
      contentType: 'application/json',
      body: JSON.stringify({ mocked: true, url }),
    });
    return;
  }

  // Everything else goes to the network untouched.
  request.continue();
});

模拟不同状态码 #

javascript
// Simulate error responses with request interception (Puppeteer has no
// page.route()).
await page.setRequestInterception(true);

// Path suffix -> mocked HTTP error.
const mockedErrors = {
  '/api/not-found': { status: 404, error: 'Not found' },
  '/api/error': { status: 500, error: 'Internal server error' },
};

page.on('request', (request) => {
  const { pathname } = new URL(request.url());

  // Simulate a network-level failure.
  if (pathname.endsWith('/api/network-error')) {
    request.abort('failed');
    return;
  }

  const matchedSuffix = Object.keys(mockedErrors).find((suffix) => pathname.endsWith(suffix));
  if (matchedSuffix === undefined) {
    request.continue();
    return;
  }

  const { status, error } = mockedErrors[matchedSuffix];
  request.respond({
    status,
    contentType: 'application/json',
    body: JSON.stringify({ error }),
  });
});

模拟延迟响应 #

javascript
// Simulate a slow endpoint with request interception (Puppeteer has no
// page.route()).
await page.setRequestInterception(true);

// How long the mocked endpoint waits before answering.
const RESPONSE_DELAY_MS = 3000;

page.on('request', async (request) => {
  if (!new URL(request.url()).pathname.endsWith('/api/slow')) {
    request.continue();
    return;
  }

  // The request stays paused while we wait, which is what the page sees as
  // latency.
  await new Promise((resolve) => setTimeout(resolve, RESPONSE_DELAY_MS));

  request.respond({
    status: 200,
    contentType: 'application/json',
    body: JSON.stringify({ data: 'delayed response' }),
  });
});

使用 Fixture 文件 #

javascript
const fs = require('fs');

// Serve mock data from a fixture file using request interception (Puppeteer
// has no page.route()).
await page.setRequestInterception(true);

page.on('request', (request) => {
  if (!new URL(request.url()).pathname.endsWith('/api/products')) {
    request.continue();
    return;
  }

  // Read on every request so edits to the fixture are picked up immediately.
  const data = fs.readFileSync('./fixtures/products.json', 'utf8');

  request.respond({
    status: 200,
    contentType: 'application/json',
    body: data,
  });
});

高级网络操作 #

请求认证 #

javascript
// Credentials used to answer HTTP authentication challenges.
const credentials = { username: 'user', password: 'pass' };
await page.authenticate(credentials);

// Navigate to the protected page once the credentials are registered.
const protectedUrl = 'https://protected.example.com';
await page.goto(protectedUrl);

设置额外 HTTP 头 #

javascript
// Headers attached to every request the page makes.
const extraHeaders = {
  'X-Custom-Header': 'value',
  'Authorization': 'Bearer token123',
};

await page.setExtraHTTPHeaders(extraHeaders);

处理 WebSocket #

javascript
// Observe WebSocket traffic. Puppeteer's Page does not emit a 'websocket'
// event, so the frames are read from the DevTools Protocol Network domain.
const cdp = await page.createCDPSession();
await cdp.send('Network.enable');

cdp.on('Network.webSocketCreated', ({ url }) => {
  console.log('WebSocket opened:', url);
});

// response.payloadData holds the frame content.
cdp.on('Network.webSocketFrameReceived', ({ response }) => {
  console.log('Frames received:', response.payloadData);
});

cdp.on('Network.webSocketFrameSent', ({ response }) => {
  console.log('Frames sent:', response.payloadData);
});

cdp.on('Network.webSocketClosed', () => {
  console.log('WebSocket closed');
});

网络监控 #

javascript
// Collect one record per request, in the order the requests were issued.
const requests = [];
// Maps each request object to its record so a response is attributed to the
// request that produced it, even when several requests share the same URL.
const recordByRequest = new Map();

page.on('request', (request) => {
  const record = {
    url: request.url(),
    method: request.method(),
    type: request.resourceType(),
    startTime: Date.now(),
  };
  requests.push(record);
  recordByRequest.set(request, record);
});

page.on('response', (response) => {
  const record = recordByRequest.get(response.request());
  if (!record) {
    return;
  }
  record.status = response.status();
  record.endTime = Date.now();
  record.duration = record.endTime - record.startTime;
  // content-length may be absent or malformed; count such bodies as 0.
  record.size = Number.parseInt(response.headers()['content-length'] ?? '0', 10) || 0;
});

await page.goto('https://example.com');

// Print request statistics.
const totalSize = requests.reduce((sum, r) => sum + (r.size ?? 0), 0);
const totalDuration = requests.reduce((sum, r) => sum + (r.duration ?? 0), 0);
// Guard against 0 / 0 when no request was observed.
const averageDuration = requests.length > 0 ? totalDuration / requests.length : 0;

console.log('Total requests:', requests.length);
console.log('Total size:', totalSize);
console.log('Average duration:', averageDuration);

网络限速 #

javascript
// Emulate network conditions through the DevTools Protocol.
// Throughput values are given in bytes per second, hence the division by 8
// of a figure expressed in bits.
const client = await page.createCDPSession();
await client.send('Network.emulateNetworkConditions', {
  offline: false,
  downloadThroughput: (500 * 1024) / 8,  // 500 Kbit/s, i.e. 62.5 KiB/s
  uploadThroughput: (500 * 1024) / 8,    // 500 Kbit/s, i.e. 62.5 KiB/s
  latency: 100                           // 100 ms of added latency
});

// Preset network conditions; each entry can be passed as the parameter
// object of Network.emulateNetworkConditions.
const networkConditions = {
  offline: {
    offline: true,
    downloadThroughput: 0,
    uploadThroughput: 0,
    latency: 0
  },
  slow3G: {
    offline: false,
    downloadThroughput: (500 * 1024) / 8,
    uploadThroughput: (500 * 1024) / 8,
    latency: 300
  },
  fast3G: {
    offline: false,
    downloadThroughput: (1.6 * 1024 * 1024) / 8,
    uploadThroughput: (750 * 1024) / 8,
    latency: 150
  }
};

离线模式 #

javascript
// Parameter sets for Network.emulateNetworkConditions.
const OFFLINE_CONDITIONS = {
  offline: true,
  downloadThroughput: 0,
  uploadThroughput: 0,
  latency: 0,
};
// A throughput of -1 disables throttling.
const ONLINE_CONDITIONS = {
  offline: false,
  downloadThroughput: -1,
  uploadThroughput: -1,
  latency: 0,
};

const client = await page.createCDPSession();

// Go offline.
await client.send('Network.emulateNetworkConditions', OFFLINE_CONDITIONS);

// Go back online.
await client.send('Network.emulateNetworkConditions', ONLINE_CONDITIONS);

等待网络请求 #

waitForRequest #

javascript
// Wait for a specific request.
const request = await page.waitForRequest(
  (req) => req.url().includes('/api/data')
);
console.log('Request URL:', request.url());

// Wait for the request triggered by a click. The wait is registered before
// the click so the request cannot be missed. A distinct name is used because
// `request` is already declared above.
const [submitRequest] = await Promise.all([
  page.waitForRequest((req) => req.url().includes('/api/submit')),
  page.click('#submit-button'),
]);
console.log('Submit request URL:', submitRequest.url());

waitForResponse #

javascript
// Wait for a specific response.
const response = await page.waitForResponse(
  (res) => res.url().includes('/api/data')
);
const data = await response.json();
console.log('Response data:', data);

// Wait for the response triggered by a click. The wait is registered before
// the click so the response cannot be missed. A distinct name is used because
// `response` is already declared above.
const [submitResponse] = await Promise.all([
  page.waitForResponse((res) => res.url().includes('/api/submit')),
  page.click('#submit-button'),
]);

const result = await submitResponse.json();

等待所有请求完成 #

javascript
const targetUrl = 'https://example.com';

// networkidle0: navigation is done once there have been no network
// connections for 500 ms.
const untilFullyIdle = { waitUntil: 'networkidle0' };
await page.goto(targetUrl, untilFullyIdle);

// networkidle2: navigation is done once there have been no more than
// 2 network connections for 500 ms.
const untilMostlyIdle = { waitUntil: 'networkidle2' };
await page.goto(targetUrl, untilMostlyIdle);

完整示例 #

API 测试 #

javascript
const puppeteer = require('puppeteer');
const assert = require('assert');

/**
 * End-to-end example: mocks the /api/users endpoint, triggers a call to it
 * from the page and asserts on the mocked response.
 *
 * @returns {Promise<void>} Resolves when every assertion passed.
 * @throws {Error} Assertion or navigation errors; the browser is closed
 *   before they propagate.
 */
async function testAPI() {
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();

    // Mock the API. Puppeteer has no page.route(); use request interception
    // and answer with request.respond().
    await page.setRequestInterception(true);
    page.on('request', (request) => {
      if (new URL(request.url()).pathname.endsWith('/api/users')) {
        request.respond({
          status: 200,
          contentType: 'application/json',
          body: JSON.stringify([
            { id: 1, name: 'John' },
            { id: 2, name: 'Jane' },
          ]),
        });
      } else {
        request.continue();
      }
    });

    await page.goto('https://example.com');

    // Trigger the API request from the page. The wait is registered first so
    // the response cannot arrive before we start listening for it.
    const [response] = await Promise.all([
      page.waitForResponse((res) => res.url().includes('/api/users')),
      page.evaluate(() => fetch('/api/users').then((res) => res.status)),
    ]);

    // Verify the response.
    assert.strictEqual(response.status(), 200);
    const users = await response.json();
    assert.strictEqual(users.length, 2);

    console.log('API test passed!');
  } finally {
    // Always release the browser, even when an assertion fails.
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled promise rejection.
testAPI().catch((error) => {
  console.error('API test failed:', error);
  process.exitCode = 1;
});

性能监控 #

javascript
const puppeteer = require('puppeteer');

/**
 * Loads a page, records every network request it makes and prints a report.
 *
 * @param {string} url - Page to load.
 * @returns {Promise<object>} The report: `totalRequests`, `byType`
 *   (count and total size per resource type), `slowRequests` (duration over
 *   1000 ms) and `failedRequests` (HTTP status >= 400 or network failure).
 * @throws {Error} Navigation errors; the browser is closed before they
 *   propagate.
 */
async function monitorPerformance(url) {
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();

    // Keyed by the request object rather than its URL, so that several
    // requests to the same URL are tracked separately.
    const requests = new Map();

    // Track requests.
    page.on('request', (request) => {
      requests.set(request, {
        url: request.url(),
        method: request.method(),
        type: request.resourceType(),
        startTime: Date.now(),
      });
    });

    // Track responses.
    page.on('response', (response) => {
      const entry = requests.get(response.request());
      if (!entry) {
        return;
      }
      entry.status = response.status();
      entry.endTime = Date.now();
      entry.duration = entry.endTime - entry.startTime;
      // content-length may be absent or malformed; count such bodies as 0.
      entry.size = Number.parseInt(response.headers()['content-length'] ?? '0', 10) || 0;
    });

    // Track network-level failures, which never produce a status code.
    page.on('requestfailed', (request) => {
      const entry = requests.get(request);
      if (!entry) {
        return;
      }
      entry.failed = true;
      entry.errorText = request.failure()?.errorText;
    });

    await page.goto(url, { waitUntil: 'networkidle0' });

    // Build the report.
    const report = {
      totalRequests: requests.size,
      byType: {},
      slowRequests: [],
      failedRequests: [],
    };

    for (const entry of requests.values()) {
      // Per-type statistics.
      if (!report.byType[entry.type]) {
        report.byType[entry.type] = { count: 0, totalSize: 0 };
      }
      report.byType[entry.type].count++;
      report.byType[entry.type].totalSize += entry.size || 0;

      // Slow requests.
      if (entry.duration > 1000) {
        report.slowRequests.push(entry);
      }

      // Failed requests: HTTP errors and network failures.
      if (entry.failed || entry.status >= 400) {
        report.failedRequests.push(entry);
      }
    }

    console.log('Performance Report:', JSON.stringify(report, null, 2));
    return report;
  } finally {
    // Always release the browser, even when navigation fails.
    await browser.close();
  }
}

// Report failures instead of leaving an unhandled promise rejection.
monitorPerformance('https://example.com').catch((error) => {
  console.error('Performance monitoring failed:', error);
  process.exitCode = 1;
});

爬虫优化 #

javascript
const puppeteer = require('puppeteer');

/**
 * Scrapes the title and `.content` text of a page while skipping resources
 * that are not needed for extraction.
 *
 * @param {string} url - Page to scrape.
 * @returns {Promise<{title: string, content: (string|undefined)}>} The page
 *   title and the text of the first `.content` element, if any.
 * @throws {Error} Navigation or evaluation errors; the browser is closed
 *   before they propagate.
 */
async function optimizedScraper(url) {
  const browser = await puppeteer.launch();

  try {
    const page = await browser.newPage();

    // Enable request interception.
    await page.setRequestInterception(true);

    // Resources that are not needed to read the DOM.
    const blockedTypes = ['image', 'stylesheet', 'font', 'media'];
    const blockedKeywords = ['analytics', 'tracking'];

    page.on('request', (request) => {
      const isBlocked =
        blockedTypes.includes(request.resourceType()) ||
        blockedKeywords.some((keyword) => request.url().includes(keyword));

      if (isBlocked) {
        request.abort();
      } else {
        request.continue();
      }
    });

    // Use a bounded timeout.
    page.setDefaultTimeout(30000);

    await page.goto(url, { waitUntil: 'domcontentloaded' });

    // Extract the data.
    return await page.evaluate(() => ({
      title: document.title,
      content: document.querySelector('.content')?.textContent,
    }));
  } finally {
    // Always release the browser, even when navigation or evaluation fails.
    await browser.close();
  }
}

下一步 #

现在你已经掌握了 Puppeteer 的网络请求处理功能,接下来学习 高级功能 了解更多高级技巧!

最后更新:2026-03-28