Puppeteer JavaScript 执行与页面评估 #

基本概念 #

执行环境 #

text
┌─────────────────────────────────────────────────────────────┐
│                    Puppeteer 执行环境                         │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│   Node.js 环境                    浏览器环境                  │
│   ┌──────────────┐              ┌──────────────┐           │
│   │  Puppeteer   │              │   页面 DOM    │           │
│   │  脚本代码    │ ◄──────────► │   JavaScript  │           │
│   │              │   序列化传输  │   window      │           │
│   └──────────────┘              └──────────────┘           │
│                                                             │
│   page.evaluate() 用于在浏览器环境中执行代码                   │
│                                                             │
└─────────────────────────────────────────────────────────────┘

数据传递 #

javascript
// Node.js -> 浏览器:通过参数传递
const result = await page.evaluate((a, b) => {
  return a + b;  // 在浏览器环境中执行
}, 1, 2);

// 浏览器 -> Node.js:通过返回值传递
const title = await page.evaluate(() => {
  return document.title;  // 返回到 Node.js
});

page.evaluate() #

基本用法 #

javascript
// 执行简单表达式
const result = await page.evaluate(() => {
  return 1 + 1;
});
console.log(result);  // 2

// 获取页面信息
const title = await page.evaluate(() => document.title);
const url = await page.evaluate(() => window.location.href);

// 获取 DOM 信息
const text = await page.evaluate(() => {
  return document.querySelector('.title').textContent;
});

传递参数 #

javascript
// 传递单个参数
const result = await page.evaluate((num) => {
  return num * 2;
}, 5);
console.log(result);  // 10

// 传递多个参数
const result = await page.evaluate((a, b, c) => {
  return a + b + c;
}, 1, 2, 3);
console.log(result);  // 6

// 传递对象
const result = await page.evaluate((config) => {
  return `${config.name}: ${config.value}`;
}, { name: 'test', value: 123 });
console.log(result);  // 'test: 123'

返回复杂数据 #

javascript
// 返回对象
const pageInfo = await page.evaluate(() => {
  return {
    title: document.title,
    url: window.location.href,
    width: window.innerWidth,
    height: window.innerHeight
  };
});
console.log(pageInfo);

// 返回数组
const links = await page.evaluate(() => {
  return Array.from(document.querySelectorAll('a')).map(a => ({
    text: a.textContent,
    href: a.href
  }));
});
console.log(links);

// 返回嵌套数据
const data = await page.evaluate(() => {
  return {
    user: {
      name: document.querySelector('.username').textContent,
      email: document.querySelector('.email').textContent
    },
    items: Array.from(document.querySelectorAll('.item')).map(el => ({
      id: el.dataset.id,
      name: el.querySelector('.name').textContent
    }))
  };
});

注意事项 #

javascript
// ❌ 错误:不能直接使用外部变量
const selector = '.title';
const text = await page.evaluate(() => {
  return document.querySelector(selector).textContent;  // selector 未定义
});

// ✅ 正确:通过参数传递
const selector = '.title';
const text = await page.evaluate((sel) => {
  return document.querySelector(sel).textContent;
}, selector);

// ❌ 错误:不能返回 DOM 元素
const element = await page.evaluate(() => {
  return document.querySelector('.title');  // 无法序列化
});

// ✅ 正确:返回 ElementHandle
const element = await page.$('.title');

page.evaluateHandle() #

基本用法 #

javascript
// 返回 JSHandle
const handle = await page.evaluateHandle(() => {
  return document.body;
});

// JSHandle 可以在后续 evaluate 中使用
const tagName = await page.evaluate((body) => {
  return body.tagName;
}, handle);
console.log(tagName);  // 'BODY'

获取属性 #

javascript
// 获取对象属性
const handle = await page.evaluateHandle(() => ({
  name: 'John',
  age: 30
}));

const properties = await handle.getProperties();
for (const [key, value] of properties) {
  console.log(key, await value.jsonValue());
}

// 获取单个属性
const nameHandle = await handle.getProperty('name');
const name = await nameHandle.jsonValue();
console.log(name);  // 'John'

jsonValue() #

javascript
// 将 JSHandle 转换为 JSON 值
const handle = await page.evaluateHandle(() => ({
  name: 'John',
  age: 30
}));

const value = await handle.jsonValue();
console.log(value);  // { name: 'John', age: 30 }

dispose() #

javascript
// 释放 JSHandle
const handle = await page.evaluateHandle(() => document.body);
// ... 使用 handle
await handle.dispose();

$eval 和 $$eval #

page.$eval() #

对单个元素执行操作:

javascript
// 获取元素文本
const text = await page.$eval('.title', el => el.textContent);

// 获取属性
const href = await page.$eval('a.link', el => el.href);
const src = await page.$eval('img', el => el.getAttribute('src'));

// 传递参数
const result = await page.$eval('.container', (el, className) => {
  el.classList.add(className);
  return el.className;
}, 'active');

// 修改元素
await page.$eval('#input', el => el.value = 'new value');
await page.$eval('#button', el => el.click());

page.$$eval() #

对多个元素执行操作:

javascript
// 获取所有链接文本
const texts = await page.$$eval('a', elements => 
  elements.map(el => el.textContent)
);

// 获取所有图片 src
const srcs = await page.$$eval('img', elements => 
  elements.map(el => el.src)
);

// 传递参数
const count = await page.$$eval('.item', (elements, className) => {
  elements.forEach(el => el.classList.add(className));
  return elements.length;
}, 'highlight');

// 过滤元素
const visibleItems = await page.$$eval('.item', elements =>
  elements.filter(el => {
    const style = window.getComputedStyle(el);
    return style.display !== 'none';
  }).map(el => el.textContent)
);

暴露函数到页面 #

page.exposeFunction() #

将 Node.js 函数暴露到浏览器环境(函数实际在 Node.js 端执行,页面中调用时始终返回 Promise):

javascript
// 暴露简单函数
await page.exposeFunction('md5', (text) => {
  const crypto = require('crypto');
  return crypto.createHash('md5').update(text).digest('hex');
});

// 在页面中使用
const hash = await page.evaluate(async () => {
  return await window.md5('hello world');
});
console.log(hash);

实际应用示例 #

javascript
const puppeteer = require('puppeteer');
const fs = require('fs');

/**
 * Demonstrates page.exposeFunction() by giving the page file read/write and
 * logging helpers that execute in Node.js.
 *
 * Any script running in the page can call the exposed helpers, so file paths
 * are only accepted when they resolve (lexically) inside the current working
 * directory.
 *
 * @returns {Promise<void>} Resolves after the browser has been closed.
 */
async function main() {
  const path = require('path');

  // All file access requested by the page is confined to this directory.
  const baseDir = process.cwd();

  // Resolve a page-supplied path and reject anything that escapes baseDir.
  const resolveInsideBase = (requestedPath) => {
    const resolved = path.resolve(baseDir, String(requestedPath));
    const relative = path.relative(baseDir, resolved);
    const escapesBase =
      relative === '..' ||
      relative.startsWith(`..${path.sep}`) ||
      path.isAbsolute(relative);
    if (escapesBase) {
      throw new Error(`Path is outside the allowed directory: ${requestedPath}`);
    }
    return resolved;
  };

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Expose a file-reading function
    await page.exposeFunction('readFile', (filePath) => {
      return fs.readFileSync(resolveInsideBase(filePath), 'utf8');
    });

    // Expose a file-writing function
    await page.exposeFunction('writeFile', (filePath, content) => {
      fs.writeFileSync(resolveInsideBase(filePath), content);
      return true;
    });

    // Expose a logging function
    await page.exposeFunction('log', (...args) => {
      console.log('[Page]', ...args);
    });

    await page.goto('https://example.com');

    // Use the exposed functions from inside the page
    await page.evaluate(async () => {
      const content = await window.readFile('./data.txt');
      window.log('File content:', content);

      await window.writeFile('./output.txt', 'Hello from page!');
    });
  } finally {
    // Close the browser even when a step above throws.
    await browser.close();
  }
}

暴露 API 客户端 #

javascript
const puppeteer = require('puppeteer');
const axios = require('axios');

/**
 * Demonstrates exposing a Node.js-side API client to the page via
 * page.exposeFunction().
 *
 * The exposed `fetchAPI(endpoint, options)` resolves `endpoint` against a
 * fixed API origin and refuses targets on any other origin, because the
 * arguments come from page scripts.
 *
 * @returns {Promise<void>} Resolves after the browser has been closed.
 */
async function main() {
  const API_ORIGIN = 'https://api.example.com';

  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Expose the API request function
    await page.exposeFunction('fetchAPI', async (endpoint, options = {}) => {
      try {
        // Parse instead of concatenating: inputs such as '@evil.com/' or
        // '//evil.com' would otherwise change the request host.
        const target = new URL(endpoint, API_ORIGIN);
        if (target.origin !== API_ORIGIN) {
          throw new Error(`Endpoint must stay on ${API_ORIGIN}`);
        }

        const response = await axios({
          ...options,
          // Set after the spread so page-supplied options cannot replace them.
          baseURL: API_ORIGIN,
          url: target.href
        });
        return response.data;
      } catch (error) {
        // Failures are reported to the page as data rather than as a rejection.
        return { error: error.message };
      }
    });

    await page.goto('https://example.com');

    // Call the API from inside the page
    const data = await page.evaluate(async () => {
      return await window.fetchAPI('/users', {
        method: 'GET'
      });
    });

    console.log(data);
  } finally {
    // Close the browser even when a step above throws.
    await browser.close();
  }
}

在页面中注入脚本 #

page.evaluateOnNewDocument() #

在每次创建新文档时(页面导航、子 frame 附加或导航)、文档自身的脚本运行之前执行注入的脚本:

javascript
// 注入全局变量
await page.evaluateOnNewDocument(() => {
  window.myCustomVar = 'injected value';
});

// 覆盖原生对象
await page.evaluateOnNewDocument(() => {
  Object.defineProperty(navigator, 'webdriver', {
    get: () => false
  });
});

// 注入自定义方法
await page.evaluateOnNewDocument(() => {
  window.getCustomData = () => {
    return {
      timestamp: Date.now(),
      url: window.location.href
    };
  };
});

await page.goto('https://example.com');

隐藏自动化特征 #

javascript
await page.evaluateOnNewDocument(() => {
  // 隐藏 webdriver
  Object.defineProperty(navigator, 'webdriver', {
    get: () => false
  });
  
  // 添加 chrome 对象
  window.chrome = {
    runtime: {}
  };
  
  // 修改 plugins
  Object.defineProperty(navigator, 'plugins', {
    get: () => [1, 2, 3, 4, 5]
  });
  
  // 修改 languages
  Object.defineProperty(navigator, 'languages', {
    get: () => ['zh-CN', 'zh', 'en']
  });
});

page.addScriptTag() #

向页面添加脚本(本地文件、远程 URL 或内联内容):

javascript
// 添加本地脚本
await page.addScriptTag({ path: './inject.js' });

// 添加远程脚本
await page.addScriptTag({ url: 'https://code.jquery.com/jquery-3.6.0.min.js' });

// 添加内联脚本
await page.addScriptTag({ content: 'console.log("Injected!");' });

// 使用注入的库
await page.addScriptTag({ url: 'https://cdn.jsdelivr.net/npm/axios/dist/axios.min.js' });
const data = await page.evaluate(async () => {
  const response = await axios.get('/api/data');
  return response.data;
});

page.addStyleTag() #

添加样式:

javascript
// 添加本地样式
await page.addStyleTag({ path: './styles.css' });

// 添加远程样式
await page.addStyleTag({ url: 'https://cdn.jsdelivr.net/npm/bootstrap/dist/css/bootstrap.min.css' });

// 添加内联样式
await page.addStyleTag({ content: 'body { background: #f0f0f0; }' });

高级用法 #

执行异步代码 #

javascript
// 在 evaluate 中使用 async/await
const data = await page.evaluate(async () => {
  const response = await fetch('/api/data');
  return response.json();
});

// 等待 Promise
const result = await page.evaluate(() => {
  return new Promise((resolve) => {
    setTimeout(() => resolve('done'), 1000);
  });
});

使用 ElementHandle #

javascript
// Get an ElementHandle (page.$ resolves to null when nothing matches)
const element = await page.$('.card');

// Pass the handle to evaluate; inside the page it arrives as the DOM element
const text = await page.evaluate((el) => {
  return el.textContent;
}, element);

// evaluateHandle keeps the result in the page and returns a handle to it
const handle = await element.evaluateHandle((el) => {
  return el.querySelector('.title');
});

// jsonValue() only returns serializable data, which does not give the
// element's text; read the needed value inside the page instead.
const title = await handle.evaluate((el) => (el ? el.textContent : null));

// Release the handles once they are no longer needed
await handle.dispose();
await element.dispose();

执行上下文隔离 #

javascript
// Puppeteer's Page API has no createExecutionContext() method;
// evaluation is scoped by choosing the Frame to run in.
const mainFrame = page.mainFrame();

// Run code in the main frame
const result = await mainFrame.evaluate(() => {
  return window.location.href;
});

// Run code inside an iframe
const frame = page.frames().find(f => f.name() === 'myframe');
if (!frame) {
  // find() returns undefined when no frame has that name
  throw new Error('Frame "myframe" not found');
}
const frameResult = await frame.evaluate(() => {
  return document.title;
});

错误处理 #

javascript
// 捕获 evaluate 中的错误
try {
  const result = await page.evaluate(() => {
    throw new Error('Custom error');
  });
} catch (error) {
  console.error('Evaluate error:', error.message);
}

// 安全访问元素
const text = await page.evaluate(() => {
  const el = document.querySelector('.maybe-not-exist');
  return el ? el.textContent : null;
});

性能优化 #

减少序列化开销 #

javascript
// ❌ 多次调用 evaluate
const title = await page.evaluate(() => document.title);
const url = await page.evaluate(() => window.location.href);
const width = await page.evaluate(() => window.innerWidth);

// ✅ 一次调用获取多个值
const pageInfo = await page.evaluate(() => ({
  title: document.title,
  url: window.location.href,
  width: window.innerWidth
}));

批量操作 #

javascript
// ❌ 逐个获取元素属性
const items = [];
for (const selector of selectors) {
  const text = await page.$eval(selector, el => el.textContent);
  items.push(text);
}

// ✅ 批量获取
const items = await page.evaluate((selectors) => {
  return selectors.map(sel => {
    const el = document.querySelector(sel);
    return el ? el.textContent : null;
  });
}, selectors);

完整示例 #

数据抓取 #

javascript
const puppeteer = require('puppeteer');

/**
 * Scrapes product data from every `.product` element on a page.
 *
 * @param {string} url - Address of the product listing page.
 * @returns {Promise<Array<{name: (string|undefined), price: number,
 *   image: (string|undefined), rating: number, reviews: number}>>}
 *   One entry per `.product` element; fields whose element is missing are
 *   undefined (name, image) or parsed from '0' (price, rating, reviews).
 */
async function scrapeProducts(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    await page.goto(url, { waitUntil: 'networkidle0' });

    // Expose a Node.js-side price parser to the page
    await page.exposeFunction('processPrice', (priceStr) => {
      return parseFloat(priceStr.replace(/[^0-9.]/g, ''));
    });

    // Collect the data inside the page
    return await page.evaluate(async () => {
      const items = Array.from(document.querySelectorAll('.product'));

      return Promise.all(items.map(async (item) => {
        const priceText = item.querySelector('.price')?.textContent || '0';
        // Exposed functions return a Promise when called from the page
        const price = await window.processPrice(priceText);

        return {
          name: item.querySelector('.name')?.textContent.trim(),
          price: price,
          image: item.querySelector('img')?.src,
          rating: parseFloat(item.querySelector('.rating')?.textContent || '0'),
          // Explicit radix so the count is always parsed as decimal
          reviews: Number.parseInt(item.querySelector('.review-count')?.textContent || '0', 10)
        };
      }));
    });
  } finally {
    // Close the browser even when navigation or scraping throws.
    await browser.close();
  }
}

scrapeProducts('https://example.com/products')
  .then(console.log)
  .catch((error) => {
    // Report the failure instead of leaving an unhandled rejection
    console.error(error);
    process.exitCode = 1;
  });

页面状态检查 #

javascript
const puppeteer = require('puppeteer');

/**
 * Loads a page and returns a snapshot of its document, error, timing,
 * visibility and storage state.
 *
 * @param {string} url - Address of the page to inspect.
 * @returns {Promise<Object>} Snapshot with the keys readyState, title, url,
 *   elementCount, imageCount, scriptCount, hasErrors, loadTime,
 *   visibilityState, hidden, localStorageKeys, sessionStorageKeys, cookies.
 *   `hasErrors` is true when an uncaught page exception was reported before
 *   the snapshot was taken; `loadTime` is in whole milliseconds, or null when
 *   the load event had not finished yet.
 */
async function checkPageState(url) {
  const browser = await puppeteer.launch();
  try {
    const page = await browser.newPage();

    // Record uncaught page exceptions. Registered before navigation so that
    // errors thrown while the page loads are not missed.
    const pageErrors = [];
    page.on('pageerror', (error) => {
      pageErrors.push(error);
    });

    await page.goto(url);

    return await page.evaluate((hasErrors) => {
      // Navigation Timing Level 2 entry; replaces deprecated performance.timing.
      const [navigation] = performance.getEntriesByType('navigation');
      // loadEventEnd stays 0 until the load event has finished, which would
      // otherwise produce a meaningless duration.
      const loadTime = navigation && navigation.loadEventEnd > 0
        ? Math.round(navigation.loadEventEnd - navigation.startTime)
        : null;

      return {
        // Basic state
        readyState: document.readyState,
        title: document.title,
        url: window.location.href,

        // DOM state
        elementCount: document.querySelectorAll('*').length,
        imageCount: document.querySelectorAll('img').length,
        scriptCount: document.querySelectorAll('script').length,

        // Error state (collected on the Node.js side and passed in)
        hasErrors: hasErrors,

        // Performance state
        loadTime: loadTime,

        // Visibility
        visibilityState: document.visibilityState,
        hidden: document.hidden,

        // Storage state
        localStorageKeys: Object.keys(localStorage),
        sessionStorageKeys: Object.keys(sessionStorage),
        cookies: document.cookie
      };
    }, pageErrors.length > 0);
  } finally {
    // Close the browser even when navigation or evaluation throws.
    await browser.close();
  }
}

表单验证 #

javascript
const puppeteer = require('puppeteer');

/**
 * Fills in a login-style form and reports the browser's built-in
 * constraint-validation result for the `#email` and `#password` inputs.
 *
 * @param {string} url - Address of the page containing the form.
 * @param {{email: string, password: string}} formData - Values to type.
 * @returns {Promise<{emailValid: boolean, passwordValid: boolean,
 *   emailError: string, passwordError: string}>} Validity flags and the
 *   validation message of each input.
 */
async function validateForm(url, formData) {
  const browser = await puppeteer.launch({ headless: false });
  try {
    const page = await browser.newPage();

    await page.goto(url);

    // Fill in the form
    await page.type('#email', formData.email);
    await page.type('#password', formData.password);

    // Check client-side validation
    const validation = await page.evaluate(() => {
      const emailInput = document.querySelector('#email');
      const passwordInput = document.querySelector('#password');

      return {
        emailValid: emailInput.checkValidity(),
        passwordValid: passwordInput.checkValidity(),
        emailError: emailInput.validationMessage,
        passwordError: passwordInput.validationMessage
      };
    });

    console.log('Validation:', validation);

    return validation;
  } finally {
    // Close the browser even when navigation, typing or evaluation throws.
    await browser.close();
  }
}

下一步 #

现在你已经掌握了 Puppeteer 的 JavaScript 执行与页面评估功能,接下来学习 网络请求处理 了解如何拦截和修改网络请求!

最后更新:2026-03-28