Selenium 基础使用 #

WebDriver 核心概念 #

架构概览 #

text
┌─────────────────────────────────────────────────────────────┐
│                    WebDriver 工作流程                        │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│   测试脚本                                                    │
│      │                                                      │
│      ▼                                                      │
│   WebDriver API ──── 统一的编程接口                          │
│      │                                                      │
│      ▼                                                      │
│   W3C WebDriver 协议 ──── 标准化通信协议                     │
│      │                                                      │
│      ▼                                                      │
│   Browser Driver ──── 浏览器驱动程序                         │
│      │                                                      │
│      ▼                                                      │
│   浏览器 ──── 执行实际操作                                    │
│                                                             │
└─────────────────────────────────────────────────────────────┘

核心对象 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

# WebDriver - the browser driver object
# NOTE(review): this snippet only illustrates the two object types; no page is
# loaded before the lookup and the driver is never quit here.
driver = webdriver.Chrome()

# WebElement - a page element object
element = driver.find_element(By.ID, "username")

浏览器控制 #

创建和关闭浏览器 #

python
from selenium import webdriver
from selenium.webdriver.chrome.options import Options

# Create a browser instance with default settings
driver = webdriver.Chrome()

# End the default session before `driver` is rebound below; otherwise the
# first browser and its driver process keep running with no handle left
# to shut them down.
driver.quit()

# Create a browser instance from explicit options
options = Options()
options.add_argument('--headless')
driver = webdriver.Chrome(options=options)

# Close the current window only
driver.close()

# Close the browser and end the driver process
driver.quit()

窗口管理 #

python
from selenium import webdriver

driver = webdriver.Chrome()

# Maximize the window
driver.maximize_window()

# Minimize the window
driver.minimize_window()

# Switch to full-screen mode
driver.fullscreen_window()

# Set the window size (width, height)
driver.set_window_size(1024, 768)

# Read the window size back as a dict with 'width' and 'height'
size = driver.get_window_size()
print(f"宽度: {size['width']}, 高度: {size['height']}")

# Set the window position (x, y)
driver.set_window_position(100, 100)

# Read the window position back as a dict with 'x' and 'y'
position = driver.get_window_position()
print(f"X: {position['x']}, Y: {position['y']}")

# Handle of the window that currently has focus
handle = driver.current_window_handle
print(f"当前窗口句柄: {handle}")

# Handles of every window in this session
handles = driver.window_handles
print(f"所有窗口句柄: {handles}")

# End the session so the browser and driver process do not outlive the script
driver.quit()

多窗口切换 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

driver = webdriver.Chrome()
driver.get("https://example.com")

# Remember the original window so we can return to it later
main_window = driver.current_window_handle

# Click the link that opens a new window
driver.find_element(By.LINK_TEXT, "Open New Window").click()

# The new window opens asynchronously; wait until its handle is listed
# before iterating, otherwise the loop below may find nothing to switch to.
WebDriverWait(driver, 10).until(EC.number_of_windows_to_be(2))

# Switch to the first handle that is not the original window
for handle in driver.window_handles:
    if handle != main_window:
        driver.switch_to.window(handle)
        break

# Work inside the new window
print(driver.title)

# Close the new window
driver.close()

# Focus does not return automatically after close(); switch back explicitly
driver.switch_to.window(main_window)

driver.quit()

页面导航 #

基本导航操作 #

python
from selenium import webdriver

driver = webdriver.Chrome()

# Open a web page
driver.get("https://www.baidu.com")

# Read the current URL
current_url = driver.current_url
print(f"当前 URL: {current_url}")

# Read the page title
title = driver.title
print(f"页面标题: {title}")

# Go back in the browser history
driver.back()

# Go forward in the browser history
driver.forward()

# Reload the page
driver.refresh()

# End the session
driver.quit()

页面源码操作 #

python
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com")

# Read the page source
page_source = driver.page_source
print(page_source[:500])  # print the first 500 characters

# Check the page content with a plain substring test on the source
if "Welcome" in driver.page_source:
    print("页面包含 Welcome")

# End the session
driver.quit()

元素基础操作 #

查找单个元素 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

# One lookup per locator strategy; each call returns a single element
element_by_id = driver.find_element(By.ID, "username")
element_by_name = driver.find_element(By.NAME, "password")
element_by_class = driver.find_element(By.CLASS_NAME, "btn-primary")
element_by_tag = driver.find_element(By.TAG_NAME, "input")
element_by_css = driver.find_element(By.CSS_SELECTOR, "#username")
element_by_xpath = driver.find_element(By.XPATH, "//input[@id='username']")
element_by_link = driver.find_element(By.LINK_TEXT, "登录")
element_by_partial_link = driver.find_element(By.PARTIAL_LINK_TEXT, "登")

# End the session
driver.quit()

查找多个元素 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

# Find multiple elements; the result is a list
elements = driver.find_elements(By.CLASS_NAME, "item")

# Iterate over every match
for element in elements:
    print(element.text)

# Report how many elements were found
print(f"找到 {len(elements)} 个元素")

# End the session
driver.quit()

元素信息获取 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

element = driver.find_element(By.ID, "username")

# Text of the element
text = element.text
print(f"文本内容: {text}")

# Attribute values, looked up by attribute name
value = element.get_attribute("value")
placeholder = element.get_attribute("placeholder")
class_name = element.get_attribute("class")
print(f"值: {value}, 占位符: {placeholder}")

# Value of a CSS property
css_value = element.value_of_css_property("color")
print(f"颜色: {css_value}")

# Position and size of the element, each read as a dict
location = element.location
size = element.size
print(f"位置: x={location['x']}, y={location['y']}")
print(f"大小: width={size['width']}, height={size['height']}")

# Rectangle information for the element
rect = element.rect
print(f"矩形: {rect}")

# End the session
driver.quit()

元素状态检查 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

element = driver.find_element(By.ID, "submit")

# Is the element displayed?
is_displayed = element.is_displayed()
print(f"是否显示: {is_displayed}")

# Is the element enabled?
is_enabled = element.is_enabled()
print(f"是否可用: {is_enabled}")

# Is the element selected? (meant for checkboxes and radio buttons)
is_selected = element.is_selected()
print(f"是否选中: {is_selected}")

# End the session
driver.quit()

表单操作 #

文本输入 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys

driver = webdriver.Chrome()
driver.get("https://example.com/login")

# Locate the input box
username = driver.find_element(By.ID, "username")

# Clear the input box
username.clear()

# Type text
username.send_keys("test_user")

# Send special keys
# NOTE(review): on a real login form ENTER presumably submits the page, which
# would invalidate `username` for the calls below - confirm against the page.
username.send_keys(Keys.ENTER)
username.send_keys(Keys.TAB)

# Slow typing: one character per call with a 0.1 s pause in between
import time
text = "Hello World"
for char in text:
    username.send_keys(char)
    time.sleep(0.1)

# End the session
driver.quit()

下拉选择框 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import Select

driver = webdriver.Chrome()
driver.get("https://example.com/form")

# Locate the <select> element and wrap it in the Select helper
select_element = driver.find_element(By.ID, "country")
select = Select(select_element)

# Select by zero-based index
select.select_by_index(0)

# Select by the option's value attribute
select.select_by_value("china")

# Select by the option's visible text
select.select_by_visible_text("中国")

# List every option
options = select.options
for option in options:
    print(option.text)

# The currently selected option (the first one for a multi-select)
selected = select.first_selected_option
print(f"当前选中: {selected.text}")

# Every selected option (useful for multi-select boxes)
all_selected = select.all_selected_options

# Deselecting is only supported on multi-select boxes; on a single-choice
# <select> these calls raise NotImplementedError, so guard them.
if select.is_multiple:
    select.deselect_all()
    select.deselect_by_value("china")
    select.deselect_by_visible_text("中国")
    select.deselect_by_index(0)

driver.quit()

复选框和单选框 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com/form")

# Checkbox
checkbox = driver.find_element(By.ID, "agree")

# Tick it only if it is not already selected (click() toggles the state)
if not checkbox.is_selected():
    checkbox.click()

# Untick it only if it is currently selected
if checkbox.is_selected():
    checkbox.click()

# Radio button
radio = driver.find_element(By.ID, "gender-male")

# Select the radio button if it is not selected yet
if not radio.is_selected():
    radio.click()

# End the session
driver.quit()

文件上传 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com/upload")

# Option 1: send the file path straight to the file input
file_input = driver.find_element(By.ID, "file-upload")
file_input.send_keys("/path/to/file.txt")

# Option 2: resolve a relative path to an absolute one first
import os
file_path = os.path.abspath("./test_file.txt")
file_input.send_keys(file_path)

# End the session
driver.quit()

点击操作 #

基本点击 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

# Locate the button
button = driver.find_element(By.ID, "submit")

# Click the button
button.click()

# End the session
driver.quit()

JavaScript 点击 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

element = driver.find_element(By.ID, "hidden-button")

# Click through JavaScript (intended for elements covered by something else);
# the element is handed to the script as arguments[0]
driver.execute_script("arguments[0].click();", element)

# End the session
driver.quit()

滚动操作 #

页面滚动 #

python
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com")

# Scroll to the bottom of the page
driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")

# Scroll to the top of the page
driver.execute_script("window.scrollTo(0, 0);")

# Scroll by a relative distance (500 vertically from the current position)
driver.execute_script("window.scrollBy(0, 500);")

# Scroll to an absolute position (x=500, y=300)
driver.execute_script("window.scrollTo(500, 300);")

# End the session
driver.quit()

滚动到元素 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

element = driver.find_element(By.ID, "target-element")

# Scroll until the element is in view (the element is passed as arguments[0])
driver.execute_script("arguments[0].scrollIntoView();", element)

# Scroll so the element is aligned to the top
driver.execute_script("arguments[0].scrollIntoView({block: 'start'});", element)

# Scroll so the element is aligned to the bottom
driver.execute_script("arguments[0].scrollIntoView({block: 'end'});", element)

# Scroll so the element is centered
driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)

# End the session
driver.quit()

iframe 处理 #

切换到 iframe #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

# Frame lookups are resolved relative to the current browsing context, so each
# alternative below returns to the top-level document before the next one;
# otherwise every switch would search for a frame nested inside the previous one.

# Option 1: switch by zero-based index
driver.switch_to.frame(0)
driver.switch_to.default_content()

# Option 2: switch by name or id
driver.switch_to.frame("frame-name")
driver.switch_to.default_content()
driver.switch_to.frame("frame-id")
driver.switch_to.default_content()

# Option 3: switch using the frame's WebElement
iframe = driver.find_element(By.TAG_NAME, "iframe")
driver.switch_to.frame(iframe)

# Work inside the iframe
driver.find_element(By.ID, "button").click()

# Switch back to the top-level document
driver.switch_to.default_content()

# Switch to the parent frame (one level up from the current context)
driver.switch_to.parent_frame()

driver.quit()

弹窗处理 #

JavaScript 弹窗 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
driver.get("https://example.com")


def open_alert():
    """Trigger the dialog, wait until it is open, and return it."""
    # Trigger the dialog
    driver.find_element(By.ID, "alert-button").click()
    # Wait for the dialog to appear
    WebDriverWait(driver, 10).until(EC.alert_is_present())
    # Get the dialog object
    return driver.switch_to.alert


# A dialog can be answered only once: accept() or dismiss() closes it, and any
# further call on it fails because no dialog is open. Each demonstration below
# therefore opens a fresh dialog first.

# Read the dialog text, then accept it (click OK)
alert = open_alert()
print(alert.text)
alert.accept()

# Dismiss the dialog (click Cancel)
alert = open_alert()
alert.dismiss()

# Type into the dialog (prompt dialogs only), then accept it
alert = open_alert()
alert.send_keys("输入内容")
alert.accept()

driver.quit()

Cookie 操作 #

python
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com")

# Read all cookies
cookies = driver.get_cookies()
for cookie in cookies:
    print(cookie)

# Read one cookie by name
cookie = driver.get_cookie("session_id")
print(cookie)

# Add a cookie; the fields are passed as a dict
driver.add_cookie({
    'name': 'test_cookie',
    'value': 'test_value',
    'domain': 'example.com',
    'path': '/',
    'secure': True
})

# Delete one cookie by name
driver.delete_cookie("test_cookie")

# Delete all cookies
driver.delete_all_cookies()

# End the session
driver.quit()

截图操作 #

页面截图 #

python
from selenium import webdriver
import time

driver = webdriver.Chrome()
driver.get("https://example.com")

# Save a screenshot to a PNG file
# NOTE(review): the original comment called this a full-page screenshot;
# save_screenshot presumably captures only the current window - confirm.
driver.save_screenshot("full_page.png")

# Alternative: take the PNG bytes and write the file yourself
screenshot = driver.get_screenshot_as_png()
with open("screenshot.png", "wb") as f:
    f.write(screenshot)

# Take the screenshot as a base64 string and print its first 100 characters
base64_screenshot = driver.get_screenshot_as_base64()
print(base64_screenshot[:100])

# End the session
driver.quit()

元素截图 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

element = driver.find_element(By.ID, "logo")

# Screenshot of this element only, written to the given file
element.screenshot("element.png")

# End the session
driver.quit()

JavaScript 执行 #

执行 JavaScript 代码 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

# Run a simple JavaScript statement
driver.execute_script("alert('Hello');")
# The alert stays open and blocks the page; close it so the commands below
# do not fail with an unexpected-alert error.
driver.switch_to.alert.accept()

# Run a script and use its return value
title = driver.execute_script("return document.title;")
print(f"标题: {title}")

# Run a script with arguments; the element arrives as arguments[0]
element = driver.find_element(By.ID, "username")
driver.execute_script("arguments[0].value = 'test';", element)

# Run asynchronous JavaScript; the script finishes when it invokes the
# callback that is passed as its last argument
driver.execute_async_script("""
    var callback = arguments[arguments.length - 1];
    setTimeout(function() {
        callback('Done');
    }, 1000);
""")

driver.quit()

常用 JavaScript 操作 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

# Change an element's style (hide it)
element = driver.find_element(By.ID, "banner")
driver.execute_script("arguments[0].style.display = 'none';", element)

# Read an element attribute through JavaScript
value = driver.execute_script("return arguments[0].getAttribute('value');", element)

# Remove the element from the page
driver.execute_script("arguments[0].remove();", element)

# Read the scroll height of the page body
scroll_height = driver.execute_script("return document.body.scrollHeight;")

# Read the inner width and height of the browser window
window_size = driver.execute_script("return [window.innerWidth, window.innerHeight];")

# End the session
driver.quit()

实战示例 #

登录流程示例 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def login_test():
    """Log in through the example form and check that the dashboard opens.

    Opens the login page, fills in the username and password fields,
    submits, and waits up to 10 seconds for the title to contain
    "Dashboard". The browser is always quit, even when a step fails.
    """
    browser = webdriver.Chrome()

    try:
        browser.get("https://example.com/login")
        browser.maximize_window()

        # Resolve all three controls up front, before any typing starts.
        username_box, password_box, submit_button = [
            browser.find_element(By.ID, element_id)
            for element_id in ("username", "password", "submit")
        ]

        # Clear each field before typing so leftover text cannot leak in.
        credentials = (
            (username_box, "test_user"),
            (password_box, "test_password"),
        )
        for field, text in credentials:
            field.clear()
            field.send_keys(text)

        submit_button.click()

        wait = WebDriverWait(browser, 10)
        wait.until(EC.title_contains("Dashboard"))

        assert "Dashboard" in browser.title
        print("登录测试通过")

    finally:
        browser.quit()

# Run the login scenario only when this file is executed as a script.
if __name__ == "__main__":
    login_test()

搜索流程示例 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

def search_test():
    """Search Baidu for a phrase and print the titles of the top results.

    Types the query, submits it with ENTER, waits up to 10 seconds for the
    results container, then prints the result count and the heading text
    of at most the first five results. The browser is always quit.
    """
    browser = webdriver.Chrome()

    try:
        browser.get("https://www.baidu.com")

        query_input = browser.find_element(By.ID, "kw")
        query_input.send_keys("Selenium 自动化测试")
        query_input.send_keys(Keys.ENTER)

        # Block until the results container is present in the DOM.
        results_ready = EC.presence_of_element_located((By.ID, "content_left"))
        WebDriverWait(browser, 10).until(results_ready)

        hits = browser.find_elements(By.CSS_SELECTOR, ".result")
        print(f"找到 {len(hits)} 条结果")

        # Number the printed titles from 1.
        rank = 0
        for hit in hits[:5]:
            rank += 1
            heading = hit.find_element(By.TAG_NAME, "h3")
            print(f"{rank}. {heading.text}")

    finally:
        browser.quit()

# Run the search scenario only when this file is executed as a script.
if __name__ == "__main__":
    search_test()

下一步 #

掌握了基础操作后,接下来学习「元素定位」,深入了解各种定位策略!

最后更新:2026-03-28