Selenium 高级处理 #

多窗口处理 #

窗口句柄 #

text
┌─────────────────────────────────────────────────────────────┐
│                    窗口句柄管理                               │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│   主窗口 (handle_1)                                          │
│   ┌─────────────────────────────────────────────────────┐   │
│   │  点击链接 ──> 打开新窗口                              │   │
│   └─────────────────────────────────────────────────────┘   │
│                         │                                   │
│                         ▼                                   │
│   新窗口 (handle_2)                                          │
│   ┌─────────────────────────────────────────────────────┐   │
│   │  在新窗口中操作                                       │   │
│   └─────────────────────────────────────────────────────┘   │
│                         │                                   │
│                         ▼                                   │
│   关闭新窗口 ──> 切换回主窗口                                 │
│                                                             │
└─────────────────────────────────────────────────────────────┘

基本操作 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.support.ui import WebDriverWait

driver = webdriver.Chrome()
try:
    driver.get("https://example.com")

    # Remember the window we start in so we can return to it later.
    main_window = driver.current_window_handle
    print(f"主窗口句柄: {main_window}")

    # Snapshot the handles that exist BEFORE the click; any handle that is
    # not in this list afterwards belongs to the newly opened window.
    all_handles = driver.window_handles
    print(f"所有窗口句柄: {all_handles}")

    # Click the link that opens a new window.
    driver.find_element(By.LINK_TEXT, "打开新窗口").click()

    # The window is opened asynchronously, so reading window_handles right
    # after click() may not include it yet. Wait until a new handle exists.
    WebDriverWait(driver, 10).until(EC.new_window_is_opened(all_handles))

    # Pick the handle that was not present in the snapshot.
    new_handles = driver.window_handles
    new_window = next(h for h in new_handles if h not in all_handles)

    # Switch to the new window and work inside it.
    driver.switch_to.window(new_window)
    print(f"新窗口标题: {driver.title}")

    # Close the window that currently has focus (the new one).
    driver.close()

    # Switch back to the main window explicitly before issuing more commands.
    driver.switch_to.window(main_window)
finally:
    # Always end the session, even if one of the steps above raised.
    driver.quit()

多窗口切换工具类 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class WindowManager:
    """Helper for finding, switching between and closing browser windows.

    Every method operates on the WebDriver instance passed to the constructor.
    """

    def __init__(self, driver):
        self.driver = driver

    def get_main_window(self):
        """Return the handle of the window that currently has focus."""
        return self.driver.current_window_handle

    def switch_to_new_window(self, timeout=10, known_handles=None):
        """Switch to a window that is not among the known ones.

        Args:
            timeout: Seconds to wait for such a window to show up.
            known_handles: Handles that existed before the new window was
                requested. When omitted, only the currently focused window
                counts as known, so the call also works when it is made
                after the new window has already finished opening.

        Returns:
            The handle of the window that was switched to.

        Raises:
            The timeout error raised by ``WebDriverWait.until`` when no
            unknown window appears in time.
        """
        if known_handles is None:
            known = {self.driver.current_window_handle}
        else:
            known = set(known_handles)

        def _find_new_window(driver):
            # Falsy result keeps WebDriverWait polling; a handle ends the wait.
            fresh = [h for h in driver.window_handles if h not in known]
            return fresh[0] if fresh else False

        new_window = WebDriverWait(self.driver, timeout).until(_find_new_window)
        self.driver.switch_to.window(new_window)
        return new_window

    def _switch_to_window_matching(self, matches, timeout, error_message):
        """Cycle through all windows until ``matches(driver)`` is truthy.

        The windows are scanned at least once, even with ``timeout=0``, and
        re-scanned every 0.5 s until the timeout has elapsed. Focus is left
        on the matching window; on failure it stays on the last window that
        was inspected.
        """
        # Local import keeps the class usable regardless of where (or
        # whether) the surrounding module imports ``time``.
        import time

        deadline = time.monotonic() + timeout
        while True:
            for handle in self.driver.window_handles:
                self.driver.switch_to.window(handle)
                if matches(self.driver):
                    return handle
            if time.monotonic() >= deadline:
                raise Exception(error_message)
            time.sleep(0.5)

    def switch_to_window_by_title(self, title, timeout=10):
        """Switch to the first window whose title contains ``title``.

        Returns the handle of that window; raises ``Exception`` when no
        window matches within ``timeout`` seconds.
        """
        return self._switch_to_window_matching(
            lambda d: title in d.title,
            timeout,
            f"未找到标题包含 '{title}' 的窗口",
        )

    def switch_to_window_by_url(self, url_pattern, timeout=10):
        """Switch to the first window whose URL contains ``url_pattern``.

        Returns the handle of that window; raises ``Exception`` when no
        window matches within ``timeout`` seconds.
        """
        return self._switch_to_window_matching(
            lambda d: url_pattern in d.current_url,
            timeout,
            f"未找到 URL 包含 '{url_pattern}' 的窗口",
        )

    def close_current_window_and_switch_to_main(self, main_handle):
        """Close the focused window, then move focus to ``main_handle``."""
        self.driver.close()
        self.driver.switch_to.window(main_handle)

    def close_all_except_main(self, main_handle):
        """Close every window except ``main_handle`` and focus that one."""
        # Work on a snapshot: the set of open windows shrinks as we close them.
        for handle in list(self.driver.window_handles):
            if handle != main_handle:
                self.driver.switch_to.window(handle)
                self.driver.close()
        self.driver.switch_to.window(main_handle)

import time

# Usage example
driver = webdriver.Chrome()
driver.get("https://example.com")

window_manager = WindowManager(driver)
# Record the starting window's handle so we can return to it at the end.
main_window = window_manager.get_main_window()

# Open a new window
driver.find_element(By.LINK_TEXT, "打开新窗口").click()

# Switch to the new window
window_manager.switch_to_new_window()

# When finished, close it and go back to the main window
window_manager.close_current_window_and_switch_to_main(main_window)

driver.quit()

iframe 处理 #

iframe 结构 #

text
┌─────────────────────────────────────────────────────────────┐
│                      主页面                                   │
│  ┌───────────────────────────────────────────────────────┐  │
│  │                                                       │  │
│  │  ┌─────────────────────────────────────────────────┐  │  │
│  │  │                   iframe                         │  │  │
│  │  │  ┌─────────────────────────────────────────────┐│  │  │
│  │  │  │                                             ││  │  │
│  │  │  │   iframe 内的元素                            ││  │  │
│  │  │  │                                             ││  │  │
│  │  │  └─────────────────────────────────────────────┘│  │  │
│  │  │                                                 │  │  │
│  │  └─────────────────────────────────────────────────┘  │  │
│  │                                                       │  │
│  └───────────────────────────────────────────────────────┘  │
│                                                             │
└─────────────────────────────────────────────────────────────┘

基本操作 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get("https://example.com")

    # The three options below are alternatives for selecting a frame. A frame
    # reference is looked up inside the currently selected context, so we go
    # back to the top-level document before trying the next option.

    # Option 1: switch by index (0-based)
    driver.switch_to.frame(0)
    driver.switch_to.default_content()

    # Option 2: switch by the frame's name or id attribute
    driver.switch_to.frame("frame-name")
    driver.switch_to.default_content()
    driver.switch_to.frame("frame-id")
    driver.switch_to.default_content()

    # Option 3: switch by WebElement
    iframe = driver.find_element(By.TAG_NAME, "iframe")
    driver.switch_to.frame(iframe)

    # Work inside the iframe
    driver.find_element(By.ID, "button").click()

    # Step up one level to the parent frame (useful with nested iframes)
    driver.switch_to.parent_frame()

    # Jump straight back to the top-level document
    driver.switch_to.default_content()
finally:
    # End the session even if a frame or element was not found.
    driver.quit()

嵌套 iframe 处理 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

# Frame nesting on the page:
#   top-level document -> iframe1 -> iframe2 -> iframe3
# Each switch is resolved inside the frame selected by the previous one, so
# walking the names from outermost to innermost lands in iframe3.
for frame_name in ("iframe1", "iframe2", "iframe3"):
    driver.switch_to.frame(frame_name)

# Work inside the innermost iframe
target = driver.find_element(By.ID, "element")
target.click()

# One level up: back in iframe2
driver.switch_to.parent_frame()

# All the way up: back in the top-level document
driver.switch_to.default_content()

driver.quit()

iframe 工具类 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class FrameHelper:
    """Helper for entering and leaving frames/iframes with explicit waits."""

    def __init__(self, driver):
        self.driver = driver

    def switch_to_frame(self, frame_reference, timeout=10):
        """Wait until the frame is available, then switch into it.

        Args:
            frame_reference: Anything accepted by
                ``EC.frame_to_be_available_and_switch_to_it``: an index, a
                frame name or id string, a WebElement, or a
                ``(By, value)`` locator tuple.
            timeout: Seconds to wait for the frame to become available.

        Raises:
            The timeout error raised by ``WebDriverWait.until`` when the
            frame does not become available in time.
        """
        # The expected condition already dispatches on the reference type,
        # so it is passed through unchanged. In particular a plain string is
        # no longer forced into a By.ID locator, which would reject frames
        # that are identified by their name attribute.
        WebDriverWait(self.driver, timeout).until(
            EC.frame_to_be_available_and_switch_to_it(frame_reference)
        )

    def switch_to_default(self):
        """Switch back to the top-level document."""
        self.driver.switch_to.default_content()

    def switch_to_parent(self):
        """Switch to the parent of the currently selected frame."""
        self.driver.switch_to.parent_frame()

    def execute_in_frame(self, frame_reference, func, timeout=10):
        """Run ``func()`` inside a frame and return its result.

        The top-level document is re-selected afterwards, whether ``func``
        returns normally or raises. Note that this is always the top-level
        document, not the context that was active before the call.
        """
        self.switch_to_frame(frame_reference, timeout)
        try:
            return func()
        finally:
            self.switch_to_default()

# Usage example
driver = webdriver.Chrome()
driver.get("https://example.com")

frame_helper = FrameHelper(driver)

# Action to perform while the iframe is the active context
def click_button():
    driver.find_element(By.ID, "button").click()
    return "clicked"

# execute_in_frame returns whatever the callback returns ("clicked" here)
result = frame_helper.execute_in_frame("iframe-id", click_button)
print(result)

driver.quit()

弹窗处理 #

JavaScript 弹窗类型 #

text
┌─────────────────────────────────────────────────────────────┐
│                    JavaScript 弹窗类型                        │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  Alert(警告框)                                             │
│  ┌─────────────────────────────────────────────────────┐   │
│  │  [提示信息]                                          │   │
│  │                                                     │   │
│  │                              [确定]                  │   │
│  └─────────────────────────────────────────────────────┘   │
│                                                             │
│  Confirm(确认框)                                           │
│  ┌─────────────────────────────────────────────────────┐   │
│  │  [提示信息]                                          │   │
│  │                                                     │   │
│  │              [确定]        [取消]                    │   │
│  └─────────────────────────────────────────────────────┘   │
│                                                             │
│  Prompt(输入框)                                            │
│  ┌─────────────────────────────────────────────────────┐   │
│  │  [提示信息]                                          │   │
│  │  [________________]                                  │   │
│  │                                                     │   │
│  │              [确定]        [取消]                    │   │
│  └─────────────────────────────────────────────────────┘   │
│                                                             │
└─────────────────────────────────────────────────────────────┘

基本操作 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

driver = webdriver.Chrome()
try:
    driver.get("https://example.com")
    wait = WebDriverWait(driver, 10)

    # A dialog is gone once it has been accepted or dismissed, so each of the
    # three demonstrations below triggers its own dialog.
    # NOTE(review): "confirm-button" and "prompt-button" are placeholder ids,
    # like "alert-button"; adjust them to the page under test.

    # --- Alert: message with a single OK button ---
    driver.find_element(By.ID, "alert-button").click()

    # Wait for the dialog to appear
    wait.until(EC.alert_is_present())

    # Get the dialog object
    alert = driver.switch_to.alert

    # Read the dialog text
    text = alert.text
    print(f"弹窗文本: {text}")

    # Accept the dialog (click OK)
    alert.accept()

    # --- Confirm: OK / Cancel ---
    driver.find_element(By.ID, "confirm-button").click()
    wait.until(EC.alert_is_present())
    alert = driver.switch_to.alert

    # Dismiss the dialog (click Cancel)
    alert.dismiss()

    # --- Prompt: text field plus OK / Cancel ---
    driver.find_element(By.ID, "prompt-button").click()
    wait.until(EC.alert_is_present())
    alert = driver.switch_to.alert

    # Type into the prompt, then confirm
    alert.send_keys("输入内容")
    alert.accept()
finally:
    # End the session even if a dialog never showed up.
    driver.quit()

弹窗处理工具类 #

python
from selenium import webdriver
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC

class AlertHelper:
    """Convenience wrapper around JavaScript alert/confirm/prompt dialogs.

    Every action first waits (up to ``timeout`` seconds) for a dialog to be
    present and then operates on it.
    """

    def __init__(self, driver, timeout=10):
        self.timeout = timeout
        self.driver = driver

    def wait_for_alert(self):
        """Wait for a dialog to be present and return the wait's result."""
        waiter = WebDriverWait(self.driver, self.timeout)
        return waiter.until(EC.alert_is_present())

    def accept_alert(self):
        """Wait for a dialog and accept it."""
        self.wait_for_alert().accept()

    def dismiss_alert(self):
        """Wait for a dialog and dismiss it."""
        self.wait_for_alert().dismiss()

    def get_alert_text(self):
        """Wait for a dialog and return its text without closing it."""
        return self.wait_for_alert().text

    def send_keys_to_alert(self, text):
        """Wait for a dialog, type ``text`` into it and accept it."""
        dialog = self.wait_for_alert()
        dialog.send_keys(text)
        dialog.accept()

    def handle_alert(self, accept=True, text=None):
        """Handle a dialog in one call and return the text it displayed.

        Args:
            accept: Accept the dialog when truthy, dismiss it otherwise.
            text: Optional input to type before closing; skipped when falsy.
        """
        dialog = self.wait_for_alert()
        message = dialog.text

        if text:
            dialog.send_keys(text)

        respond = dialog.accept if accept else dialog.dismiss
        respond()

        return message

# Usage example
# NOTE(review): nothing in this snippet triggers a dialog; each helper call
# below waits for one to be present, so trigger it on the page first.
driver = webdriver.Chrome()
driver.get("https://example.com")

alert_helper = AlertHelper(driver)

# Handle a confirm dialog: accept it and keep the text it displayed
alert_text = alert_helper.handle_alert(accept=True)
print(f"已确认弹窗: {alert_text}")

# Handle a prompt dialog: type the text, then accept
alert_helper.send_keys_to_alert("输入内容")

driver.quit()

文件上传 #

普通文件上传 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
import os

driver = webdriver.Chrome()
driver.get("https://example.com/upload")

# Option 1: send the file path straight to the file input element
file_input = driver.find_element(By.ID, "file-upload")
file_path = "/path/to/file.txt"
file_input.send_keys(file_path)

# Option 2: build an absolute path from a relative one first
file_path = os.path.abspath("./test_file.txt")
file_input.send_keys(file_path)

# Submit the upload
driver.find_element(By.ID, "upload-button").click()

driver.quit()

多文件上传 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com/upload")

file_input = driver.find_element(By.ID, "multi-file-upload")

# Multi-file upload: the paths are joined with newline characters and sent
# in a single send_keys call
files = "/path/to/file1.txt\n/path/to/file2.txt\n/path/to/file3.txt"
file_input.send_keys(files)

driver.quit()

使用 AutoIt 处理系统对话框(Windows) #

python
import os
import subprocess
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get("https://example.com/upload")

    # Click the upload button to bring up the operating-system file dialog
    driver.find_element(By.ID, "upload-button").click()

    # Drive the system dialog with an AutoIt script.
    # Contents of upload.au3:
    # ControlFocus("打开", "", "Edit1")
    # ControlSetText("打开", "", "Edit1", "C:\path\to\file.txt")
    # ControlClick("打开", "", "Button1")

    autoit_script = os.path.abspath("upload.au3")
    # check=True turns a non-zero AutoIt exit code into CalledProcessError
    # instead of silently continuing as if the file had been selected.
    subprocess.run(["AutoIt3.exe", autoit_script], check=True)
finally:
    # End the browser session even if AutoIt is missing or fails.
    driver.quit()

使用 pyautogui 处理系统对话框 #

python
import time
import pyautogui
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com/upload")

# Click the upload button
driver.find_element(By.ID, "upload-button").click()

# Give the system dialog time to appear (fixed one-second pause)
time.sleep(1)

# Type the file path with pyautogui and confirm with Enter
pyautogui.write("/path/to/file.txt")
pyautogui.press("enter")

driver.quit()

文件下载 #

Chrome 下载配置 #

python
from selenium import webdriver
from selenium.webdriver.chrome.options import Options
from selenium.webdriver.common.by import By  # needed for By.LINK_TEXT below
import os

download_dir = os.path.abspath("./downloads")
# Create the directory up front so it can be listed while waiting for the
# download, even before Chrome has written anything into it.
os.makedirs(download_dir, exist_ok=True)

options = Options()
# Chrome preferences controlling where downloads go and whether to prompt
prefs = {
    "download.default_directory": download_dir,
    "download.prompt_for_download": False,
    "download.directory_upgrade": True,
    "safebrowsing.enabled": True
}
options.add_experimental_option("prefs", prefs)

driver = webdriver.Chrome(options=options)
driver.get("https://example.com/download")

# Click the download link
driver.find_element(By.LINK_TEXT, "下载文件").click()

# 等待下载完成
import time
def wait_for_download(directory, timeout=60, known_files=()):
    """Wait until a finished download is present in ``directory``.

    A download counts as finished when the directory holds no
    ``.crdownload`` file (Chrome's marker for an in-progress download) and
    at least one eligible file.

    Args:
        directory: Directory to watch.
        timeout: Maximum number of seconds to wait.
        known_files: File names to ignore, e.g. a listing of the directory
            taken before the download was started.

    Returns:
        Full path of the most recently modified eligible file.

    Raises:
        TimeoutError: No finished download appeared within ``timeout``.
    """
    ignored = set(known_files)
    start_time = time.time()
    while time.time() - start_time < timeout:
        names = os.listdir(directory)
        # A .crdownload file means Chrome is still writing
        downloading = [n for n in names if n.endswith('.crdownload')]
        finished = [
            n for n in names
            if n not in ignored and not n.endswith('.crdownload')
        ]
        if not downloading and finished:
            # os.listdir order is arbitrary, so pick by modification time
            # rather than by position in the listing.
            newest = max(
                finished,
                key=lambda n: os.path.getmtime(os.path.join(directory, n)),
            )
            return os.path.join(directory, newest)
        time.sleep(1)
    raise TimeoutError("下载超时")

# Block until the download has finished, then report where the file landed
downloaded_file = wait_for_download(download_dir)
print(f"下载完成: {downloaded_file}")

driver.quit()

Firefox 下载配置 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By  # needed for By.LINK_TEXT below
from selenium.webdriver.firefox.options import Options
import os
import time

download_dir = os.path.abspath("./downloads")
# Make sure the target directory exists before it is listed below.
os.makedirs(download_dir, exist_ok=True)

options = Options()
# folderList = 2 tells Firefox to use the custom directory set in
# browser.download.dir
options.set_preference("browser.download.folderList", 2)
options.set_preference("browser.download.dir", download_dir)
options.set_preference("browser.download.manager.showWhenStarting", False)
# MIME types that are saved to disk without asking
options.set_preference("browser.helperApps.neverAsk.saveToDisk", 
    "application/pdf,application/zip,application/octet-stream")

driver = webdriver.Firefox(options=options)
try:
    driver.get("https://example.com/download")

    # Remember what is already in the directory so only new files count.
    before = set(os.listdir(download_dir))

    driver.find_element(By.LINK_TEXT, "下载文件").click()

    # Quitting right after the click would end the session while the file is
    # still being written. Firefox keeps a ".part" file next to a download in
    # progress, so wait until a new file exists and no ".part" file is left.
    deadline = time.monotonic() + 60
    while True:
        names = os.listdir(download_dir)
        in_progress = [n for n in names if n.endswith(".part")]
        finished = [
            n for n in names
            if n not in before and not n.endswith(".part")
        ]
        if finished and not in_progress:
            break
        if time.monotonic() >= deadline:
            raise TimeoutError("下载超时")
        time.sleep(1)
finally:
    driver.quit()
Cookie 处理 #

Cookie 基本操作 #

python
from selenium import webdriver

driver = webdriver.Chrome()
driver.get("https://example.com")

# Print the name and value of every cookie in the current session
for entry in driver.get_cookies():
    print(f"名称: {entry['name']}, 值: {entry['value']}")

# Look up a single cookie by name; the lookup result is falsy when absent
session_cookie = driver.get_cookie("session_id")
if session_cookie:
    session_value = session_cookie['value']
    print(f"Session ID: {session_value}")

# Add a cookie described by a plain dict
new_cookie = {
    'name': 'test_cookie',
    'value': 'test_value',
    'domain': 'example.com',
    'path': '/',
    'secure': True,
    'httpOnly': False,
}
driver.add_cookie(new_cookie)

# Remove that cookie again by name
driver.delete_cookie("test_cookie")

# Remove every cookie
driver.delete_all_cookies()

driver.quit()
使用 Cookie 保持登录 #

python
from selenium import webdriver
import json
import os

def login_with_cookies():
    """Open Chrome and log in, reusing saved cookies when available.

    If ``cookies.json`` exists, its cookies are added to the session and the
    page is refreshed. Otherwise the login page is opened and, after the
    login steps, the session's cookies are written to ``cookies.json``.

    Returns:
        The open WebDriver instance; the caller is responsible for quitting it.

    Raises:
        Any exception from the steps above; the browser is closed first so
        a failed attempt does not leave a stray session behind.
    """
    driver = webdriver.Chrome()

    cookie_file = "cookies.json"

    try:
        if os.path.exists(cookie_file):
            # Log in with the saved cookies. The site is opened first and
            # the cookies are added to that session afterwards.
            driver.get("https://example.com")

            with open(cookie_file, "r", encoding="utf-8") as f:
                cookies = json.load(f)

            for cookie in cookies:
                driver.add_cookie(cookie)

            # Reload so the page is requested again with the cookies set
            driver.refresh()
            print("使用 Cookie 登录成功")
        else:
            # Log in manually and save the cookies
            driver.get("https://example.com/login")

            # ... perform the login steps ...

            # Save the cookies
            cookies = driver.get_cookies()
            with open(cookie_file, "w", encoding="utf-8") as f:
                json.dump(cookies, f)

            print("登录成功,Cookie 已保存")
    except Exception:
        driver.quit()
        raise

    return driver

# The returned driver is still open here; quit it when you are done with it.
driver = login_with_cookies()

JavaScript 执行 #

执行 JavaScript #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
try:
    driver.get("https://example.com")

    # Run a simple JavaScript statement
    driver.execute_script("alert('Hello');")
    # The alert stays open and gets in the way of the commands that follow,
    # so close it before continuing.
    driver.switch_to.alert.accept()

    # Run a script and read its return value
    title = driver.execute_script("return document.title;")
    print(f"页面标题: {title}")

    # Pass arguments: extra parameters are exposed to the script as arguments[n]
    element = driver.find_element(By.ID, "username")
    driver.execute_script("arguments[0].value = 'test';", element)

    # Change the element's style
    driver.execute_script("arguments[0].style.border = '2px solid red';", element)

    # Remove the element from the page
    driver.execute_script("arguments[0].remove();", element)

    # Scroll to the bottom of the page
    driver.execute_script("window.scrollTo(0, document.body.scrollHeight);")
finally:
    # End the session even if a script or lookup failed.
    driver.quit()

常用 JavaScript 操作 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By

driver = webdriver.Chrome()
driver.get("https://example.com")

# Read page metrics via JavaScript
scroll_height = driver.execute_script("return document.body.scrollHeight;")
viewport_height = driver.execute_script("return window.innerHeight;")
scroll_top = driver.execute_script("return document.documentElement.scrollTop;")

# Scroll the element into view, vertically centred
element = driver.find_element(By.ID, "target")
driver.execute_script("arguments[0].scrollIntoView({block: 'center'});", element)

# 高亮元素
def highlight_element(driver, element, duration=2):
    """Temporarily highlight ``element`` with a red border and yellow background.

    Args:
        driver: Object whose ``execute_script`` runs JavaScript in the page.
        element: Element to highlight.
        duration: Seconds to keep the highlight before restoring the style.
    """
    import time

    # A missing style attribute is restored as an empty string rather than
    # as the literal text "None".
    original_style = element.get_attribute("style") or ""

    # The style text is handed over as a script argument instead of being
    # pasted into the JavaScript source, so quotes inside the original style
    # (e.g. font-family: 'Arial') cannot break or alter the script.
    set_style = "arguments[0].setAttribute('style', arguments[1]);"
    driver.execute_script(
        set_style,
        element,
        'border: 3px solid red; background: yellow;',
    )
    try:
        time.sleep(duration)
    finally:
        # Restore the original style even if the pause is interrupted.
        driver.execute_script(set_style, element, original_style)

# Highlight the element for the default duration
highlight_element(driver, element)

# Fire events from JavaScript: a synthetic 'change' event, then a click
driver.execute_script("arguments[0].dispatchEvent(new Event('change'));", element)
driver.execute_script("arguments[0].click();", element)

# Read element details via JavaScript
element_text = driver.execute_script("return arguments[0].innerText;", element)
element_html = driver.execute_script("return arguments[0].outerHTML;", element)

driver.quit()

实战示例 #

完整登录流程 #

python
from selenium import webdriver
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.webdriver.chrome.options import Options
import os

def complete_login_flow():
    """Start Chrome, log in on the example site and return the open driver.

    The browser is configured to download into ``./downloads`` without
    prompting. If any step fails, the error is printed, the browser is
    closed and the exception is re-raised.
    """
    target_dir = os.path.abspath("./downloads")
    os.makedirs(target_dir, exist_ok=True)

    chrome_options = Options()
    chrome_options.add_experimental_option(
        "prefs",
        {
            "download.default_directory": target_dir,
            "download.prompt_for_download": False,
        },
    )

    driver = webdriver.Chrome(options=chrome_options)
    wait = WebDriverWait(driver, 10)

    def wait_visible(element_id):
        # Wait until the element with this id is visible and return it
        return wait.until(EC.visibility_of_element_located((By.ID, element_id)))

    try:
        driver.get("https://example.com/login")
        driver.maximize_window()

        # The form must be visible before its fields are filled in
        wait_visible("login-form")

        credentials = (("username", "test_user"), ("password", "test_password"))
        for field_id, value in credentials:
            driver.find_element(By.ID, field_id).send_keys(value)

        submit = wait.until(EC.element_to_be_clickable((By.ID, "login-button")))
        submit.click()

        # Wait for the loading indicator to go away, then for the redirect
        wait.until(EC.invisibility_of_element_located((By.ID, "loading")))
        wait.until(EC.url_contains("dashboard"))

        banner = wait_visible("welcome")
        print(f"登录成功: {banner.text}")

        cookie_count = len(driver.get_cookies())
        print(f"获取到 {cookie_count} 个 Cookie")

        return driver

    except Exception as error:
        print(f"登录失败: {error}")
        driver.quit()
        raise

# On success the returned driver is still open; quit it when you are done.
driver = complete_login_flow()

下一步 #

掌握了高级处理后,接下来可以学习 Actions 链操作,了解更多复杂的交互操作!

最后更新:2026-03-28