正则表达式 #

一、regexp包概述 #

regexp包实现正则表达式搜索，采用RE2语法：匹配耗时与输入长度成线性关系，但不支持反向引用和环视（lookaround）。

二、基本用法 #

2.1 编译正则表达式 #

import "regexp"

re, err := regexp.Compile(`\d+`)
if err != nil {
    fmt.Println("正则表达式错误:", err)
}

re = regexp.MustCompile(`\d+`)  // panic if error

2.2 匹配检查 #

re := regexp.MustCompile(`\d+`)

fmt.Println(re.MatchString("123"))      // true
fmt.Println(re.MatchString("abc"))      // false
fmt.Println(re.Match([]byte("123")))    // true

三、查找操作 #

3.1 FindString #

查找第一个匹配：

re := regexp.MustCompile(`\d+`)

fmt.Println(re.FindString("abc123def456"))  // "123"

3.2 FindAllString #

查找所有匹配：

re := regexp.MustCompile(`\d+`)

matches := re.FindAllString("abc123def456", -1)
fmt.Println(matches)  // ["123", "456"]

第二个参数限制匹配数量，-1表示全部。

3.3 FindStringIndex #

查找匹配位置：

re := regexp.MustCompile(`\d+`)

loc := re.FindStringIndex("abc123def")
fmt.Println(loc)  // [3, 6]

3.4 FindAllStringIndex #

查找所有匹配位置：

re := regexp.MustCompile(`\d+`)

locs := re.FindAllStringIndex("abc123def456", -1)
fmt.Println(locs)  // [[3, 6], [9, 12]]

3.5 FindStringSubmatch #

查找子匹配：

re := regexp.MustCompile(`(\w+)@(\w+\.\w+)`)

match := re.FindStringSubmatch("email: test@example.com")
fmt.Println(match)      // ["test@example.com", "test", "example.com"]
fmt.Println(match[0])   // "test@example.com"
fmt.Println(match[1])   // "test"
fmt.Println(match[2])   // "example.com"

3.6 FindAllStringSubmatch #

查找所有子匹配：

re := regexp.MustCompile(`(\d+)-(\d+)`)

matches := re.FindAllStringSubmatch("1-2, 3-4, 5-6", -1)
fmt.Println(matches)
// [["1-2" "1" "2"] ["3-4" "3" "4"] ["5-6" "5" "6"]]

四、替换操作 #

4.1 ReplaceAllString #

替换所有匹配：

re := regexp.MustCompile(`\d+`)

result := re.ReplaceAllString("abc123def456", "X")
fmt.Println(result)  // "abcXdefX"

4.2 ReplaceAllStringFunc #

使用函数替换：

re := regexp.MustCompile(`\d+`)

result := re.ReplaceAllStringFunc("abc123def456", func(s string) string {
    n, _ := strconv.Atoi(s)
    return strconv.Itoa(n * 2)
})
fmt.Println(result)  // "abc246def912"

4.3 ReplaceAll #

字节切片替换：

re := regexp.MustCompile(`\d+`)

result := re.ReplaceAll([]byte("abc123"), []byte("X"))
fmt.Println(string(result))  // "abcX"

五、分割操作 #

5.1 Split #

re := regexp.MustCompile(`[,\s]+`)

parts := re.Split("a, b, c, d", -1)
fmt.Println(parts)  // ["a", "b", "c", "d"]

六、正则表达式语法 #

6.1 常用元字符 #

语法	说明
.	任意字符（默认不匹配换行符，(?s)模式下匹配）
^	文本开头（(?m)模式下为行首）
$	文本结尾（(?m)模式下为行尾）
*	0次或多次
+	1次或多次
?	0次或1次
{n}	n次
{n,}	至少n次
{n,m}	n到m次

6.2 字符类 #

语法	说明
[abc]	a或b或c
[^abc]	非a/b/c
[a-z]	a到z
\d	数字
\D	非数字
\w	单词字符
\W	非单词字符
\s	空白字符
\S	非空白字符

6.3 分组 #

语法	说明
(…)	捕获组
(?:…)	非捕获组
(?P<name>…)	命名捕获组

6.4 命名捕获组 #

re := regexp.MustCompile(`(?P<user>\w+)@(?P<domain>\w+\.\w+)`)

match := re.FindStringSubmatch("test@example.com")
fmt.Println(match)

for i, name := range re.SubexpNames() {
    if i > 0 {
        fmt.Printf("%s: %s\n", name, match[i])
    }
}

七、实际应用 #

7.1 验证邮箱 #

// isValidEmail reports whether email, taken as a whole, has the shape
// local@domain.tld: a local part of ASCII letters, digits and "._%+-",
// a domain of ASCII letters, digits, dots and hyphens, and a final label
// of at least two ASCII letters.
//
// The pattern is compiled on every call; when the function is called
// repeatedly, compiling it once at package scope avoids that cost.
func isValidEmail(email string) bool {
    return regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`).MatchString(email)
}

7.2 验证手机号 #

// phoneRegex matches exactly 11 ASCII digits: a leading 1, a second digit
// in the range 3-9, then nine more digits. It is compiled once at package
// initialization instead of on every isValidPhone call.
var phoneRegex = regexp.MustCompile(`^1[3-9]\d{9}$`)

// isValidPhone reports whether phone consists solely of an 11-digit number
// of the form 1[3-9]xxxxxxxxx (presumably a mainland-China mobile number).
// Surrounding whitespace, separators or a country prefix make it invalid.
func isValidPhone(phone string) bool {
    return phoneRegex.MatchString(phone)
}

7.3 提取URL #

// urlRegex matches "http://" or "https://" followed by a run of
// non-whitespace characters. It is compiled once at package initialization
// instead of on every extractURLs call.
var urlRegex = regexp.MustCompile(`https?://[^\s]+`)

// extractURLs returns every http/https URL found in text, in order of
// appearance, or nil when there is none.
//
// A match runs up to the next whitespace character, so punctuation that
// directly follows a URL (such as a trailing comma) is part of the result.
func extractURLs(text string) []string {
    return urlRegex.FindAllString(text, -1)
}

7.4 提取HTML标签内容 #

// extractTagContent returns the text between each <tag>...</tag> pair found
// in html, in order of appearance, or nil when there is no such pair.
//
// tag is matched literally: it is escaped with regexp.QuoteMeta before being
// placed in the pattern, so characters such as "." or "(" in tag can neither
// widen the match nor make pattern compilation panic.
//
// Limitations of the pattern: the opening tag must be exactly "<tag>" (no
// attributes), the content may not contain a newline because "." does not
// match one by default, and matching is non-greedy, so each result ends at
// the first closing tag that follows its opening tag.
func extractTagContent(html, tag string) []string {
    quoted := regexp.QuoteMeta(tag)
    // The pattern depends on tag, so it has to be compiled per call; after
    // quoting it is always a valid expression and MustCompile cannot panic.
    re := regexp.MustCompile(`<` + quoted + `>(.*?)</` + quoted + `>`)

    matches := re.FindAllStringSubmatch(html, -1)
    if len(matches) == 0 {
        return nil
    }

    results := make([]string, 0, len(matches))
    for _, m := range matches {
        // m[0] is the whole element; m[1] is the captured content.
        results = append(results, m[1])
    }
    return results
}

7.5 密码强度检查 #

// Character-class patterns used by checkPasswordStrength. They are compiled
// once at package initialization instead of on every call.
var (
    passwordUpperRegex   = regexp.MustCompile(`[A-Z]`)
    passwordLowerRegex   = regexp.MustCompile(`[a-z]`)
    passwordDigitRegex   = regexp.MustCompile(`\d`)
    passwordSpecialRegex = regexp.MustCompile(`[!@#$%^&*]`)
)

// checkPasswordStrength reports whether password is at least 8 bytes long
// and contains at least one ASCII uppercase letter, one ASCII lowercase
// letter, one ASCII digit and one of the characters !@#$%^&*.
//
// The length is measured with len, i.e. in bytes, so a multi-byte UTF-8
// character contributes more than one unit to the minimum length.
func checkPasswordStrength(password string) bool {
    if len(password) < 8 {
        return false
    }

    return passwordUpperRegex.MatchString(password) &&
        passwordLowerRegex.MatchString(password) &&
        passwordDigitRegex.MatchString(password) &&
        passwordSpecialRegex.MatchString(password)
}

7.6 清理字符串 #

// whitespaceRegex matches one or more consecutive whitespace characters as
// defined by the regexp class \s. It is compiled once at package
// initialization instead of on every cleanString call.
var whitespaceRegex = regexp.MustCompile(`\s+`)

// cleanString collapses every run of whitespace in s into a single space
// and then trims leading and trailing whitespace from the result.
func cleanString(s string) string {
    collapsed := whitespaceRegex.ReplaceAllString(s, " ")
    return strings.TrimSpace(collapsed)
}

八、性能优化 #

8.1 预编译正则 #

// emailRegex is the email-shape pattern, compiled a single time when the
// package is initialized so that callers do not pay for compilation on
// each check.
var emailRegex = regexp.MustCompile(`^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\.[a-zA-Z]{2,}$`)

// isValidEmail reports whether email, taken as a whole, matches emailRegex.
func isValidEmail(email string) bool {
    matched := emailRegex.MatchString(email)
    return matched
}

8.2 使用字面量 #

// Good: a raw string literal (backquotes) lets the pattern be written
// exactly as the regexp engine sees it.
re := regexp.MustCompile(`\d+`)

// Avoid: an interpreted string literal needs every backslash doubled.
// The resulting string is the same \d+, so the compiled expression is
// identical; the raw form is simply easier to read and harder to get wrong.
pattern := "\\d+"
re := regexp.MustCompile(pattern)

九、常见错误 #

9.1 转义问题 #

// Wrong: \d is not a valid escape sequence inside an interpreted
// (double-quoted) string literal, so this line is rejected by the compiler.
re := regexp.MustCompile("\d+")  // compile error: unknown escape sequence

// Correct: inside a raw string literal backslashes have no special meaning,
// so \d reaches the regexp engine unchanged.
re := regexp.MustCompile(`\d+`)  // raw string literal

9.2 贪婪匹配 #

text := "<div>content1</div><div>content2</div>"

// 贪婪匹配
re := regexp.MustCompile(`<div>.*</div>`)
fmt.Println(re.FindString(text))  // 整个字符串

// 非贪婪匹配
re = regexp.MustCompile(`<div>.*?</div>`)
fmt.Println(re.FindString(text))  // <div>content1</div>

十、总结 #

regexp包常用函数：

函数	说明
Compile	编译正则表达式
MustCompile	编译（panic on error）
MatchString	检查是否匹配
FindString	查找第一个匹配
FindAllString	查找所有匹配
ReplaceAllString	替换所有匹配
Split	分割字符串

关键点：

预编译：多次使用时预编译正则表达式
原始字符串：使用反引号避免转义问题
命名捕获组：使用(?P<name>…)命名
非贪婪匹配：使用*?和+?