R语言字符串 #
一、字符串概述 #
在R语言中,字符串使用单引号或双引号创建,存储为字符向量。
二、创建字符串 #
2.1 基本创建 #
r
# Double and single quotes create the same string.
s1 <- "Hello, World!"
s2 <- 'Hello, World!'
class(s1)  # "character"
nchar(s1)  # 13
2.2 转义字符 #
r
# cat() writes the string itself, so each escape sequence is shown as the
# character it stands for (print() would show the escaped form instead).
s1 <- "Hello\nWorld"  # \n is a newline
cat(s1)
s2 <- "Tab\there"  # \t is a tab
cat(s2)
s3 <- "Quote: \"Hello\""  # backslash + double quote is a literal double quote
cat(s3)
s4 <- "Backslash: \\"  # two backslashes in source give one literal backslash
cat(s4)
2.3 原始字符串 #
r
# Raw string literal (available since R 4.0.0): backslashes between the
# delimiters are taken literally, so no escaping is needed.
s <- r"(C:\Users\name\file.txt)"
print(s)  # print() displays each backslash doubled; cat(s) shows the raw text
2.4 字符向量 #
r
fruits <- c("apple", "banana", "cherry")
length(fruits)  # 3: number of elements in the vector
nchar(fruits)   # 5 6 6: number of characters in each element
三、字符串拼接 #
3.1 paste函数 #
r
paste("Hello", "World")             # "Hello World": default separator is a space
paste("Hello", "World", sep = "-")  # "Hello-World"
paste("Hello", "World", sep = "")   # "HelloWorld"
3.2 paste0函数 #
r
paste0("Hello", "World")  # "HelloWorld": paste0() joins with no separator
paste0("A", 1:5)          # "A1" "A2" "A3" "A4" "A5": "A" is recycled
3.3 拼接向量 #
r
fruits <- c("apple", "banana", "cherry")
paste(fruits, collapse = ", ")  # one string: "apple, banana, cherry"
paste(fruits, 1:3, sep = "-")   # element-wise: "apple-1" "banana-2" "cherry-3"
3.4 sprintf函数 #
r
sprintf("Hello, %s!", "World")            # "Hello, World!"
sprintf("Value: %.2f", 3.14159)           # "Value: 3.14"
sprintf("%s is %d years old", "Tom", 25)  # "Tom is 25 years old"
四、字符串提取 #
4.1 substr函数 #
r
s <- "Hello, World!"
substr(s, 1, 5)   # "Hello"
substr(s, 8, 13)  # "World!"
# Replacement form: overwrites the first character of s.
substr(s, 1, 1) <- "h"
print(s)  # "hello, World!"
4.2 substring函数 #
r
s <- "Hello, World!"
substring(s, 1)     # whole string: the end position defaults to the end of s
substring(s, 1, 5)  # "Hello"
substring(s, c(1, 8), c(5, 13))  # vectorised positions: "Hello" "World!"
4.3 strsplit函数 #
r
# strsplit() returns a list with one character vector per input string.
s <- "apple,banana,cherry"
strsplit(s, ",")  # pieces: "apple" "banana" "cherry"
s <- "Hello World"
strsplit(s, " ")  # pieces: "Hello" "World"
五、字符串查找 #
5.1 grep函数 #
r
fruits <- c("apple", "banana", "cherry", "apricot")
grep("ap", fruits)                # 1 4: indices of the matching elements
grep("ap", fruits, value = TRUE)  # "apple" "apricot": the matching elements
grepl("ap", fruits)               # TRUE FALSE FALSE TRUE: one flag per element
5.2 regexpr函数 #
r
s <- "apple banana apple"
regexpr("apple", s)   # first match only: position 1, match.length 5
gregexpr("apple", s)  # all matches, returned as a list: positions 1 and 14
5.3 匹配位置 #
r
s <- "Hello, World!"
pos <- regexpr("World", s)  # start position of the first match (8 here)
attr(pos, "match.length")   # 5: length of the matched text
六、字符串替换 #
6.1 sub函数 #
r
s <- "apple banana apple"
sub("apple", "orange", s)  # first match only: "orange banana apple"
6.2 gsub函数 #
r
s <- "apple banana apple"
gsub("apple", "orange", s)  # every match: "orange banana orange"
6.3 chartr函数 #
r
s <- "Hello, World!"
# Character-by-character translation: each lowercase vowel maps to its
# uppercase counterpart.
chartr("aeiou", "AEIOU", s)  # "HEllO, WOrld!"
七、字符串转换 #
7.1 大小写转换 #
r
s <- "Hello, World!"
toupper(s)  # "HELLO, WORLD!"
tolower(s)  # "hello, world!"
7.2 首字母大写 #
r
# str_to_title() comes from the stringr package, which must be installed.
library(stringr)
str_to_title("hello world")  # "Hello World"
7.3 去除空白 #
r
s <- " Hello, World! "
trimws(s)           # strips whitespace from both ends (the default)
trimws(s, "left")   # strips leading whitespace only
trimws(s, "right")  # strips trailing whitespace only
7.4 填充字符串 #
r
# str_pad() is a stringr function, so stringr must be attached before this runs.
s <- "Hello"
str_pad(s, 10, side = "left")   # pads on the left to width 10: "     Hello"
str_pad(s, 10, side = "right")  # pads on the right to width 10: "Hello     "
str_pad(s, 10, side = "both")   # splits the padding between both sides
八、字符串比较 #
8.1 相等比较 #
r
s1 <- "Hello"
s2 <- "hello"
s1 == s2           # FALSE: the comparison is case-sensitive
identical(s1, s2)  # FALSE
8.2 忽略大小写比较 #
r
s1 <- "Hello"
s2 <- "hello"
tolower(s1) == tolower(s2)  # TRUE: both sides are lower-cased before comparing
8.3 字符串排序 #
r
fruits <- c("banana", "Apple", "cherry")
# String ordering follows the collation rules of the current locale.
sort(fruits)                     # "Apple" "banana" "cherry"
sort(fruits, decreasing = TRUE)  # "cherry" "banana" "Apple"
九、字符串长度 #
9.1 nchar函数 #
r
s <- "Hello, World!"
nchar(s)  # 13
fruits <- c("apple", "banana", "cherry")
nchar(fruits)  # 5 6 6: nchar() is vectorised over its input
9.2 nzchar函数 #
r
s1 <- "Hello"
s2 <- ""
nzchar(s1)  # TRUE: non-empty string
nzchar(s2)  # FALSE: empty string
十、正则表达式 #
10.1 基本模式 #
r
s <- "apple123 banana456"
# With value = TRUE, grep() returns every element that contains a match, not
# the matched substring, so both calls below return the whole string s.
grep("[0-9]+", s, value = TRUE)
grep("[a-z]+", s, value = TRUE)
10.2 常用元字符 #
| 元字符 | 说明 |
|---|---|
| . | 任意字符 |
| ^ | 行首 |
| $ | 行尾 |
| * | 0次或多次 |
| + | 1次或多次 |
| ? | 0次或1次 |
| [] | 字符集 |
| () | 分组 |
| \ | 转义(在R字符串中反斜杠本身需写成 `\\`, 例如 `\\.` 匹配字面量的点) |
10.3 示例 #
r
emails <- c("test@example.com", "invalid", "user@domain.org")
# Keep only elements shaped like name@domain.tld; "\\." matches a literal dot.
# Result: "test@example.com" "user@domain.org"
grep("^[a-zA-Z0-9]+@[a-zA-Z0-9]+\\.[a-zA-Z]+$", emails, value = TRUE)
phones <- c("123-456-7890", "1234567890", "123 456 7890")
# Remove every non-digit character; all three elements become "1234567890".
gsub("[^0-9]", "", phones)
十一、stringr包 #
11.1 安装和加载 #
r
# Install stringr only when it is missing, so re-running the script does not
# reinstall the package (or require network access) every time.
if (!requireNamespace("stringr", quietly = TRUE)) {
  install.packages("stringr")
}
library(stringr)
11.2 常用函数 #
r
s <- "Hello, World!"
str_length(s)         # 13
str_sub(s, 1, 5)      # "Hello"
str_to_upper(s)       # "HELLO, WORLD!"
str_to_lower(s)       # "hello, world!"
str_trim(" Hello ")   # "Hello": trims both sides by default
str_pad("Hello", 10)  # "     Hello": pads on the left by default
11.3 模式匹配 #
r
s <- c("apple", "banana", "cherry")
str_detect(s, "a")            # TRUE TRUE FALSE
str_count(s, "a")             # 1 3 0
str_locate(s, "a")            # start/end matrix of the first match; NA for "cherry"
str_extract(s, "a.+")         # "apple" "anana" NA
str_replace(s, "a", "A")      # first match only: "Apple" "bAnana" "cherry"
str_replace_all(s, "a", "A")  # every match: "Apple" "bAnAnA" "cherry"
十二、实践示例 #
12.1 邮箱验证 #
r
#' Check whether strings look like e-mail addresses
#'
#' @param email Character vector of candidate addresses.
#' @return Logical vector with one element per element of `email`; `TRUE`
#'   where the whole string matches the local-part@domain.tld pattern.
is_valid_email <- function(email) {
  # Local part, "@", domain, then a dot and a TLD of at least two letters.
  grepl(
    pattern = "^[a-zA-Z0-9._%+-]+@[a-zA-Z0-9.-]+\\.[a-zA-Z]{2,}$",
    x = email
  )
}
is_valid_email("test@example.com")  # TRUE
is_valid_email("invalid-email")     # FALSE: no "@" and no domain part
12.2 手机号格式化 #
r
#' Format an 11-digit phone number as 3-4-4 digit groups
#'
#' Non-digit characters are stripped first, then the digits are grouped as
#' "xxx-xxxx-xxxx".
#'
#' @param phone Character vector of phone numbers; separators are allowed.
#' @return Character vector the same length as `phone`. Elements that are
#'   `NA` or do not contain exactly 11 digits yield `NA_character_`.
format_phone <- function(phone) {
  digits <- gsub("[^0-9]", "", phone)
  formatted <- sprintf(
    "%s-%s-%s",
    substr(digits, 1, 3),
    substr(digits, 4, 7),
    substr(digits, 8, 11)
  )
  # Without this guard, NA would be rendered as the text "NA-NA-NA" and short
  # inputs as fragments such as "123-45-".
  is_complete <- !is.na(digits) & nchar(digits) == 11L
  formatted[!is_complete] <- NA_character_
  formatted
}
format_phone("12345678901")    # "123-4567-8901"
format_phone("123-456-78901")  # "123-4567-8901": non-digits are stripped first
12.3 文本处理 #
r
text <- " The quick brown fox jumps over the lazy dog. "
# Split on runs of whitespace so repeated spaces do not produce empty words.
words <- strsplit(trimws(text), "[[:space:]]+")[[1]]
length(words)  # 9 words
nchar(text)    # character count of the untrimmed text, outer spaces included
unique(tolower(words))  # distinct words ignoring case ("The" and "the" merge)
十三、总结 #
本章学习了:
- 字符串的创建和转义
- 字符串拼接函数
- 字符串提取和分割
- 字符串查找和替换
- 大小写转换和空白处理
- 字符串比较和排序
- 正则表达式基础
- stringr包的使用
字符串处理是数据分析的重要技能,掌握这些函数可以高效处理文本数据!
最后更新:2026-03-27