Erlang字符串 #

一、字符串概述 #

Erlang中的字符串实际上是整数列表，每个整数代表一个Unicode码点。

1.1 字符串的本质 #

erlang

-module(string_essence).
-export([demo/0]).

%% @doc Prints a double-quoted string, the equivalent list of character
%% codes, and whether the two terms compare exactly equal.
demo() ->
    Literal = "hello",
    Codes = [104, 101, 108, 108, 111],
    Report = [
        {"\"hello\" = ~p~n", Literal},
        {"[104,101,108,108,111] = ~p~n", Codes},
        {"Equal? ~p~n", Literal =:= Codes}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Report),
    ok.

1.2 字符串类型 #

Erlang有两种字符串表示：

字符串（String）：整数列表 "hello"
二进制（Binary）：<<"hello">>

erlang

-module(string_types).
-export([demo/0]).

%% @doc Prints the same text as a list string and as a binary, together
%% with the result of the matching type test for each.
demo() ->
    ListForm = "hello",
    BinForm = <<"hello">>,
    Rows = [
        {"String: ~p~n", ListForm},
        {"Binary: ~p~n", BinForm},
        {"is_list(String): ~p~n", is_list(ListForm)},
        {"is_binary(Binary): ~p~n", is_binary(BinForm)}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Rows),
    ok.

二、字符串定义 #

2.1 双引号字符串 #

erlang

-module(string_def).
-export([demo/0]).

%% @doc Prints four double-quoted literals: plain, containing a space,
%% containing an escaped newline, and containing escaped quotes.
demo() ->
    Samples = [
        {"Simple: ~s~n", "hello"},
        {"WithSpace: ~s~n", "hello world"},
        {"WithEscape: ~s~n", "hello\nworld"},
        {"WithQuote: ~s~n", "hello \"world\""}
    ],
    lists:foreach(fun({Fmt, Text}) -> io:format(Fmt, [Text]) end, Samples),
    ok.

2.2 转义字符 #

转义序列	说明
`\n`	换行
`\r`	回车
`\t`	制表符
`\\`	反斜杠
`\"`	双引号
`\'`	单引号
`\x{H...}`	Unicode字符

erlang

-module(escape_chars).
-export([demo/0]).

%% @doc Prints strings built with the `\n', `\t' and `\x{...}' escapes.
%%
%% Returns `ok'.
demo() ->
    Newline = "line1\nline2",
    Tab = "col1\tcol2",
    Unicode = "\x{4E2D}\x{6587}",
    io:format("Newline:~n~s~n", [Newline]),
    io:format("Tab: ~s~n", [Tab]),
    %% The string holds code points above 255, so plain ~s would fail with
    %% badarg; the t modifier (~ts) makes io:format accept Unicode chardata.
    io:format("Unicode: ~ts~n", [Unicode]),
    ok.

2.3 字符列表 #

erlang

-module(char_list).
-export([demo/0]).

%% @doc Builds a string from `$c' character literals and prints it both as
%% a term and as text, then prints the integer value of `$A'.
%%
%% Returns `ok'.
demo() ->
    %% $c is Erlang's character literal; it evaluates to the code of c.
    %% (The previous `\$h' form was escaping residue and does not compile.)
    List = [$h, $e, $l, $l, $o],
    io:format("Char list: ~p~n", [List]),
    io:format("As string: ~s~n", [List]),

    Dollar = $A,
    io:format("$A = ~p~n", [Dollar]),
    ok.

$ 符号表示字符的ASCII/Unicode码：

erlang

1> $A.
65
2> $a.
97
3> $\n.
10
4> $\t.
9

三、字符串操作 #

3.1 string模块函数 #

erlang

-module(string_ops).
-export([demo/0]).

%% @doc Runs a handful of `string' module functions over one sample string
%% and prints each result.
demo() ->
    Text = "Hello, World!",

    %% Compute every result first, then print them in a fixed order.
    Results = [
        {"Original: ~s~n", Text},
        {"Length: ~p~n", string:length(Text)},
        {"Upper: ~s~n", string:uppercase(Text)},
        {"Lower: ~s~n", string:lowercase(Text)},
        {"Slice(0,5): ~s~n", string:slice(Text, 0, 5)},
        {"Split: ~p~n", string:split(Text, ", ")}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Results),
    ok.

3.2 常用string函数 #

函数	说明	示例
`length/1`	字符串长度	`string:length("hello")` → `5`
`concat/2`	连接字符串	`string:concat("hello", " world")`
`slice/3`	截取子串	`string:slice("hello", 1, 3)` → `"ell"`
`split/2`	分割字符串	`string:split("a,b,c", ",")`
`split/3`	分割字符串	`string:split("a,b,c", ",", all)`
`uppercase/1`	转大写	`string:uppercase("hello")` → `"HELLO"`
`lowercase/1`	转小写	`string:lowercase("HELLO")` → `"hello"`
`trim/1`	去除空白	`string:trim(" hello ")` → `"hello"`
`strip/1`	去除空白（旧）	`string:strip(" hello ")` → `"hello"`
`prefix/2`	检查前缀	`string:prefix("hello", "he")` → `"llo"`
`lists:suffix/2`	检查后缀（string模块没有suffix函数，使用lists模块；后缀是第一个参数）	`lists:suffix("lo", "hello")` → `true`
`find/2`	查找子串	`string:find("hello", "ll")` → `"llo"`
`replace/3`	替换子串	`string:replace("hello", "l", "L")`
`reverse/1`	反转字符串	`string:reverse("hello")` → `"olleh"`
`tokens/2`	分词	`string:tokens("a,b,c", ",")`

3.3 字符串连接 #

erlang

-module(string_concat).
-export([demo/0]).

%% @doc Joins two strings in three ways (`string:concat/2',
%% `string:join/2', `io_lib:format/2') and prints each result.
demo() ->
    First = "Hello",
    Second = "World",

    Joined = [
        {"concat: ~s~n", string:concat(First, Second)},
        {"join: ~s~n", string:join([First, Second], " ")},
        {"io_lib:format: ~s~n", io_lib:format("~s ~s", [First, Second])}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Joined),
    ok.

3.4 字符串分割 #

erlang

-module(string_split).
-export([demo/0]).

%% @doc Splits one comma-separated string with `string:split/2',
%% `string:split/3' (option `all') and `string:tokens/2', printing each result.
demo() ->
    Csv = "one,two,three,four",
    Comma = ",",

    Outcomes = [
        {"First split: ~p~n", string:split(Csv, Comma)},
        {"All split: ~p~n", string:split(Csv, Comma, all)},
        {"Tokens: ~p~n", string:tokens(Csv, Comma)}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Outcomes),
    ok.

3.5 字符串查找 #

erlang

-module(string_find).
-export([demo/0]).

%% @doc Demonstrates substring search, prefix check and suffix check on a
%% sample string, printing each result.
%%
%% Returns `ok'.
demo() ->
    Str = "Hello, World!",

    Find = string:find(Str, "World"),
    Prefix = string:prefix(Str, "Hello"),
    %% The string module has no suffix/2. Because a string is a list,
    %% lists:suffix/2 does the check; note the suffix is the FIRST argument.
    Suffix = lists:suffix("!", Str),

    io:format("find(\"World\"): ~p~n", [Find]),
    io:format("prefix(\"Hello\"): ~p~n", [Prefix]),
    io:format("suffix(\"!\"): ~p~n", [Suffix]),
    ok.

3.6 字符串替换 #

erlang

-module(string_replace).
-export([demo/0]).

%% @doc Replaces "world" with "Erlang" in a sample string, once with
%% `string:replace/3' and once with `string:replace/4' (option `all'),
%% and prints the original alongside both results.
demo() ->
    Source = "hello world world",
    Old = "world",
    New = "Erlang",

    Once = string:replace(Source, Old, New),
    Everywhere = string:replace(Source, Old, New, all),

    lists:foreach(
        fun({Fmt, Value}) -> io:format(Fmt, [Value]) end,
        [
            {"Original: ~s~n", Source},
            {"Replace first: ~s~n", Once},
            {"Replace all: ~s~n", Everywhere}
        ]
    ),
    ok.

四、列表操作字符串 #

由于字符串是列表，可以使用列表函数：

4.1 基本列表操作 #

erlang

-module(list_string_ops).
-export([demo/0]).

%% @doc Applies ordinary list operations (`hd/1', `tl/1', `length/1',
%% `lists:reverse/1') to a string and prints the results.
demo() ->
    Word = "hello",

    FirstChar = hd(Word),
    Lines = [
        %% The head is printed twice: as an integer (~p) and as a character (~c).
        {"Head: ~p (~c)~n", [FirstChar, FirstChar]},
        {"Tail: ~s~n", [tl(Word)]},
        {"Length: ~p~n", [length(Word)]},
        {"Reverse: ~s~n", [lists:reverse(Word)]}
    ],
    lists:foreach(fun({Fmt, Args}) -> io:format(Fmt, Args) end, Lines),
    ok.

4.2 列表推导 #

erlang

-module(list_comprehension_str).
-export([demo/0]).

%% @doc Uses list comprehensions over a string: one maps every character
%% to upper case, the other keeps only the vowels. Prints both results.
%%
%% Returns `ok'.
demo() ->
    Str = "hello",

    %% There is no `char' module in Erlang/OTP, so the per-character
    %% conversion is done by the local helper below.
    Upper = [ascii_upper(C) || C <- Str],
    OnlyVowels = [C || C <- Str, is_vowel(C)],

    io:format("Original: ~s~n", [Str]),
    io:format("Upper: ~s~n", [Upper]),
    io:format("Only vowels: ~s~n", [OnlyVowels]),
    ok.

%% Upper-cases a single ASCII lowercase letter; any other character is
%% returned unchanged.
ascii_upper(C) when C >= $a, C =< $z -> C - ($a - $A);
ascii_upper(C) -> C.

%% @doc Returns `true' when the argument is one of the lowercase
%% characters a, e, i, o, u, and `false' for any other term.
is_vowel(Char) -> lists:member(Char, "aeiou").

4.3 列表连接 #

erlang

1> "hello" ++ " " ++ "world".
"hello world"
2> [$. | "erl"].
".erl"
3> "hello" -- "ell".
"ho"

五、格式化输出 #

5.1 io:format #

erlang

-module(io_format).
-export([demo/0]).

%% @doc Prints a string, an integer and a float using the `~s', `~p' and
%% `~.Nf' control sequences, first one per line and then combined.
demo() ->
    Lang = "Erlang",
    Release = 26,
    PiValue = math:pi(),

    Lines = [
        {"Name: ~s~n", [Lang]},
        {"Version: ~p~n", [Release]},
        {"Pi: ~.4f~n", [PiValue]},
        {"All: ~s version ~p, pi=~.2f~n", [Lang, Release, PiValue]}
    ],
    lists:foreach(fun({Fmt, Args}) -> io:format(Fmt, Args) end, Lines),
    ok.

5.2 格式说明符 #

格式符	说明
`~s`	字符串
`~p`	美化打印
`~w`	原样输出
`~f`	浮点数
`~e`	科学计数法
`~b`	整数（默认十进制，可用精度指定进制，如`~.2b`输出二进制）
`~x`	带前缀的整数（需要两个参数：整数和前缀，如`~.16x`输出十六进制）
`~n`	换行
`~t`	Unicode修饰符（与其他控制符组合使用，如`~ts`输出Unicode字符串；制表符请使用`\t`）
`~~`	输出~

5.3 io_lib:format #

返回格式化字符串（IO列表）：

erlang

-module(io_lib_format).
-export([demo/0]).

%% @doc Formats a greeting with `io_lib:format/2' and prints the returned
%% term twice: as a raw term (~p) and as text (~s).
demo() ->
    Formatted = io_lib:format("Hello, ~s!~n", ["Erlang"]),
    lists:foreach(
        fun(Fmt) -> io:format(Fmt, [Formatted]) end,
        ["Result: ~p~n", "As string: ~s~n"]
    ),
    ok.

六、字符串与类型转换 #

6.1 字符串与数字 #

erlang

-module(str_num_conv).
-export([demo/0]).

%% @doc Converts numbers to strings and strings to numbers with the
%% `*_to_list' / `list_to_*' BIFs and prints every result.
demo() ->
    %% All conversions are evaluated while the list is built, before any
    %% output is produced.
    Conversions = [
        {"int to list: ~p~n", integer_to_list(42)},
        {"float to list: ~p~n", float_to_list(3.14)},
        {"list to int: ~p~n", list_to_integer("123")},
        {"list to float: ~p~n", list_to_float("3.14")}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Conversions),
    ok.

6.2 字符串与原子 #

erlang

-module(str_atom_conv).
-export([demo/0]).

%% @doc Converts an atom to a string and a string literal to an atom, and
%% prints both results.
demo() ->
    FromAtom = atom_to_list(hello),
    %% The argument is a fixed literal here, so no unbounded atom creation.
    ToAtom = list_to_atom("world"),

    lists:foreach(
        fun({Fmt, Value}) -> io:format(Fmt, [Value]) end,
        [{"atom to list: ~p~n", FromAtom}, {"list to atom: ~p~n", ToAtom}]
    ),
    ok.

6.3 字符串与二进制 #

erlang

-module(str_binary_conv).
-export([demo/0]).

%% @doc Round-trips a string through `list_to_binary/1' and
%% `binary_to_list/1', printing the value at each stage.
demo() ->
    Original = "hello",
    AsBinary = list_to_binary(Original),
    RoundTrip = binary_to_list(AsBinary),

    Stages = [
        {"String: ~p~n", Original},
        {"Binary: ~p~n", AsBinary},
        {"Back to list: ~p~n", RoundTrip}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Stages),
    ok.

七、Unicode支持 #

7.1 Unicode字符串 #

erlang

-module(unicode_str).
-export([demo/0]).

%% @doc Prints Chinese, Japanese and emoji strings, then the length and
%% grapheme clusters of the Chinese string.
%%
%% Returns `ok'.
demo() ->
    Chinese = "中文",
    Japanese = "日本語",
    Emoji = "😀",

    %% These strings contain code points above 255. Plain ~s fails with
    %% badarg on such lists; the t modifier (~ts) accepts Unicode chardata.
    io:format("Chinese: ~ts~n", [Chinese]),
    io:format("Japanese: ~ts~n", [Japanese]),
    io:format("Emoji: ~ts~n", [Emoji]),

    io:format("Chinese length: ~p~n", [string:length(Chinese)]),
    %% string:to_graphemes/1 returns grapheme clusters, so the label says so.
    io:format("Chinese graphemes: ~p~n", [string:to_graphemes(Chinese)]),
    ok.

7.2 Unicode函数 #

erlang

-module(unicode_funcs).
-export([demo/0]).

%% @doc Passes a Unicode string through `unicode:characters_to_list/2' and
%% `unicode:characters_to_binary/2' and prints both results as terms.
demo() ->
    Text = "中文",

    Converted = [
        {"Codepoints: ~p~n", unicode:characters_to_list(Text, utf8)},
        {"Binary: ~p~n", unicode:characters_to_binary(Text, utf8)}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Converted),
    ok.

八、二进制字符串 #

8.1 二进制字符串定义 #

erlang

-module(binary_str).
-export([demo/0]).

%% @doc Prints a plain binary literal, a binary literal with the `/utf8'
%% type specifier, and the result of `is_binary/1' on the first one.
demo() ->
    Plain = <<"hello">>,
    Utf8 = <<"中文"/utf8>>,

    Rows = [
        {"Binary: ~p~n", Plain},
        {"With encoding: ~p~n", Utf8},
        {"is_binary: ~p~n", is_binary(Plain)}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Rows),
    ok.

8.2 二进制字符串操作 #

erlang

-module(binary_str_ops).
-export([demo/0]).

%% @doc Measures, slices and splits a binary string with `byte_size/1',
%% `binary:part/3' and `binary:split/2', printing each result.
demo() ->
    Data = <<"Hello, World!">>,

    Results = [
        {"Size: ~p~n", byte_size(Data)},
        {"Part: ~p~n", binary:part(Data, 0, 5)},
        {"Split: ~p~n", binary:split(Data, <<", ">>)}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Results),
    ok.

8.3 字符串vs二进制 #

特性	字符串（列表）	二进制
内存占用	较大	较小
处理速度	较慢	较快
模式匹配	方便	需要位语法
适用场景	文本处理	I/O操作

九、正则表达式 #

9.1 基本使用 #

erlang

-module(regex_demo).
-export([demo/0]).

%% @doc Compiles the pattern `\d+', runs it with the `global' option
%% against a sample string, and prints the captured match positions.
demo() ->
    Subject = "Hello, World! 123",

    %% Both calls are matched assertively: a compile error or a
    %% non-matching subject crashes with badmatch.
    {ok, Compiled} = re:compile("\\d+"),
    {match, Found} = re:run(Subject, Compiled, [global]),

    io:format("Matches: ~p~n", [Found]),
    ok.

9.2 常用操作 #

erlang

-module(regex_ops).
-export([demo/0]).

%% @doc Runs `re:run/2', `re:replace/4' and `re:split/3' on one sample
%% string and prints each result.
demo() ->
    Subject = "Hello, World!",
    %% {return, list} makes re:replace/4 and re:split/3 return list strings.
    AsList = [{return, list}],

    Outcomes = [
        {"Match: ~p~n", re:run(Subject, "World")},
        {"Replace: ~s~n", re:replace(Subject, "World", "Erlang", AsList)},
        {"Split: ~p~n", re:split(Subject, ", ", AsList)}
    ],
    lists:foreach(fun({Fmt, Value}) -> io:format(Fmt, [Value]) end, Outcomes),
    ok.

十、实用函数 #

10.1 字符串工具函数 #

erlang

-module(string_utils).
-export([capitalize/1, is_empty/1, trim_all/1]).

%% @doc Returns `Str' with its first character upper-cased and the rest
%% lower-cased. The empty string is returned unchanged.
%%
%% The previous version called `char:to_upper/1', which does not exist in
%% Erlang/OTP. `string:uppercase/1' operates on strings, so the first
%% character is wrapped in a one-element list before conversion.
capitalize("") -> "";
capitalize([H | T]) -> string:uppercase([H]) ++ string:lowercase(T).

%% @doc Returns `true' when `Str' is empty or consists only of whitespace.
%%
%% The trimmed result is tested with `string:is_empty/1' rather than
%% compared to `""', so a whitespace-only binary (which trims to `<<>>')
%% is also reported as empty.
is_empty(Str) -> string:is_empty(string:trim(Str)).

%% @doc Removes leading and trailing spaces, tabs, newlines and carriage
%% returns from `Str'.
%%
%% `string:trim/3' compares whole grapheme clusters against the separator
%% list, and "\r\n" is a single grapheme cluster. It is therefore listed
%% explicitly; with only the individual characters, CRLF line endings
%% would be left in place.
trim_all(Str) -> string:trim(Str, both, [$\s, $\t, $\n, $\r, "\r\n"]).

10.2 字符串验证 #

erlang

-module(string_validate).
-export([is_numeric/1, is_alpha/1, is_alphanumeric/1]).

%% @doc Returns `true' when every element of `Str' lies in the range
%% `$0'..`$9'. An empty list yields `true'.
is_numeric([Char | Rest]) when Char >= $0, Char =< $9 -> is_numeric(Rest);
is_numeric([_ | _]) -> false;
is_numeric([]) -> true.

%% @doc Returns `true' when every element of `Str' is an ASCII letter
%% (`a'..`z' or `A'..`Z'). An empty list yields `true'.
is_alpha([Char | Rest]) when Char >= $a, Char =< $z; Char >= $A, Char =< $Z ->
    is_alpha(Rest);
is_alpha([_ | _]) ->
    false;
is_alpha([]) ->
    true.

%% @doc Returns `true' when every element of `Str' is an ASCII digit or an
%% ASCII letter. An empty list yields `true'.
is_alphanumeric(Str) ->
    %% "All elements acceptable" expressed as "no element is unacceptable".
    Unacceptable = fun(Char) ->
        not ((Char >= $0 andalso Char =< $9) orelse
             (Char >= $a andalso Char =< $z) orelse
             (Char >= $A andalso Char =< $Z))
    end,
    not lists:any(Unacceptable, Str).

10.3 字符串构建 #

erlang

-module(string_builder).
-export([build/1]).

%% @doc Converts every element of `Parts' with `format_part/1' and returns
%% the results flattened into one list.
build(Parts) ->
    Rendered = lists:map(fun format_part/1, Parts),
    lists:flatten(Rendered).

%% @doc Renders one part as a list string.
%%
%% Accepted shapes: a list (returned as is), a binary, or a tagged tuple
%% `{int, N}', `{float, F}' or `{atom, A}'. Any other term raises
%% `function_clause'.
format_part(Text) when is_list(Text) -> Text;
format_part(Bytes) when is_binary(Bytes) -> binary_to_list(Bytes);
format_part({int, Int}) -> integer_to_list(Int);
format_part({float, Float}) -> float_to_list(Float);
format_part({atom, Atom}) -> atom_to_list(Atom).

十一、总结 #

本章学习了：

字符串的本质（整数列表）
字符串定义和转义字符
string模块函数
列表操作字符串
格式化输出
类型转换
Unicode支持
二进制字符串
正则表达式
实用工具函数