Erlang二进制 #

一、二进制概述 #

二进制(Binary)是Erlang中用于存储原始字节数据的类型,比列表更节省内存,处理速度更快。

1.1 二进制的特点 #

  • 高效的内存使用
  • 快速的I/O操作
  • 支持位级操作
  • 适合网络协议和文件处理

二、二进制定义 #

2.1 基本定义 #

erlang
%% Demonstrates the basic ways of writing a binary literal.
-module(binary_def).
-export([demo/0]).

%% Prints an empty binary, a binary of three byte values and a binary
%% written as a string literal, then returns ok.
-spec demo() -> ok.
demo() ->
    io:format("Empty: ~p~n", [<<>>]),
    io:format("Simple: ~p~n", [<<1, 2, 3>>]),
    io:format("Text: ~p~n", [<<"hello">>]),
    ok.

2.2 大小限制 #

每个段默认是8位无符号整数,取值范围为0-255;超出范围的整数不会报错,而是只保留低8位(高位被静默截断):

erlang
1> <<256>>.
<<0>>
2> <<255>>.
<<255>>
3> <<0>>.
<<0>>

2.3 Unicode支持 #

erlang
%% Demonstrates building a UTF-8 encoded binary from a string literal.
-module(binary_unicode).
-export([demo/0]).

%% Encodes a two-character Chinese literal as UTF-8, prints the resulting
%% bytes and their count, then returns ok.
-spec demo() -> ok.
demo() ->
    Encoded = <<"中文"/utf8>>,
    ByteCount = byte_size(Encoded),
    io:format("UTF-8: ~p~n", [Encoded]),
    io:format("Size: ~p bytes~n", [ByteCount]),
    ok.

三、位语法 #

3.1 基本语法 #

erlang
%% Demonstrates the Value:Size form of the bit syntax.
-module(bit_syntax).
-export([demo/0]).

%% Prints the integer 1 encoded in 8, 16 and 32 bits, then returns ok.
-spec demo() -> ok.
demo() ->
    io:format("8 bits: ~p~n", [<<1:8>>]),
    io:format("16 bits: ~p~n", [<<1:16>>]),
    io:format("32 bits: ~p~n", [<<1:32>>]),
    ok.

3.2 段格式 #

完整格式:Value:Size/TypeSpecifierList

erlang
%% Demonstrates the full segment form Value:Size/TypeSpecifierList.
-module(segment_format).
-export([demo/0]).

%% Builds one integer, one float and one binary segment, prints each of
%% them, then returns ok.
-spec demo() -> ok.
demo() ->
    Int = <<42:8/integer>>,
    Float = <<3.14:32/float>>,
    %% A bare string literal inside <<...>> is sugar for one integer segment
    %% per character, so "hello":5/binary would apply the binary type to
    %% integers and fail with badarg. Wrapping the literal in its own
    %% binary makes the segment value a real binary of 5 bytes.
    Binary = <<(<<"hello">>):5/binary>>,
    io:format("Integer: ~p~n", [Int]),
    io:format("Float: ~p~n", [Float]),
    io:format("Binary: ~p~n", [Binary]),
    ok.

3.3 类型说明符 #

类型 说明
integer 整数(默认)
float 浮点数
binary 二进制
bytes binary的别名
bitstring 位串
bits bitstring的别名
utf8 UTF-8编码
utf16 UTF-16编码
utf32 UTF-32编码

3.4 符号说明符 #

符号 说明
signed 有符号
unsigned 无符号(默认)

3.5 字节序 #

字节序 说明
big 大端序(默认)
little 小端序
native 本机字节序

3.6 单位 #

单位 说明
unit:N Size的单位:段的总位数 = Size × N(N取值1-256;integer、float、bitstring默认为1,binary默认为8)
erlang
%% Demonstrates the big and little endianness specifiers.
-module(byte_order).
-export([demo/0]).

%% Encodes the same 32-bit value in both byte orders, prints the two
%% resulting binaries, then returns ok.
-spec demo() -> ok.
demo() ->
    Value = 16#12345678,
    io:format("Big endian: ~p~n", [<<Value:32/big>>]),
    io:format("Little endian: ~p~n", [<<Value:32/little>>]),
    ok.

四、二进制模式匹配 #

4.1 基本匹配 #

erlang
%% Demonstrates destructuring a binary byte by byte.
-module(binary_match).
-export([demo/0]).

%% Matches two three-byte binaries, first with default-sized segments and
%% then with explicit 8-bit sizes, printing the extracted values.
-spec demo() -> ok.
demo() ->
    %% Segments without a size default to 8-bit unsigned integers.
    <<First, Second, Third>> = <<1, 2, 3>>,
    io:format("A=~p, B=~p, C=~p~n", [First, Second, Third]),

    %% The same match with every size spelled out.
    <<Hi:8, Mid:8, Lo:8>> = <<100, 200, 50>>,
    io:format("X=~p, Y=~p, Z=~p~n", [Hi, Mid, Lo]),
    ok.

4.2 固定大小匹配 #

erlang
%% Demonstrates matching multi-byte segments of a fixed size.
-module(fixed_size_match).
-export([demo/0]).

%% Reads two bytes as one 16-bit integer and four bytes as one 32-bit
%% float, printing each decoded value.
-spec demo() -> ok.
demo() ->
    TwoBytes = <<1, 2>>,
    <<Word:16>> = TwoBytes,
    io:format("16-bit int: ~p~n", [Word]),

    FourBytes = <<63, 144, 245, 195>>,
    <<Single:32/float>> = FourBytes,
    io:format("32-bit float: ~p~n", [Single]),
    ok.

4.3 变长匹配 #

erlang
%% Demonstrates matching variable-length parts of a binary.
-module(varlen_match).
-export([demo/0]).

%% Splits a five-byte binary into head/rest and into first/middle/last,
%% printing every extracted part, then returns ok.
-spec demo() -> ok.
demo() ->
    Packet = <<1, 2, 3, 4, 5>>,

    %% An unsized /binary segment is allowed here because it is the last
    %% segment of the pattern.
    <<Head:8, Rest/binary>> = Packet,
    io:format("Head: ~p~n", [Head]),
    io:format("Rest: ~p~n", [Rest]),

    %% Only the final segment may omit its size, so a middle part needs an
    %% explicit length: everything except the 2 leading bytes (First) and
    %% the 1 trailing byte (Last).
    MiddleSize = byte_size(Packet) - 3,
    <<First:16, Middle:MiddleSize/binary, Last:8>> = Packet,
    io:format("First: ~p~n", [First]),
    io:format("Middle: ~p~n", [Middle]),
    io:format("Last: ~p~n", [Last]),
    ok.

4.4 位级匹配 #

erlang
%% Demonstrates matching fields narrower than one byte.
-module(bit_level_match).
-export([demo/0]).

%% Splits one byte into two 4-bit halves, then picks a 1-bit flag and a
%% 2-bit value out of another byte, printing the results.
-spec demo() -> ok.
demo() ->
    <<HighNibble:4, LowNibble:4>> = <<16#AB:8>>,
    io:format("A=~p, B=~p~n", [HighNibble, LowNibble]),

    %% The five bits between the flag and the value are skipped.
    <<Flag:1, _Skipped:5, Value:2>> = <<2#10100010:8>>,
    io:format("Flag=~p, Value=~p~n", [Flag, Value]),
    ok.

4.5 忽略部分 #

erlang
%% Demonstrates skipping parts of a binary that are of no interest.
-module(ignore_parts).
-export([demo/0]).

%% Extracts the middle byte and then the last byte of a three-byte binary,
%% discarding the remaining bytes, and prints both.
-spec demo() -> ok.
demo() ->
    Source = <<1, 2, 3>>,

    %% Underscore-prefixed names are matched but never read.
    <<_Before, Middle, _After>> = Source,
    io:format("Middle: ~p~n", [Middle]),

    %% One 16-bit segment skips the first two bytes at once.
    <<_Prefix:16, Last>> = Source,
    io:format("Last: ~p~n", [Last]),
    ok.

五、二进制操作 #

5.1 binary模块函数 #

erlang
%% Demonstrates a few helpers for inspecting and slicing binaries.
-module(binary_ops).
-export([demo/0]).

%% Prints the size of a sample binary, a slice of it, the result of
%% splitting it on one byte, and a repeated copy of a short binary.
-spec demo() -> ok.
demo() ->
    Source = <<1, 2, 3, 4, 5>>,
    io:format("Size: ~p~n", [byte_size(Source)]),
    %% binary:part/3 takes a zero-based start position and a length.
    io:format("Part(1,3): ~p~n", [binary:part(Source, 1, 3)]),
    io:format("Split: ~p~n", [binary:split(Source, <<3>>)]),
    io:format("Copy: ~p~n", [binary:copy(<<1, 2>>, 3)]),
    ok.

5.2 常用函数 #

函数 说明
byte_size/1 字节大小
bit_size/1 位大小
binary:part/3 截取部分
binary:split/2 分割
binary:split/3 分割(带选项)
binary:copy/1 复制
binary:copy/2 复制N次
binary:replace/3 替换
binary:encode_unsigned/1 整数转二进制
binary:decode_unsigned/1 二进制转整数

5.3 分割操作 #

erlang
%% Demonstrates binary:split/2 and binary:split/3.
-module(binary_split).
-export([demo/0]).

%% Splits a comma-separated binary once and then at every comma,
%% printing both results.
-spec demo() -> ok.
demo() ->
    Csv = <<"hello,world,erlang">>,
    Separator = <<",">>,

    %% Without options only the first occurrence is used as split point.
    io:format("First split: ~p~n", [binary:split(Csv, Separator)]),
    %% The global option splits at every occurrence.
    io:format("All split: ~p~n", [binary:split(Csv, Separator, [global])]),
    ok.

5.4 替换操作 #

erlang
%% Demonstrates binary:replace/3 and binary:replace/4.
-module(binary_replace).
-export([demo/0]).

%% Replaces the first and then every occurrence of a word in a sample
%% binary, printing both results.
-spec demo() -> ok.
demo() ->
    Subject = <<"hello world world">>,
    Pattern = <<"world">>,
    Replacement = <<"Erlang">>,

    %% Without options only the first occurrence is replaced.
    io:format("Replace first: ~p~n",
              [binary:replace(Subject, Pattern, Replacement)]),
    %% The global option replaces every occurrence.
    io:format("Replace all: ~p~n",
              [binary:replace(Subject, Pattern, Replacement, [global])]),
    ok.

六、二进制构建 #

6.1 动态构建 #

erlang
%% Builds a simple type/length/value packet.
-module(binary_build).
-export([build_packet/2]).

%% Returns a packet made of an 8-bit type, a 16-bit big-endian payload
%% length and the payload itself.
%%
%% The length field is only 16 bits wide and integer segments silently
%% drop their high bits, so a payload longer than 65535 bytes would yield
%% a packet whose length field does not match its payload. Such payloads
%% are rejected by the guard (function_clause) instead.
-spec build_packet(integer(), binary()) -> binary().
build_packet(Type, Data) when is_binary(Data), byte_size(Data) =< 16#FFFF ->
    Length = byte_size(Data),
    <<Type:8, Length:16, Data/binary>>.

6.2 列表转二进制 #

erlang
%% Demonstrates joining a list of binaries into one binary.
-module(list_to_bin).
-export([demo/0]).

%% Concatenates three small binaries with iolist_to_binary/1 and prints
%% the combined result.
-spec demo() -> ok.
demo() ->
    Chunks = [<<1, 2>>, <<3, 4>>, <<5>>],
    io:format("Combined: ~p~n", [iolist_to_binary(Chunks)]),
    ok.

6.3 IO列表 #

erlang
%% Demonstrates an IO list mixing strings, binaries and byte values.
-module(iolist_demo).
-export([demo/0]).

%% Prints a mixed IO list as-is and again after flattening it into a
%% single binary.
-spec demo() -> ok.
demo() ->
    %% Two strings, one binary and the byte 33 ($!) in a single list.
    Mixed = ["Hello", <<" ">>, "World", 33],
    io:format("IO List: ~p~n", [Mixed]),
    io:format("Binary: ~p~n", [iolist_to_binary(Mixed)]),
    ok.

七、位串 #

7.1 位串定义 #

位串(Bitstring)是二进制的泛化:其位数可以不是8的倍数;位数恰好是8的倍数的位串就是二进制:

erlang
%% Demonstrates building a value from segments narrower than one byte.
-module(bitstring_def).
-export([demo/0]).

%% Packs three small fields (3 + 3 + 2 bits) and prints the result
%% together with its size in bits and in bytes.
-spec demo() -> ok.
demo() ->
    Packed = <<1:3, 2:3, 3:2>>,
    BitCount = bit_size(Packed),
    ByteCount = byte_size(Packed),
    io:format("Bitstring: ~p~n", [Packed]),
    io:format("Bit size: ~p~n", [BitCount]),
    io:format("Byte size: ~p~n", [ByteCount]),
    ok.

7.2 位串匹配 #

erlang
%% Demonstrates extracting sub-byte fields by pattern matching.
-module(bitstring_match).
-export([extract/1]).

%% Splits an 8-bit value into a 3-bit, a 3-bit and a 2-bit unsigned
%% integer, returned as a tuple in that order. Input of any other size
%% does not match the clause.
-spec extract(<<_:8>>) -> {0..7, 0..7, 0..3}.
extract(<<High:3, Middle:3, Low:2>>) ->
    {High, Middle, Low}.

7.3 位串操作 #

erlang
1> Bs = <<1:3, 2:3, 3:2>>.
<<"+">>
2> bit_size(Bs).
8
3> <<A:3, Rest/bitstring>> = Bs.
<<"+">>
4> A.
1
5> Rest.
<<11:5>>

八、实际应用 #

8.1 解析IP头 #

erlang
%% Parses the header of an IPv4 packet.
-module(ip_header).
-export([parse/1]).

%% Decodes the fixed 20-byte IPv4 header and returns its fields as a map.
%%
%% IHL gives the header length in 32-bit words. Anything beyond the five
%% fixed words is header options, which are returned under `options`
%% rather than being counted as part of `payload`. Input with IHL below 5,
%% or shorter than the header it announces, does not match and crashes.
-spec parse(binary()) -> map().
parse(<<Version:4, IHL:4, ToS:8, TotalLength:16,
         Identification:16, Flags:3, FragmentOffset:13,
         TTL:8, Protocol:8, Checksum:16,
         SourceIP:32, DestIP:32,
         Rest/binary>>) when IHL >= 5 ->
    %% Each IHL unit is 4 bytes; the first 5 units are matched above.
    OptionsSize = (IHL - 5) * 4,
    <<Options:OptionsSize/binary, Payload/binary>> = Rest,
    #{version => Version,
      ihl => IHL,
      tos => ToS,
      total_length => TotalLength,
      identification => Identification,
      flags => Flags,
      fragment_offset => FragmentOffset,
      ttl => TTL,
      protocol => Protocol,
      checksum => Checksum,
      source_ip => format_ip(SourceIP),
      dest_ip => format_ip(DestIP),
      options => Options,
      payload => Payload}.

%% Renders a 32-bit address in dotted-decimal form. The result is the
%% character data produced by io_lib:format/2, not a flat string.
format_ip(IP) ->
    <<A, B, C, D>> = <<IP:32>>,
    io_lib:format("~p.~p.~p.~p", [A, B, C, D]).

8.2 解析TCP头 #

erlang
%% Parses the header of a TCP segment.
-module(tcp_header).
-export([parse/1]).

%% Decodes the fixed 20-byte TCP header and returns its fields as a map.
%%
%% DataOffset gives the header length in 32-bit words. Anything beyond the
%% five fixed words is TCP options, which are returned under `options`
%% rather than being counted as part of `payload`. Input with DataOffset
%% below 5, or shorter than the header it announces, does not match and
%% crashes.
-spec parse(binary()) -> map().
parse(<<SourcePort:16, DestPort:16,
         Sequence:32, AckNumber:32,
         DataOffset:4, _Reserved:4, Flags:8,
         Window:16, Checksum:16, UrgentPointer:16,
         Rest/binary>>) when DataOffset >= 5 ->
    %% Each DataOffset unit is 4 bytes; the first 5 units are matched above.
    OptionsSize = (DataOffset - 5) * 4,
    <<Options:OptionsSize/binary, Payload/binary>> = Rest,
    #{source_port => SourcePort,
      dest_port => DestPort,
      sequence => Sequence,
      ack_number => AckNumber,
      data_offset => DataOffset,
      flags => parse_flags(Flags),
      window => Window,
      checksum => Checksum,
      urgent_pointer => UrgentPointer,
      options => Options,
      payload => Payload}.

%% Expands the 8-bit flags field into a map holding one boolean per bit,
%% including the two highest bits (ECE and CWR).
parse_flags(Flags) ->
    #{fin => (Flags band 16#01) > 0,
      syn => (Flags band 16#02) > 0,
      rst => (Flags band 16#04) > 0,
      psh => (Flags band 16#08) > 0,
      ack => (Flags band 16#10) > 0,
      urg => (Flags band 16#20) > 0,
      ece => (Flags band 16#40) > 0,
      cwr => (Flags band 16#80) > 0}.

8.3 构建消息 #

erlang
%% Encodes and decodes a simple length-prefixed message frame.
-module(message_builder).
-export([build/3, parse/1]).

%% Returns a frame made of an 8-bit type, a 32-bit id, a 16-bit payload
%% length and the payload itself.
%%
%% The length field is only 16 bits wide and integer segments silently
%% drop their high bits, so a payload longer than 65535 bytes would yield
%% a frame that parse/1 cannot decode. Such payloads are rejected by the
%% guard (function_clause) instead.
-spec build(integer(), integer(), binary()) -> binary().
build(Type, Id, Payload) when is_binary(Payload),
                              byte_size(Payload) =< 16#FFFF ->
    Length = byte_size(Payload),
    <<Type:8, Id:32, Length:16, Payload/binary>>.

%% Decodes a frame produced by build/3. The payload must be exactly as
%% long as the length field says, with nothing following it.
-spec parse(binary()) -> map().
parse(<<Type:8, Id:32, Length:16, Payload:Length/binary>>) ->
    #{type => Type, id => Id, payload => Payload}.

8.4 十六进制转换 #

erlang
%% Converts between raw bytes and their hexadecimal text form.
-module(hex_conv).
-export([to_hex/1, from_hex/1]).

%% Returns the lowercase hexadecimal representation of Bin, two
%% characters per input byte.
-spec to_hex(binary()) -> binary().
to_hex(Bin) when is_binary(Bin) ->
    << <<(hex_char(H)), (hex_char(L))>> || <<H:4, L:4>> <= Bin >>.

%% Maps a 4-bit value to its lowercase hexadecimal character.
hex_char(N) when N < 10 -> N + $0;
hex_char(N) -> N - 10 + $a.

%% Decodes hexadecimal text (either case) back into bytes.
%%
%% The generator below consumes two characters at a time and simply stops
%% when fewer than two remain, so an odd-length input would lose its last
%% character without any error. The guard rejects such input
%% (function_clause); characters that are not hex digits crash in
%% hex_value/1.
-spec from_hex(binary()) -> binary().
from_hex(Hex) when is_binary(Hex), byte_size(Hex) rem 2 =:= 0 ->
    << <<(hex_value(H) * 16 + hex_value(L))>>
       || <<H:8, L:8>> <= Hex >>.

%% Maps one hexadecimal character to its numeric value.
hex_value(C) when C >= $0, C =< $9 -> C - $0;
hex_value(C) when C >= $a, C =< $f -> C - $a + 10;
hex_value(C) when C >= $A, C =< $F -> C - $A + 10.

九、性能优化 #

9.1 使用二进制而非列表 #

erlang
%% Compares the memory footprint of a string (list) and a binary.
-module(perf_tips).
-export([string_vs_binary/0]).

%% Prints the approximate number of bytes taken by the same text stored
%% as a list of characters and as a binary, then returns ok.
-spec string_vs_binary() -> ok.
string_vs_binary() ->
    String = "Hello, World!",
    Binary = <<"Hello, World!">>,

    %% Every list element costs one cons cell of two machine words. A
    %% fixed factor of 8 bytes per character is only right on a 32-bit VM,
    %% so the word size is asked from the runtime instead.
    BytesPerChar = 2 * erlang:system_info(wordsize),
    io:format("String size: ~p~n", [length(String) * BytesPerChar]),
    io:format("Binary size: ~p~n", [byte_size(Binary)]),
    ok.

9.2 使用IO列表 #

erlang
%% Contrasts passing an IO list along unchanged with flattening it.
-module(iolist_perf).
-export([build_iolist/1, build_binary/1]).

%% Returns the given parts untouched; no conversion or copying is done.
-spec build_iolist(iodata()) -> iodata().
build_iolist(IoData) ->
    IoData.

%% Flattens the given parts into one binary.
-spec build_binary(iodata()) -> binary().
build_binary(IoData) ->
    iolist_to_binary(IoData).

9.3 避免频繁转换 #

erlang
%% Contrasts splitting a binary directly with converting a list first.
-module(avoid_convert).
-export([good/1, bad/1]).

%% Splits a binary at its first comma with no intermediate conversion.
-spec good(binary()) -> [binary()].
good(Binary) when is_binary(Binary) ->
    Separator = <<",">>,
    binary:split(Binary, Separator).

%% Does the same for a string, at the price of converting the whole list
%% to a binary on every call.
-spec bad(string()) -> [binary()].
bad(String) when is_list(String) ->
    binary:split(list_to_binary(String), <<",">>).

十、总结 #

本章学习了:

  • 二进制的定义和特点
  • 位语法
  • 二进制模式匹配
  • binary模块函数
  • 二进制构建
  • 位串
  • 实际应用案例
  • 性能优化技巧

准备好学习集合类型了吗?让我们进入下一章。

最后更新:2026-03-27