集合 #

一、集合基础 #

1.1 创建集合 #

使用Set创建集合:

julia
s1 = Set([1, 2, 3, 4, 5])
s2 = Set([1, 2, 2, 3, 3, 3])
s3 = Set{Int}()
s4 = Set()
s5 = Set(["a", "b", "c"])

1.2 集合类型 #

julia
typeof(Set([1, 2, 3]))
typeof(Set{Int}())
typeof(Set(["a", "b"]))

1.3 集合特点 #

  • 元素不重复
  • 元素无序
  • 快速成员检测
julia
s = Set([1, 2, 2, 3, 3, 3])
length(s)
collect(s)

二、集合操作 #

2.1 添加元素 #

julia
s = Set{Int}()
push!(s, 1)
push!(s, 2)
push!(s, 1)
s

2.2 删除元素 #

julia
s = Set([1, 2, 3, 4, 5])
delete!(s, 3)
pop!(s)
setdiff!(s, Set([1, 2]))
empty!(s)

2.3 检查元素 #

julia
s = Set([1, 2, 3, 4, 5])
in(3, s)
3 ∈ s
3 ∉ s

2.4 集合大小 #

julia
s = Set([1, 2, 3, 4, 5])
length(s)
isempty(s)

三、集合运算 #

3.1 并集 #

julia
s1 = Set([1, 2, 3])
s2 = Set([3, 4, 5])
union(s1, s2)
s1 ∪ s2
union!(s1, s2)

3.2 交集 #

julia
s1 = Set([1, 2, 3])
s2 = Set([2, 3, 4])
intersect(s1, s2)
s1 ∩ s2
intersect!(s1, s2)

3.3 差集 #

julia
s1 = Set([1, 2, 3, 4])
s2 = Set([3, 4, 5, 6])
setdiff(s1, s2)
setdiff!(s1, s2)

3.4 对称差集 #

julia
s1 = Set([1, 2, 3])
s2 = Set([2, 3, 4])
symdiff(s1, s2)

3.5 子集判断 #

julia
s1 = Set([1, 2])
s2 = Set([1, 2, 3, 4])
issubset(s1, s2)
s1 ⊆ s2
s1 ⊊ s2
s1 ⊈ s2

3.6 相等判断 #

julia
s1 = Set([1, 2, 3])
s2 = Set([3, 2, 1])
s1 == s2
issetequal(s1, s2)

四、集合迭代 #

4.1 遍历元素 #

julia
s = Set([1, 2, 3, 4, 5])
for x in s
    println(x)
end

4.2 集合推导 #

julia
Set(x^2 for x in 1:5)
Set(x for x in 1:10 if x % 2 == 0)

4.3 转换为数组 #

julia
s = Set([3, 1, 4, 1, 5, 9])
collect(s)
sort(collect(s))

五、集合应用 #

5.1 去重 #

julia
arr = [1, 2, 3, 2, 4, 3, 5, 1]
unique(arr)
collect(Set(arr))

5.2 查找共同元素 #

julia
function common_elements(arr1, arr2)
    intersect(Set(arr1), Set(arr2))
end

common_elements([1, 2, 3, 4], [3, 4, 5, 6])

5.3 查找唯一元素 #

julia
function unique_to_first(arr1, arr2)
    setdiff(Set(arr1), Set(arr2))
end

unique_to_first([1, 2, 3, 4], [3, 4, 5, 6])

5.4 检查包含 #

julia
function contains_all(container, items)
    issubset(Set(items), Set(container))
end

contains_all([1, 2, 3, 4, 5], [2, 3])
contains_all([1, 2, 3, 4, 5], [2, 6])

六、集合性能 #

6.1 时间复杂度 #

操作 平均 最坏
成员检测 O(1) O(n)
插入 O(1) O(n)
删除 O(1) O(n)
并集 O(n+m) -
交集 O(min(n,m)) -

6.2 与数组比较 #

julia
using BenchmarkTools

arr = collect(1:10000)
s = Set(arr)

@btime 5000 in $arr
@btime 5000 in $s

七、BitSet #

7.1 创建BitSet #

对于小整数集合,使用BitSet更高效:

julia
bs = BitSet([1, 2, 3, 4, 5])
typeof(bs)

7.2 BitSet操作 #

julia
bs = BitSet([1, 2, 3, 4, 5])
push!(bs, 6)
delete!(bs, 3)
3 in bs

7.3 BitSet vs Set #

julia
using BenchmarkTools

s = Set(1:1000)
bs = BitSet(1:1000)

@btime 500 in $s
@btime 500 in $bs

八、实践练习 #

8.1 练习1:查找重复元素 #

julia
function find_duplicates(arr)
    seen = Set{eltype(arr)}()
    duplicates = Set{eltype(arr)}()
    for x in arr
        if x in seen
            push!(duplicates, x)
        else
            push!(seen, x)
        end
    end
    return duplicates
end

find_duplicates([1, 2, 3, 2, 4, 3, 5, 1])

8.2 练习2:集合覆盖 #

julia
function set_cover(universe, subsets)
    remaining = Set(universe)
    selected = []
    
    while !isempty(remaining)
        best = nothing
        best_cover = Set()
        
        for subset in subsets
            cover = intersect(subset, remaining)
            if length(cover) > length(best_cover)
                best = subset
                best_cover = cover
            end
        end
        
        if best === nothing || isempty(best_cover)
            break
        end
        
        push!(selected, best)
        setdiff!(remaining, best)
    end
    
    return selected
end

universe = Set(1:10)
subsets = [Set([1, 2, 3]), Set([3, 4, 5]), Set([5, 6, 7]), Set([7, 8, 9, 10])]
set_cover(universe, subsets)

8.3 练习3:词频分析 #

julia
function unique_words(texts)
    all_words = Set{String}()
    for text in texts
        words = split(lowercase(text))
        for word in words
            push!(all_words, strip(word, ['.', ',', '!', '?']))
        end
    end
    return all_words
end

unique_words(["Hello World", "Hello Julia", "Julia is great"])

8.4 练习4:集合相等 #

julia
function are_anagrams(s1::String, s2::String)
    Set(lowercase(s1)) == Set(lowercase(s2))
end

are_anagrams("listen", "silent")
are_anagrams("hello", "world")

九、总结 #

本章我们学习了:

  1. 集合创建:Set构造函数
  2. 集合操作:添加、删除、检查元素
  3. 集合运算:并集、交集、差集
  4. 子集判断:issubset和⊆运算符
  5. 集合应用:去重、查找共同元素
  6. BitSet:高效的小整数集合

接下来让我们学习Julia的范围!

最后更新:2026-03-27