Elixir Supervisor #
一、Supervisor概述 #
1.1 什么是Supervisor #
Supervisor是专门用于监控和重启其他进程的进程,是OTP容错机制的核心。
1.2 监督树结构 #
text
┌─────────────────┐
│   Application   │
└────────┬────────┘
         │
┌────────▼────────┐
│ Main Supervisor │
└────────┬────────┘
         │
    ┌────┴────┬─────────┐
    │         │         │
┌───▼───┐ ┌───▼───┐ ┌───▼───┐
│Worker │ │Worker │ │  Sub  │
│   1   │ │   2   │ │  Sup  │
└───────┘ └───────┘ └───┬───┘
                        │
                   ┌────┴────┐
                   │         │
               ┌───▼───┐ ┌───▼───┐
               │Worker │ │Worker │
               │   3   │ │   4   │
               └───────┘ └───────┘
二、创建Supervisor #
2.1 使用Supervisor模块 #
elixir
defmodule MyApp.Supervisor do
  @moduledoc """
  Module-based supervisor that runs `MyApp.Worker` under the `:one_for_one` strategy.
  """

  use Supervisor

  @doc """
  Starts the supervisor and registers it under this module's name.

  `args` is forwarded unchanged to `init/1`.
  """
  def start_link(args) do
    Supervisor.start_link(__MODULE__, args, name: __MODULE__)
  end

  # `@impl true` lets the compiler verify that init/1 really is a Supervisor callback.
  @impl true
  def init(_args) do
    children = [
      {MyApp.Worker, []}
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end
end
2.2 不定义模块直接启动 #
elixir
# Child specifications handed straight to the supervisor; no callback module is defined.
children = [
{MyApp.Worker, []}
]
# Starts a supervisor for `children` using the :one_for_one strategy.
Supervisor.start_link(children, strategy: :one_for_one)
三、重启策略 #
3.1 one_for_one #
一个子进程崩溃,只重启该进程:
elixir
# :one_for_one — when a child crashes, only that child is restarted.
Supervisor.init(children, strategy: :one_for_one)
text
Before: [A] [B] [C]
A crashes
After: [A'] [B] [C]
3.2 one_for_all #
一个子进程崩溃,重启所有子进程:
elixir
# :one_for_all — when one child crashes, every child is restarted.
Supervisor.init(children, strategy: :one_for_all)
text
Before: [A] [B] [C]
A crashes
After: [A'] [B'] [C']
3.3 rest_for_one #
一个子进程崩溃,重启该进程及其后面的所有进程:
elixir
# :rest_for_one — the crashed child and all children listed after it are restarted.
Supervisor.init(children, strategy: :rest_for_one)
text
Before: [A] [B] [C] [D]
B crashes
After: [A] [B'] [C'] [D']
3.4 simple_one_for_one #
用于动态添加相同类型的子进程(注意:该策略在Elixir的Supervisor中已被弃用,新代码应改用DynamicSupervisor,见第五节):
elixir
# :simple_one_for_one is deprecated in Elixir's Supervisor. DynamicSupervisor is the
# replacement for children that are started on demand; it takes no static child list
# and only supports the :one_for_one strategy.
DynamicSupervisor.init(strategy: :one_for_one)
四、子进程规范 #
4.1 完整规范 #
elixir
# Full (map) form of a child specification.
# `arg1` and `arg2` are placeholders that must be bound before this snippet runs.
children = [
%{
# Identifier of this child within its supervisor.
id: MyApp.Worker,
# {module, function, args} tuple used to start the child.
start: {MyApp.Worker, :start_link, [arg1, arg2]},
# Restart policy: :permanent, :temporary or :transient.
restart: :permanent,
# Shutdown option; here a timeout in milliseconds.
shutdown: 5000,
# Child type: :worker or :supervisor.
type: :worker,
# Module list associated with the child.
modules: [MyApp.Worker]
}
]
4.2 简写形式 #
elixir
# Shorthand {module, arg} form of a child specification.
# NOTE(review): with the default child_spec/1 the list is passed as ONE argument,
# i.e. MyApp.Worker.start_link([arg1, arg2]) — not start_link(arg1, arg2) as in the
# full map form. Confirm which arity MyApp.Worker actually exposes.
children = [
{MyApp.Worker, [arg1, arg2]}
]
4.3 重启选项 #
- `:permanent` - 总是重启(默认)
- `:temporary` - 从不重启
- `:transient` - 仅在异常退出时重启
4.4 关闭选项 #
- `:brutal_kill` - 立即杀死
- `:infinity` - 无限等待(用于Supervisor)
- 整数 - 超时毫秒数(默认5000)
4.5 类型选项 #
- `:worker` - 工作进程(默认)
- `:supervisor` - 监督进程
五、动态Supervisor #
5.1 创建动态Supervisor #
elixir
# Runs a DynamicSupervisor, registered as MyApp.DynamicSupervisor, as a child of a
# regular supervisor. It starts with no children of its own.
children = [
{DynamicSupervisor, strategy: :one_for_one, name: MyApp.DynamicSupervisor}
]
Supervisor.start_link(children, strategy: :one_for_one)
5.2 添加子进程 #
elixir
# Starts a MyApp.Worker child at runtime under the supervisor registered as MyApp.DynamicSupervisor.
DynamicSupervisor.start_child(MyApp.DynamicSupervisor, {MyApp.Worker, []})
5.3 完整示例 #
elixir
defmodule ConnectionSupervisor do
  @moduledoc """
  Dynamic supervisor that starts and stops `Connection` processes on demand.
  """

  use DynamicSupervisor

  @doc """
  Starts the supervisor and registers it under this module's name.

  `args` is forwarded unchanged to `init/1`.
  """
  def start_link(args) do
    DynamicSupervisor.start_link(__MODULE__, args, name: __MODULE__)
  end

  # `@impl true` lets the compiler verify that init/1 is a DynamicSupervisor callback.
  @impl true
  def init(_args) do
    DynamicSupervisor.init(strategy: :one_for_one)
  end

  @doc """
  Starts a supervised `Connection` child, passing it `{host, port}` as its argument.

  Returns whatever `DynamicSupervisor.start_child/2` returns.
  """
  def start_connection(host, port) do
    spec = {Connection, {host, port}}
    DynamicSupervisor.start_child(__MODULE__, spec)
  end

  @doc """
  Terminates the child identified by `pid`.

  Returns whatever `DynamicSupervisor.terminate_child/2` returns.
  """
  def stop_connection(pid) do
    DynamicSupervisor.terminate_child(__MODULE__, pid)
  end
end
六、监督树示例 #
6.1 完整应用结构 #
elixir
defmodule MyApp.Application do
  @moduledoc """
  Application entry point: starts the top-level supervision tree.
  """

  use Application

  # `@impl true` lets the compiler verify that start/2 is an Application callback.
  @impl true
  def start(_type, _args) do
    # The list mixes bare modules and {module, arg} tuples; both are child specifications.
    children = [
      MyApp.Repo,
      MyApp.Cache,
      {MyApp.WorkerSupervisor, []},
      {Task.Supervisor, name: MyApp.TaskSupervisor},
      {Phoenix.PubSub, name: MyApp.PubSub},
      MyAppWeb.Endpoint
    ]

    # The root supervisor is registered as MyApp.Supervisor.
    Supervisor.start_link(children, strategy: :one_for_one, name: MyApp.Supervisor)
  end
end
6.2 嵌套监督树 #
elixir
defmodule MyApp.WorkerSupervisor do
  @moduledoc """
  Nested supervisor that groups `MyApp.Worker1`, `MyApp.Worker2` and `MyApp.Worker3`
  under the `:one_for_one` strategy.
  """

  use Supervisor

  @doc """
  Starts the supervisor and registers it under this module's name.

  `args` is forwarded unchanged to `init/1`.
  """
  def start_link(args) do
    Supervisor.start_link(__MODULE__, args, name: __MODULE__)
  end

  # `@impl true` lets the compiler verify that init/1 really is a Supervisor callback.
  @impl true
  def init(_args) do
    children = [
      {MyApp.Worker1, []},
      {MyApp.Worker2, []},
      {MyApp.Worker3, []}
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end
end
七、监督操作 #
7.1 查看子进程 #
elixir
iex(1)> Supervisor.which_children(MyApp.Supervisor)
[
{MyApp.Worker, #PID<0.123.0>, :worker, [MyApp.Worker]},
...
]
7.2 统计子进程 #
elixir
iex(1)> Supervisor.count_children(MyApp.Supervisor)
%{specs: 3, active: 3, supervisors: 0, workers: 3}
7.3 终止子进程 #
elixir
iex(1)> Supervisor.terminate_child(MyApp.Supervisor, :worker_id)
:ok
7.4 重启子进程 #
elixir
iex(1)> Supervisor.restart_child(MyApp.Supervisor, :worker_id)
{:ok, #PID<0.124.0>}
7.5 删除子进程 #
elixir
iex(1)> Supervisor.delete_child(MyApp.Supervisor, :worker_id)
:ok
八、容错设计 #
8.1 Let It Crash哲学 #
elixir
defmodule MyApp.Worker do
  @moduledoc """
  GenServer illustrating "let it crash": `handle_call/3` does not rescue the error
  raised by the risky operation, so a failure takes the process down and recovery
  is left to whatever supervises it.
  """

  use GenServer

  @doc """
  Starts the worker. `args` becomes the initial state.

  The default argument lets the worker be started both as `start_link/0` and as
  `start_link/1` (the form used by a `{MyApp.Worker, []}` child specification).
  """
  def start_link(args \\ []) do
    GenServer.start_link(__MODULE__, args)
  end

  # Explicit init/1: without it `use GenServer` injects this same default
  # implementation and emits a compile-time warning.
  @impl true
  def init(init_arg), do: {:ok, init_arg}

  @impl true
  def handle_call(:process, _from, state) do
    # Deliberately no try/rescue here: an exception crashes the process.
    result = risky_operation()
    {:reply, result, state}
  end

  # Raises when the random draw is below 0.5; otherwise returns :ok.
  defp risky_operation do
    if :rand.uniform() < 0.5 do
      raise "Random failure!"
    end

    :ok
  end
end
8.2 重启限制 #
elixir
# Child specification in full map form.
children = [
%{
id: MyApp.Worker,
# Empty argument list: the child is started via MyApp.Worker.start_link/0.
start: {MyApp.Worker, :start_link, []},
restart: :permanent,
shutdown: 5000,
type: :worker
}
]
# Restart limit: at most `max_restarts` restarts are tolerated within a window of
# `max_seconds` seconds (here 3 restarts in 5 seconds); beyond that the supervisor
# itself shuts down.
Supervisor.init(children, strategy: :one_for_one, max_restarts: 3, max_seconds: 5)
8.3 优雅降级 #
elixir
defmodule MyApp.Worker do
  @moduledoc """
  GenServer that traps exits so `terminate/2` can log and clean up before the
  process stops.
  """

  use GenServer

  # Logger.info/1 is a macro, so Logger must be required before it is used.
  require Logger

  @impl true
  def init(args) do
    # Trapping exits turns exit signals into messages, which gives the GenServer
    # the chance to run terminate/2 when it is asked to shut down.
    Process.flag(:trap_exit, true)
    {:ok, args}
  end

  @impl true
  def terminate(reason, state) do
    Logger.info("Worker terminating: #{inspect(reason)}")
    cleanup(state)
  end

  # Placeholder for resource release (closing connections, flushing buffers, ...).
  # Replace the body with the real cleanup logic.
  defp cleanup(_state), do: :ok
end
九、最佳实践 #
9.1 分层监督 #
elixir
defmodule MyApp.Supervisor do
  @moduledoc """
  Top-level supervisor that delegates to one nested supervisor per layer
  (data, business, API).
  """

  use Supervisor

  @doc """
  Starts the supervisor and registers it under this module's name.

  `args` is forwarded unchanged to `init/1`.
  """
  def start_link(args) do
    Supervisor.start_link(__MODULE__, args, name: __MODULE__)
  end

  # `@impl true` lets the compiler verify that init/1 really is a Supervisor callback.
  @impl true
  def init(_args) do
    children = [
      {MyApp.DataSupervisor, []},
      {MyApp.BusinessSupervisor, []},
      {MyApp.APISupervisor, []}
    ]

    Supervisor.init(children, strategy: :one_for_one)
  end
end
9.2 隔离故障 #
elixir
defmodule MyApp.Supervisor do
  @moduledoc """
  Isolates failures by placing critical and non-critical children under separate
  nested supervisors, next to a standalone `MyApp.CriticalWorker`.
  """

  use Supervisor

  @doc """
  Starts the supervisor and registers it under this module's name.

  `args` is forwarded unchanged to `init/1`.
  """
  def start_link(args) do
    Supervisor.start_link(__MODULE__, args, name: __MODULE__)
  end

  # `@impl true` lets the compiler verify that init/1 really is a Supervisor callback.
  @impl true
  def init(_args) do
    children = [
      {MyApp.CriticalWorker, []},
      nested_supervisor(:critical, critical_children()),
      nested_supervisor(:non_critical, non_critical_children())
    ]

    Supervisor.init(children, strategy: :rest_for_one)
  end

  # Builds a map child spec for an anonymous nested supervisor. A map is required
  # here: a child must be a module, a {module, arg} tuple or a spec map, and the
  # Supervisor module itself provides no child_spec/1 for the tuple form.
  defp nested_supervisor(id, children) do
    %{
      id: id,
      start: {Supervisor, :start_link, [children, [strategy: :one_for_one]]},
      type: :supervisor
    }
  end

  # Placeholder: return the child specifications of the critical group.
  defp critical_children, do: []

  # Placeholder: return the child specifications of the non-critical group.
  defp non_critical_children, do: []
end
十、总结 #
本章学习了:
| 特性 | 用途 |
|---|---|
| `use Supervisor` | 定义监督模块 |
| `strategy` | 重启策略 |
| `children` | 子进程列表 |
| `DynamicSupervisor` | 动态子进程 |
| 重启选项 | 控制重启行为 |
准备好学习Application了吗?让我们进入下一章。
最后更新:2026-03-27