Kubernetes 集群部署实战 #
架构概述 #
本案例将部署一个完整的 AWS EKS Kubernetes 集群:
text
┌─────────────────────────────────────────────────────────────┐
│ EKS 架构 │
├─────────────────────────────────────────────────────────────┤
│ │
│ ┌─────────────┐ │
│ │ EKS 控制面 │ │
│ │ (托管) │ │
│ └──────┬──────┘ │
│ │ │
│ ┌────────────────┼────────────────┐ │
│ │ │ │ │
│ ▼ ▼ ▼ │
│ ┌──────────┐ ┌──────────┐ ┌──────────┐ │
│ │ Node │ │ Node │ │ Node │ │
│ │ Group 1 │ │ Group 2 │ │ Group 3 │ │
│ │ (按需) │ │ (Spot) │ │ (GPU) │ │
│ └──────────┘ └──────────┘ └──────────┘ │
│ │
│ ┌─────────────────────────────────────────────────────┐ │
│ │ VPC 网络 │ │
│ │ ┌─────────────┐ ┌─────────────┐ │ │
│ │ │ 公有子网 │ │ 私有子网 │ │ │
│ │ │ (NAT/ALB) │ │ (EKS Nodes) │ │ │
│ │ └─────────────┘ └─────────────┘ │ │
│ └─────────────────────────────────────────────────────┘ │
│ │
└─────────────────────────────────────────────────────────────┘
项目结构 #
text
eks-cluster/
├── main.tf
├── variables.tf
├── outputs.tf
├── versions.tf
├── vpc.tf
├── iam.tf
├── eks.tf
├── node-groups.tf
├── addons.tf
└── kubernetes.tf
配置文件 #
variables.tf #
hcl
variable "region" {
description = "AWS region"
type = string
default = "us-east-1"
}
variable "project_name" {
description = "Project name"
type = string
}
variable "environment" {
description = "Environment name"
type = string
default = "dev"
}
variable "vpc_cidr" {
description = "VPC CIDR block"
type = string
default = "10.0.0.0/16"
}
variable "cluster_version" {
description = "Kubernetes cluster version"
type = string
default = "1.28"
}
variable "node_groups" {
description = "EKS node group configurations"
type = map(object({
instance_types = list(string)
capacity_type = string
scaling_config = object({
desired_size = number
min_size = number
max_size = number
})
labels = map(string)
taints = list(object({
key = string
value = string
effect = string
}))
}))
default = {
default = {
instance_types = ["t3.medium"]
capacity_type = "ON_DEMAND"
scaling_config = {
desired_size = 2
min_size = 1
max_size = 5
}
labels = {}
taints = []
}
}
}
variable "enable_irsa" {
description = "Enable IAM Roles for Service Accounts"
type = bool
default = true
}
variable "cluster_enabled_log_types" {
description = "EKS cluster log types to enable"
type = list(string)
default = ["api", "audit", "authenticator", "controllerManager", "scheduler"]
}
variable "tags" {
description = "Tags to apply to all resources"
type = map(string)
default = {}
}
vpc.tf #
hcl
data "aws_availability_zones" "available" {
state = "available"
}
locals {
name_prefix = "${var.project_name}-${var.environment}"
azs = slice(data.aws_availability_zones.available.names, 0, 3)
public_subnet_cidrs = [
for i in range(3) : cidrsubnet(var.vpc_cidr, 8, i)
]
private_subnet_cidrs = [
for i in range(3) : cidrsubnet(var.vpc_cidr, 8, i + 10)
]
common_tags = merge(var.tags, {
Project = var.project_name
Environment = var.environment
ManagedBy = "terraform"
})
}
module "vpc" {
source = "terraform-aws-modules/vpc/aws"
version = "5.0.0"
name = "${local.name_prefix}-vpc"
cidr = var.vpc_cidr
azs = local.azs
public_subnets = local.public_subnet_cidrs
private_subnets = local.private_subnet_cidrs
enable_nat_gateway = true
single_nat_gateway = false
one_nat_gateway_per_az = true
enable_vpn_gateway = false
enable_dns_hostnames = true
enable_dns_support = true
public_subnet_tags = {
"kubernetes.io/role/elb" = 1
}
private_subnet_tags = {
"kubernetes.io/role/internal-elb" = 1
}
tags = local.common_tags
}
iam.tf #
hcl
resource "aws_iam_role" "eks_cluster" {
name = "${local.name_prefix}-eks-cluster-role"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = {
Service = "eks.amazonaws.com"
}
}
]
})
tags = local.common_tags
}
resource "aws_iam_role_policy_attachment" "eks_cluster" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSClusterPolicy"
role = aws_iam_role.eks_cluster.name
}
resource "aws_iam_role_policy_attachment" "eks_cluster_vpc" {
policy_arn = "arn:aws:iam::aws:policy/AmazonEKSVPCResourceController"
role = aws_iam_role.eks_cluster.name
}
resource "aws_iam_role" "eks_node" {
name = "${local.name_prefix}-eks-node-role"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Action = "sts:AssumeRole"
Effect = "Allow"
Principal = {
Service = "ec2.amazonaws.com"
}
}
]
})
tags = local.common_tags
}
resource "aws_iam_role_policy_attachment" "eks_node" {
for_each = toset([
"arn:aws:iam::aws:policy/AmazonEKSWorkerNodePolicy",
"arn:aws:iam::aws:policy/AmazonEKS_CNI_Policy",
"arn:aws:iam::aws:policy/AmazonEC2ContainerRegistryReadOnly",
"arn:aws:iam::aws:policy/AmazonSSMManagedInstanceCore"
])
policy_arn = each.value
role = aws_iam_role.eks_node.name
}
resource "aws_iam_openid_connect_provider" "eks" {
count = var.enable_irsa ? 1 : 0
client_id_list = ["sts.amazonaws.com"]
thumbprint_list = [data.tls_certificate.eks[0].certificates[0].sha1_fingerprint]
url = aws_eks_cluster.main.identity[0].oidc[0].issuer
tags = local.common_tags
}
data "tls_certificate" "eks" {
count = var.enable_irsa ? 1 : 0
url = aws_eks_cluster.main.identity[0].oidc[0].issuer
}
eks.tf #
hcl
resource "aws_eks_cluster" "main" {
name = "${local.name_prefix}-cluster"
version = var.cluster_version
role_arn = aws_iam_role.eks_cluster.arn
vpc_config {
subnet_ids = module.vpc.private_subnets
endpoint_private_access = true
endpoint_public_access = true
public_access_cidrs = ["0.0.0.0/0"]
security_group_ids = [aws_security_group.eks_cluster.id]
}
enabled_cluster_log_types = var.cluster_enabled_log_types
encryption_config {
provider {
key_arn = aws_kms_key.eks.arn
}
resources = ["secrets"]
}
tags = local.common_tags
depends_on = [
aws_iam_role_policy_attachment.eks_cluster,
aws_iam_role_policy_attachment.eks_cluster_vpc,
aws_cloudwatch_log_group.eks_cluster
]
}
resource "aws_cloudwatch_log_group" "eks_cluster" {
name = "/aws/eks/${local.name_prefix}-cluster/cluster"
retention_in_days = 30
tags = local.common_tags
}
resource "aws_kms_key" "eks" {
description = "EKS Secret Encryption Key"
deletion_window_in_days = 7
enable_key_rotation = true
tags = local.common_tags
}
resource "aws_security_group" "eks_cluster" {
name = "${local.name_prefix}-eks-cluster-sg"
description = "Security group for EKS cluster"
vpc_id = module.vpc.vpc_id
egress {
from_port = 0
to_port = 0
protocol = "-1"
cidr_blocks = ["0.0.0.0/0"]
}
tags = local.common_tags
}
node-groups.tf #
hcl
resource "aws_eks_node_group" "main" {
for_each = var.node_groups
cluster_name = aws_eks_cluster.main.name
node_group_name = each.key
node_role_arn = aws_iam_role.eks_node.arn
subnet_ids = module.vpc.private_subnets
instance_types = each.value.instance_types
capacity_type = each.value.capacity_type
scaling_config {
desired_size = each.value.scaling_config.desired_size
min_size = each.value.scaling_config.min_size
max_size = each.value.scaling_config.max_size
}
labels = each.value.labels
dynamic "taint" {
for_each = each.value.taints
content {
key = taint.value.key
value = taint.value.value
effect = taint.value.effect
}
}
tags = merge(local.common_tags, {
Name = "${local.name_prefix}-${each.key}"
})
depends_on = [
aws_iam_role_policy_attachment.eks_node
]
lifecycle {
ignore_changes = [scaling_config[0].desired_size]
}
}
addons.tf #
hcl
resource "aws_eks_addon" "vpc_cni" {
cluster_name = aws_eks_cluster.main.name
addon_name = "vpc-cni"
addon_version = data.aws_eks_addon_version.vpc_cni.version
resolve_conflicts_on_create = "OVERWRITE"
resolve_conflicts_on_update = "OVERWRITE"
configuration_values = jsonencode({
env = {
ENABLE_PREFIX_DELEGATION = "true"
WARM_PREFIX_TARGET = "1"
}
})
tags = local.common_tags
}
resource "aws_eks_addon" "coredns" {
cluster_name = aws_eks_cluster.main.name
addon_name = "coredns"
addon_version = data.aws_eks_addon_version.coredns.version
resolve_conflicts_on_create = "OVERWRITE"
resolve_conflicts_on_update = "OVERWRITE"
tags = local.common_tags
depends_on = [aws_eks_node_group.main]
}
resource "aws_eks_addon" "kube_proxy" {
cluster_name = aws_eks_cluster.main.name
addon_name = "kube-proxy"
addon_version = data.aws_eks_addon_version.kube_proxy.version
resolve_conflicts_on_create = "OVERWRITE"
resolve_conflicts_on_update = "OVERWRITE"
tags = local.common_tags
}
resource "aws_eks_addon" "ebs_csi_driver" {
cluster_name = aws_eks_cluster.main.name
addon_name = "aws-ebs-csi-driver"
addon_version = data.aws_eks_addon_version.ebs_csi_driver.version
service_account_role_arn = aws_iam_role.ebs_csi_driver[0].arn
resolve_conflicts_on_create = "OVERWRITE"
resolve_conflicts_on_update = "OVERWRITE"
tags = local.common_tags
}
data "aws_eks_addon_version" "vpc_cni" {
addon_name = "vpc-cni"
kubernetes_version = var.cluster_version
most_recent = true
}
data "aws_eks_addon_version" "coredns" {
addon_name = "coredns"
kubernetes_version = var.cluster_version
most_recent = true
}
data "aws_eks_addon_version" "kube_proxy" {
addon_name = "kube-proxy"
kubernetes_version = var.cluster_version
most_recent = true
}
data "aws_eks_addon_version" "ebs_csi_driver" {
addon_name = "aws-ebs-csi-driver"
kubernetes_version = var.cluster_version
most_recent = true
}
resource "aws_iam_role" "ebs_csi_driver" {
count = var.enable_irsa ? 1 : 0
name = "${local.name_prefix}-ebs-csi-driver"
assume_role_policy = jsonencode({
Version = "2012-10-17"
Statement = [
{
Effect = "Allow"
Principal = {
Federated = aws_iam_openid_connect_provider.eks[0].arn
}
Action = "sts:AssumeRoleWithWebIdentity"
Condition = {
StringEquals = {
"${replace(aws_iam_openid_connect_provider.eks[0].url, "https://", "")}:sub" = "system:serviceaccount:kube-system:ebs-csi-controller-sa"
"${replace(aws_iam_openid_connect_provider.eks[0].url, "https://", "")}:aud" = "sts.amazonaws.com"
}
}
}
]
})
tags = local.common_tags
}
resource "aws_iam_role_policy_attachment" "ebs_csi_driver" {
count = var.enable_irsa ? 1 : 0
policy_arn = "arn:aws:iam::aws:policy/service-role/AmazonEBSCSIDriverPolicy"
role = aws_iam_role.ebs_csi_driver[0].name
}
kubernetes.tf #
hcl
data "aws_eks_cluster_auth" "main" {
name = aws_eks_cluster.main.name
}
provider "kubernetes" {
host = aws_eks_cluster.main.endpoint
cluster_ca_certificate = base64decode(aws_eks_cluster.main.certificate_authority[0].data)
token = data.aws_eks_cluster_auth.main.token
}
resource "kubernetes_config_map" "aws_auth" {
metadata {
name = "aws-auth"
namespace = "kube-system"
}
data = {
mapRoles = yamlencode([
{
rolearn = aws_iam_role.eks_node.arn
username = "system:node:{{EC2PrivateDNSName}}"
groups = ["system:bootstrappers", "system:nodes"]
}
])
}
depends_on = [aws_eks_cluster.main]
}
outputs.tf #
hcl
output "cluster_id" {
description = "EKS cluster ID"
value = aws_eks_cluster.main.id
}
output "cluster_name" {
description = "EKS cluster name"
value = aws_eks_cluster.main.name
}
output "cluster_endpoint" {
description = "EKS cluster endpoint"
value = aws_eks_cluster.main.endpoint
}
output "cluster_version" {
description = "Kubernetes version"
value = aws_eks_cluster.main.version
}
output "cluster_certificate_authority_data" {
description = "Base64 encoded certificate authority data"
value = aws_eks_cluster.main.certificate_authority[0].data
}
output "oidc_provider_arn" {
description = "OIDC provider ARN"
value = var.enable_irsa ? aws_iam_openid_connect_provider.eks[0].arn : null
}
output "oidc_provider_url" {
description = "OIDC provider URL"
value = aws_eks_cluster.main.identity[0].oidc[0].issuer
}
output "node_group_arns" {
description = "Node group ARNs"
value = { for k, v in aws_eks_node_group.main : k => v.arn }
}
output "kubectl_config" {
description = "kubectl configuration"
value = "aws eks update-kubeconfig --name ${aws_eks_cluster.main.name} --region ${var.region}"
}
使用示例 #
hcl
module "eks" {
source = "./modules/eks"
project_name = "myproject"
environment = "prod"
cluster_version = "1.28"
node_groups = {
system = {
instance_types = ["t3.medium"]
capacity_type = "ON_DEMAND"
scaling_config = {
desired_size = 2
min_size = 1
max_size = 5
}
labels = {
role = "system"
}
taints = []
}
workload = {
instance_types = ["t3.large"]
capacity_type = "SPOT"
scaling_config = {
desired_size = 3
min_size = 1
max_size = 10
}
labels = {
role = "workload"
}
taints = []
}
}
enable_irsa = true
tags = {
Owner = "platform-team"
}
}
部署步骤 #
bash
# Initialize providers/modules, review the plan, then apply.
terraform init
terraform plan
terraform apply

# BUG FIX: the module names the cluster "<project>-<environment>-cluster",
# i.e. "myproject-prod-cluster" for the example above — not "mycluster".
# (The exact command is also exported as the `kubectl_config` output.)
aws eks update-kubeconfig --name myproject-prod-cluster --region us-east-1
总结 #
恭喜你完成了 Terraform 学习之旅!通过本系列文档,你已经掌握了从基础到高级的 Terraform 知识,可以开始在实际项目中应用了!
最后更新:2026-03-29