Terraform 数据源 #

什么是数据源? #

数据源(Data Source)允许 Terraform 查询和引用外部定义的基础设施资源。与资源不同,数据源只读取数据,不会创建或修改资源。

text
┌─────────────────────────────────────────────────────────────┐
│                    数据源的作用                              │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  ┌─────────────┐     ┌─────────────┐     ┌─────────────┐   │
│  │ 查询已有资源 │     │ 获取动态信息 │     │ 资源依赖    │   │
│  └─────────────┘     └─────────────┘     └─────────────┘   │
│                                                             │
│  - 查询现有 VPC、子网                                      │
│  - 获取最新 AMI ID                                         │
│  - 读取配置信息                                            │
│                                                             │
└─────────────────────────────────────────────────────────────┘

数据源语法 #

基本语法 #

hcl
# General shape of a data block: the first label is the data source type
# (provider prefix plus type), the second label is the local name, and the
# body holds the query arguments.
data "<PROVIDER>_<TYPE>" "<NAME>" {
  <CONFIG>
}

简单示例 #

hcl
# Look up an existing VPC by the ID passed in through var.vpc_id.
data "aws_vpc" "selected" {
  id = var.vpc_id
}

# Create a subnet inside the VPC returned by the lookup above.
resource "aws_subnet" "example" {
  vpc_id     = data.aws_vpc.selected.id
  cidr_block = "10.0.1.0/24"
}

数据源引用 #

hcl
# Reference form: the "data." prefix, then the full data source type
# (provider prefix included, exactly as written in the data block's first
# label), the local name, and finally the attribute to read.
data.<PROVIDER>_<TYPE>.<NAME>.<ATTRIBUTE>

# Concrete references following that form.
data.aws_vpc.selected.id
data.aws_ami.amazon_linux.id
data.aws_availability_zones.available.names

AWS 数据源 #

查询 AMI #

hcl
# Find an AMI owned by "amazon" whose name matches the Amazon Linux 2 HVM
# x86_64 gp2 pattern and whose virtualization type is hvm.
# most_recent = true is set so that a single image is selected when several
# images match the filters.
data "aws_ami" "amazon_linux" {
  most_recent = true
  owners      = ["amazon"]
  
  filter {
    name   = "name"
    values = ["amzn2-ami-hvm-*-x86_64-gp2"]
  }
  
  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }
}

# Launch an instance from the AMI found above instead of a hard-coded ID.
resource "aws_instance" "example" {
  ami           = data.aws_ami.amazon_linux.id
  instance_type = "t2.micro"
}

查询 VPC #

hcl
# Look up a single VPC by its Environment tag.
data "aws_vpc" "selected" {
  filter {
    name   = "tag:Environment"
    values = ["production"]
  }
}

# Look up a single VPC directly by ID.
data "aws_vpc" "by_id" {
  id = var.vpc_id
}

# Plural form (aws_vpcs): query VPCs matching the same tag filter.
data "aws_vpcs" "all" {
  filter {
    name   = "tag:Environment"
    values = ["production"]
  }
}

查询子网 #

hcl
# Collect the IDs of subnets that belong to the selected VPC and are tagged
# Tier = public. data.aws_vpc.selected is declared outside this snippet.
data "aws_subnets" "public" {
  filter {
    name   = "vpc-id"
    values = [data.aws_vpc.selected.id]
  }
  
  filter {
    name   = "tag:Tier"
    values = ["public"]
  }
}

# Load one aws_subnet data source per ID returned above; toset() turns the
# ID list into a set so it can drive for_each.
data "aws_subnet" "public" {
  for_each = toset(data.aws_subnets.public.ids)
  id       = each.value
}

# Place an instance in the first subnet of the list.
# NOTE(review): ids[0] assumes at least one subnet matched the filters.
resource "aws_instance" "example" {
  subnet_id = data.aws_subnets.public.ids[0]
  
  ami           = "ami-0c55b159cbfafe1f0"
  instance_type = "t2.micro"
}

查询可用区 #

hcl
# Query availability zones, restricted to those in the "available" state.
data "aws_availability_zones" "available" {
  state = "available"
}

# Create three subnets, assigning each one the AZ name at the same index.
# cidrsubnet(var.vpc_cidr, 8, count.index) derives a distinct CIDR per subnet
# by extending the VPC prefix by 8 bits.
# aws_vpc.main and var.vpc_cidr are declared outside this snippet.
# NOTE(review): names[count.index] with count = 3 assumes the lookup returns
# at least three zone names - confirm for the target region.
resource "aws_subnet" "public" {
  count             = 3
  vpc_id            = aws_vpc.main.id
  cidr_block        = cidrsubnet(var.vpc_cidr, 8, count.index)
  availability_zone = data.aws_availability_zones.available.names[count.index]
}

查询安全组 #

hcl
# Look up a single security group by its name.
data "aws_security_group" "selected" {
  name = "default"
}

# Plural form (aws_security_groups): query groups whose Name tag matches the
# wildcard pattern "web-*".
data "aws_security_groups" "web" {
  filter {
    name   = "tag:Name"
    values = ["web-*"]
  }
}

查询 IAM 角色 #

hcl
# Look up an existing IAM role by name.
data "aws_iam_role" "ecs_task_execution" {
  name = "ecsTaskExecutionRole"
}

# Reuse the role's ARN as the task definition's execution role.
# Only the arguments relevant to the data source are shown here.
resource "aws_ecs_task_definition" "example" {
  execution_role_arn = data.aws_iam_role.ecs_task_execution.arn
  
  family = "example"
}

查询 S3 存储桶 #

hcl
# Read an existing bucket by name.
data "aws_s3_bucket" "logs" {
  bucket = "my-logs-bucket"
}

# Read a single object, identified by bucket and key.
# aws_s3_object is used because the older aws_s3_bucket_object data source
# is deprecated in current AWS provider releases.
data "aws_s3_object" "config" {
  bucket = "my-config-bucket"
  key    = "config.json"
}

查询 RDS #

hcl
# Look up an existing RDS instance by its identifier.
data "aws_db_instance" "database" {
  db_instance_identifier = "my-database"
}

# Expose the instance's endpoint attribute as an output.
output "db_endpoint" {
  value = data.aws_db_instance.database.endpoint
}

查询 EKS 集群 #

hcl
# Read the EKS cluster named by var.cluster_name.
data "aws_eks_cluster" "cluster" {
  name = var.cluster_name
}

# Read authentication data for the same cluster; its token attribute is used
# by the provider block below.
data "aws_eks_cluster_auth" "cluster" {
  name = var.cluster_name
}

# Configure the Kubernetes provider entirely from the two lookups above.
# The CA data is passed through base64decode() before being handed to the
# provider.
provider "kubernetes" {
  host                   = data.aws_eks_cluster.cluster.endpoint
  cluster_ca_certificate = base64decode(data.aws_eks_cluster.cluster.certificate_authority[0].data)
  token                  = data.aws_eks_cluster_auth.cluster.token
}

查询 Route53 托管区域 #

hcl
# Look up the public (private_zone = false) hosted zone for example.com.
data "aws_route53_zone" "main" {
  name         = "example.com."
  private_zone = false
}

# Create a "www" A record in that zone; the record name is built from the
# zone's own name attribute, and the target is the public IP of
# aws_instance.web (declared outside this snippet).
resource "aws_route53_record" "www" {
  zone_id = data.aws_route53_zone.main.zone_id
  name    = "www.${data.aws_route53_zone.main.name}"
  type    = "A"
  ttl     = 300
  records = [aws_instance.web.public_ip]
}

查询 ACM 证书 #

hcl
# Find a certificate for example.com that is in the ISSUED status;
# most_recent = true is set so one certificate is chosen if several match.
data "aws_acm_certificate" "issued" {
  domain      = "example.com"
  statuses    = ["ISSUED"]
  most_recent = true
}

# HTTPS listener on port 443 that uses the certificate found above and
# forwards traffic to a target group. aws_lb.main and
# aws_lb_target_group.main are declared outside this snippet.
resource "aws_lb_listener" "https" {
  load_balancer_arn = aws_lb.main.arn
  port              = 443
  protocol          = "HTTPS"
  
  certificate_arn = data.aws_acm_certificate.issued.arn
  
  default_action {
    type             = "forward"
    target_group_arn = aws_lb_target_group.main.arn
  }
}

查询 Secrets Manager #

hcl
# Read the secret value stored in Secrets Manager under "my-db-password".
# NOTE(review): values read through data sources are recorded in the
# Terraform state - confirm the state backend is encrypted and
# access-controlled.
data "aws_secretsmanager_secret_version" "db_password" {
  secret_id = "my-db-password"
}

# Feed the secret string into the database password.
# Only the password wiring is shown in this snippet.
resource "aws_db_instance" "example" {
  password = data.aws_secretsmanager_secret_version.db_password.secret_string
}

查询 SSM Parameter #

hcl
# Read the SSM parameter at /database/password.
# NOTE(review): values read through data sources are recorded in the
# Terraform state - confirm the state backend is encrypted and
# access-controlled.
data "aws_ssm_parameter" "db_password" {
  name = "/database/password"
}

# Feed the parameter value into the database password.
# Only the password wiring is shown in this snippet.
resource "aws_db_instance" "example" {
  password = data.aws_ssm_parameter.db_password.value
}

其他常用数据源 #

HTTP 数据源 #

hcl
# Issue an HTTP GET against an external service; the response body is used
# below as an IP address.
data "http" "ip" {
  url = "https://api.ipify.org"
}

# Allow inbound SSH (TCP 22) only from that single address.
# chomp() strips any trailing newline from the response so that appending
# "/32" always yields a well-formed CIDR string.
# aws_security_group.main is declared outside this snippet.
resource "aws_security_group_rule" "ingress" {
  type              = "ingress"
  from_port         = 22
  to_port           = 22
  protocol          = "tcp"
  cidr_blocks       = ["${chomp(data.http.ip.response_body)}/32"]
  security_group_id = aws_security_group.main.id
}

本地文件数据源 #

hcl
# Read config.json from the module directory on the machine running
# Terraform.
data "local_file" "config" {
  filename = "${path.module}/config.json"
}

# Upload the file's content as an S3 object.
# aws_s3_object is used because the older aws_s3_bucket_object resource is
# deprecated in current AWS provider releases.
resource "aws_s3_object" "config" {
  bucket  = "my-bucket"
  key     = "config.json"
  content = data.local_file.config.content
}

模板数据源 #

hcl
# Render the user-data template with the built-in templatefile() function.
# The template_file data source belongs to the archived "template" provider
# and is deprecated; templatefile() takes the same template path and the same
# variables and needs no additional provider.
locals {
  user_data = templatefile("${path.module}/user_data.sh.tpl", {
    environment = var.environment
    region      = var.region
  })
}

# Pass the rendered script to the instance.
# Only the user_data wiring is shown in this snippet.
resource "aws_instance" "example" {
  user_data = local.user_data
}

Terraform 远程状态 #

hcl
# Read the outputs of another Terraform configuration whose state is stored
# in the S3 backend at the given bucket, key and region.
data "terraform_remote_state" "vpc" {
  backend = "s3"
  
  config = {
    bucket = "my-terraform-state"
    key    = "vpc/terraform.tfstate"
    region = "us-east-1"
  }
}

# Consume the subnet_id output from that remote state.
# Only the subnet wiring is shown in this snippet.
resource "aws_instance" "example" {
  subnet_id = data.terraform_remote_state.vpc.outputs.subnet_id
}

外部数据源 #

hcl
# Run a local Python script as an external data source; the query map is
# handed to the program and its answer is exposed through the result
# attribute.
data "external" "example" {
  program = ["python", "${path.module}/get_data.py"]
  
  query = {
    environment = var.environment
  }
}

# Expose whatever the program returned.
output "result" {
  value = data.external.example.result
}

数据源过滤 #

使用 filter #

hcl
# Combine several filter blocks to narrow an AMI search: name pattern for
# Ubuntu focal 20.04 amd64 server images, hvm virtualization, and an EBS
# root device. The lookup is limited to the single owner account listed.
data "aws_ami" "ubuntu" {
  most_recent = true
  owners      = ["099720109477"]
  
  filter {
    name   = "name"
    values = ["ubuntu/images/hvm-ssd/ubuntu-focal-20.04-amd64-server-*"]
  }
  
  filter {
    name   = "virtualization-type"
    values = ["hvm"]
  }
  
  filter {
    name   = "root-device-type"
    values = ["ebs"]
  }
}

使用标签过滤 #

hcl
# Select a VPC using two tag filters: Environment = production and
# Project = my-project.
data "aws_vpc" "selected" {
  filter {
    name   = "tag:Environment"
    values = ["production"]
  }
  
  filter {
    name   = "tag:Project"
    values = ["my-project"]
  }
}

使用 for_each #

hcl
# Subnet IDs to look up; the defaults are placeholder values.
variable "subnet_ids" {
  default = ["subnet-1", "subnet-2", "subnet-3"]
}

# One data source instance per subnet ID; toset() converts the list into a
# set so it can drive for_each, and each.value is the ID itself.
data "aws_subnet" "selected" {
  for_each = toset(var.subnet_ids)
  id       = each.value
}

# Build a map from each for_each key to that subnet's CIDR block.
output "subnet_cidrs" {
  value = {
    for k, v in data.aws_subnet.selected : k => v.cidr_block
  }
}

数据源 vs 资源 #

text
┌─────────────────────────────────────────────────────────────┐
│                    数据源 vs 资源                            │
├─────────────────────────────────────────────────────────────┤
│                                                             │
│  资源(Resource):                                         │
│  - 创建和管理基础设施                                      │
│  - 有生命周期                                              │
│  - 可以修改                                                │
│  - terraform apply 时创建                                  │
│                                                             │
│  数据源(Data Source):                                    │
│  - 只读取信息                                              │
│  - 无生命周期                                              │
│  - 只读                                                    │
│  - 通常在 terraform plan 阶段读取                           │
│    (参数依赖未创建的资源时推迟到 apply)                     │
│                                                             │
│  选择原则:                                                 │
│  - 需要创建新资源?用资源                                  │
│  - 需要引用已有资源?用数据源                              │
│                                                             │
└─────────────────────────────────────────────────────────────┘

数据源最佳实践 #

1. 使用描述性名称 #

hcl
# The local name "production" states which VPC this lookup is meant to find.
data "aws_vpc" "production" {
  filter {
    name   = "tag:Environment"
    values = ["production"]
  }
}

# The local name "public_app" mirrors the two tag filters used:
# Tier = public and Application = app.
data "aws_subnets" "public_app" {
  filter {
    name   = "tag:Tier"
    values = ["public"]
  }
  
  filter {
    name   = "tag:Application"
    values = ["app"]
  }
}

2. 集中管理数据源 #

text
project/
├── main.tf
├── variables.tf
├── outputs.tf
├── data.tf      # 数据源定义
└── locals.tf

3. 使用 locals 简化引用 #

hcl
# Look up the VPC by ID.
data "aws_vpc" "selected" {
  id = var.vpc_id
}

# Collect the IDs of that VPC's subnets tagged Tier = public.
data "aws_subnets" "public" {
  filter {
    name   = "vpc-id"
    values = [data.aws_vpc.selected.id]
  }
  
  filter {
    name   = "tag:Tier"
    values = ["public"]
  }
}

# Give the looked-up values short local names so the rest of the
# configuration can use local.vpc_id, local.vpc_cidr and local.subnet_ids
# instead of the longer data source references.
locals {
  vpc_id     = data.aws_vpc.selected.id
  vpc_cidr   = data.aws_vpc.selected.cidr_block
  subnet_ids = data.aws_subnets.public.ids
}

4. 条件数据源 #

hcl
# Perform the lookup only when a VPC ID was supplied: count is 1 when
# var.vpc_id is set and 0 when it is null.
data "aws_vpc" "selected" {
  count = var.vpc_id != null ? 1 : 0
  id    = var.vpc_id
}

# Use the looked-up VPC when var.vpc_id is set, otherwise fall back to
# aws_vpc.main (declared outside this snippet). The [0] index is required
# because count turns the data source into a list of instances.
locals {
  vpc_id = var.vpc_id != null ? data.aws_vpc.selected[0].id : aws_vpc.main.id
}

完整示例 #

查询网络资源 #

hcl
# Select the VPC whose Environment tag equals var.environment.
data "aws_vpc" "selected" {
  filter {
    name   = "tag:Environment"
    values = [var.environment]
  }
}

# Collect the IDs of that VPC's subnets tagged Tier = private.
data "aws_subnets" "private" {
  filter {
    name   = "vpc-id"
    values = [data.aws_vpc.selected.id]
  }
  
  filter {
    name   = "tag:Tier"
    values = ["private"]
  }
}

# Load one aws_subnet data source per private subnet ID.
data "aws_subnet" "private" {
  for_each = toset(data.aws_subnets.private.ids)
  id       = each.value
}

# Look up the security group named "default" within the selected VPC.
data "aws_security_group" "default" {
  vpc_id = data.aws_vpc.selected.id
  name   = "default"
}

# Query availability zones in the "available" state.
data "aws_availability_zones" "available" {
  state = "available"
}

查询计算资源 #

hcl
# Find an Amazon-owned AMI matching the Amazon Linux 2 HVM x86_64 gp2 name
# pattern; most_recent = true is set so one image is chosen if several match.
data "aws_ami" "amazon_linux" {
  most_recent = true
  owners      = ["amazon"]
  
  filter {
    name   = "name"
    values = ["amzn2-ami-hvm-*-x86_64-gp2"]
  }
}

# Look up an existing IAM role by name.
data "aws_iam_role" "ecs_task_execution" {
  name = "ecsTaskExecutionRole"
}

# Build a policy document with one statement: the sts:AssumeRole action for
# the ec2.amazonaws.com service principal. This data source is assembled from
# the arguments given here rather than from a named existing object.
data "aws_iam_policy_document" "assume_role" {
  statement {
    actions = ["sts:AssumeRole"]
    
    principals {
      type        = "Service"
      identifiers = ["ec2.amazonaws.com"]
    }
  }
}

查询配置数据 #

hcl
# Read the database endpoint from an SSM parameter whose path is prefixed
# with the environment name.
data "aws_ssm_parameter" "db_endpoint" {
  name = "/${var.environment}/database/endpoint"
}

# Read the database credentials secret for the same environment.
data "aws_secretsmanager_secret_version" "db_credentials" {
  secret_id = "${var.environment}/database/credentials"
}

# Read the environment's config.json object from the configuration bucket.
# aws_s3_object is used because the older aws_s3_bucket_object data source
# is deprecated in current AWS provider releases.
# NOTE(review): the provider only exposes body for text-like content types -
# confirm the object is stored with a JSON or text content type.
data "aws_s3_object" "config" {
  bucket = var.config_bucket
  key    = "${var.environment}/config.json"
}

# Decode both JSON payloads into Terraform values for use elsewhere.
locals {
  db_config      = jsondecode(data.aws_s3_object.config.body)
  db_credentials = jsondecode(data.aws_secretsmanager_secret_version.db_credentials.secret_string)
}

下一步 #

掌握了数据源后,接下来学习「状态管理」,了解 Terraform 状态文件的概念和管理方法!

最后更新:2026-03-29