terraformrunbook

RUNBOOK

RUNBOOK | Last updated: 1/31/2026

Terraform & OpenTofu: Operational Runbook

Production-ready procedures for deploying, maintaining, and troubleshooting Terraform-managed infrastructure.


Table of Contents

  1. Installation & Setup
  2. Initial Configuration
  3. Standard Deployment
  4. State Management
  5. Workspace Management
  6. Provider Configuration
  7. Module Management
  8. Upgrading Terraform
  9. Disaster Recovery
  10. Troubleshooting
  11. Performance Tuning
  12. Security Hardening

1. Installation & Setup

Prerequisites

# Check system requirements
uname -m                    # x86_64 or arm64
uname -s                    # Linux, Darwin (macOS), or Windows

# Verify installed tools
which git
which aws  # or az (Azure CLI), gcloud

Linux Installation

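# Ubuntu/Debian (apt-key is deprecated; store the signing key in a dedicated keyring)
wget -O- https://apt.releases.hashicorp.com/gpg | sudo gpg --dearmor -o /usr/share/keyrings/hashicorp-archive-keyring.gpg
echo "deb [arch=$(dpkg --print-architecture) signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] https://apt.releases.hashicorp.com $(lsb_release -cs) main" | sudo tee /etc/apt/sources.list.d/hashicorp.list
sudo apt-get update
sudo apt-get install terraform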

# Verify installation
terraform version

macOS Installation

# Using Homebrew
brew tap hashicorp/tap
brew install hashicorp/tap/terraform

# Using native binary
curl -fsSL https://releases.hashicorp.com/terraform/1.5.0/terraform_1.5.0_darwin_amd64.zip -o terraform.zip  # use darwin_arm64 on Apple Silicon
unzip terraform.zip
sudo mv terraform /usr/local/bin/

# Verify
terraform version

Windows Installation

# Using Chocolatey
choco install terraform

# Or download from releases
Invoke-WebRequest -Uri "https://releases.hashicorp.com/terraform/1.5.0/terraform_1.5.0_windows_amd64.zip" -OutFile "terraform.zip"
Expand-Archive terraform.zip -DestinationPath $env:ProgramFiles\Terraform\

# Add to PATH
$env:Path += ";$env:ProgramFiles\Terraform"

Provider Authentication

AWS

# Option 1: Environment variables
export AWS_ACCESS_KEY_ID="<access_key>"
export AWS_SECRET_ACCESS_KEY="<secret_key>"
export AWS_DEFAULT_REGION="us-east-1"

# Option 2: AWS credentials file (~/.aws/credentials)
[default]
aws_access_key_id = <access_key>
aws_secret_access_key = <secret_key>

# Option 3: IAM role (recommended for production)
provider "aws" {
  assume_role {
    role_arn = "arn:aws:iam::123456789012:role/terraform"
  }
}

# Verify connection
aws sts get-caller-identity
terraform init

Azure

# Login to Azure
az login

# Set subscription
az account set --subscription="<subscription_id>"

# Verify
az account show

GCP

# Authenticate
gcloud auth application-default login

# Set project
gcloud config set project <project_id>

# Verify
gcloud config list

2. Initial Configuration

Project Structure Setup

# Create project directory
mkdir terraform-project
cd terraform-project

# Create standard structure
mkdir modules
mkdir -p environments/{dev,staging,prod}
touch versions.tf providers.tf main.tf variables.tf outputs.tf terraform.tfvars.example

# Initialize git
git init
echo 'terraform.tfstate' >> .gitignore
echo 'terraform.tfvars' >> .gitignore
echo '.terraform/' >> .gitignore
echo '*.tfplan' >> .gitignore
echo '*.tfstate.*' >> .gitignore

git add .
git commit -m "Initial Terraform project structure"

Provider Configuration

# versions.tf
terraform {
  required_version = ">= 1.0"
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
  }
}

# providers.tf
provider "aws" {
  region = var.aws_region

  default_tags {
    tags = {
      Environment = var.environment
      Project     = var.project_name
      ManagedBy   = "Terraform"
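      # CreatedAt = timestamp() is intentionally omitted: timestamp() changes on
      # every run and would force a perpetual diff on every tagged resource.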
    }
  }
}

# variables.tf
variable "aws_region" {
  type    = string
  default = "us-east-1"
}

variable "environment" {
  type = string
  validation {
    condition     = contains(["dev", "staging", "prod"], var.environment)
    error_message = "Environment must be dev, staging, or prod."
  }
}

variable "project_name" {
  type = string
}

3. Standard Deployment

Deployment Workflow

# Step 1: Initialize
terraform init

# Expected output:
# Initializing the backend...
# Initializing provider plugins...
# Terraform has been successfully initialized!

# Step 2: Validate configuration
terraform validate

# Expected output:
# Success! The configuration is valid.

# Step 3: Format check
terraform fmt -check -recursive

# Expected output:
# (no output = all files properly formatted)

# Step 4: Create plan
terraform plan -out=tfplan

# Expected output:
# Plan: X to add, Y to change, Z to destroy.

# Step 5: Review plan (tfplan is a binary file; render it with terraform show)
terraform show tfplan

# Step 6: Apply (production requires approval)
terraform apply tfplan

# Expected output:
# Apply complete! Resources: X added, Y changed, Z destroyed.

# Step 7: Verify
terraform output
aws ec2 describe-instances --region us-east-1

Multi-Environment Deployment

# Development environment
cd environments/dev
terraform init -backend-config="key=dev/terraform.tfstate"
terraform plan -var-file="terraform.tfvars"
terraform apply

# Staging environment
cd ../staging
terraform init -backend-config="key=staging/terraform.tfstate"
terraform plan -var-file="terraform.tfvars"
terraform apply

# Production environment (with approval)
cd ../prod
terraform init -backend-config="key=prod/terraform.tfstate"
terraform plan -var-file="terraform.tfvars" -out=prod.tfplan
# Manual review and approval
terraform apply prod.tfplan

Targeted Deployment

# Deploy specific resource
terraform plan -target=aws_instance.web
terraform apply -target=aws_instance.web

# Deploy specific module
terraform plan -target=module.vpc
terraform apply -target=module.vpc

# Deploy multiple resources
terraform plan -target=aws_instance.web -target=aws_security_group.web

4. State Management

Remote State Setup (S3 + DynamoDB)

# Create S3 bucket for state (bucket names are globally unique; pick one and reuse it in every command below)
aws s3api create-bucket --bucket terraform-state-123456 --region us-east-1

# Enable versioning
aws s3api put-bucket-versioning \
  --bucket terraform-state-123456 \
  --versioning-configuration Status=Enabled

# Enable encryption
aws s3api put-bucket-encryption \
  --bucket terraform-state-123456 \
  --server-side-encryption-configuration '{
    "Rules": [{
      "ApplyServerSideEncryptionByDefault": {
        "SSEAlgorithm": "AES256"
      }
    }]
  }'

# Block public access
aws s3api put-public-access-block \
  --bucket terraform-state-123456 \
  --public-access-block-configuration \
  "BlockPublicAcls=true,IgnorePublicAcls=true,BlockPublicPolicy=true,RestrictPublicBuckets=true"

# Create DynamoDB table for locking
aws dynamodb create-table \
  --table-name terraform-locks \
  --attribute-definitions AttributeName=LockID,AttributeType=S \
  --key-schema AttributeName=LockID,KeyType=HASH \
  --billing-mode PAY_PER_REQUEST \
  --region us-east-1

Configure Remote Backend

# terraform.tf
terraform {
  backend "s3" {
    bucket         = "terraform-state-123456"
    key            = "prod/terraform.tfstate"
    region         = "us-east-1"
    encrypt        = true
    dynamodb_table = "terraform-locks"
  }
}

Migrate to Remote Backend

# Current state: Local file (terraform.tfstate)
ls -la terraform.tfstate

# Initialize with remote backend
terraform init

# Confirm migration
# When prompted, enter "yes" to migrate state
# Result: the contents of the local terraform.tfstate are copied to the S3 backend

# Verify
terraform state list
aws s3 ls s3://terraform-state-123456/prod/

State Inspection

# List resources in state
terraform state list
# Output:
# aws_instance.web[0]
# aws_instance.web[1]
# aws_security_group.web

# Show specific resource
terraform state show aws_instance.web[0]
# Output:
# # aws_instance.web[0]:
# resource "aws_instance" "web" {
#   ami           = "ami-12345678"
#   instance_type = "t2.micro"
#   ...
# }

# Extract state to JSON
terraform state pull > state.json
cat state.json | jq '.resources[] | select(.type=="aws_instance")'

State Backup

# Manual backup
terraform state pull > terraform.tfstate.backup

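# Automated daily backup (save as a script and schedule it, e.g. with cron)
#!/bin/bash
set -euo pipefail
BACKUP_DIR="/backups/terraform"
mkdir -p "$BACKUP_DIR"
DATE=$(date +%Y-%m-%d-%H%M%S)
terraform state pull > "$BACKUP_DIR/terraform.tfstate.$DATE"
# Compress backups older than 30 days (skip files that are already compressed)
find "$BACKUP_DIR" -type f -mtime +30 ! -name '*.gz' -exec gzip {} \;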

5. Workspace Management

Workspace Operations

# List workspaces
terraform workspace list
# Output:
#   default
# * staging
#   production

# Create workspace
terraform workspace new production
# Created and switched to workspace "production"!

# Switch workspace
terraform workspace select staging

# Show current workspace
terraform workspace show

# Delete workspace (cannot delete "default" or the currently selected workspace)
terraform workspace select default
terraform workspace delete staging
# Deleted workspace "staging"!

Environment Isolation with Workspaces

# terraform.tfvars
instance_count = 1
instance_type  = "t2.micro"

# main.tf
resource "aws_instance" "web" {
  count         = var.instance_count
  ami           = "ami-12345678"
  instance_type = var.instance_type

  tags = {
    Name        = "web-${terraform.workspace}-${count.index}"
    Environment = terraform.workspace
  }
}

# Deployment workflow
terraform workspace select dev
terraform apply  # Creates 1 t2.micro in dev workspace

terraform workspace select prod
terraform apply  # Creates 3 t3.large in prod workspace (if tfvars changed)

6. Provider Configuration

Multi-Provider Setup

# providers.tf

# AWS provider (default)
provider "aws" {
  region = "us-east-1"
}

# AWS provider (different region)
provider "aws" {
  alias  = "us-west"
  region = "us-west-2"
}

# Azure provider
provider "azurerm" {
  features {}
  subscription_id = var.azure_subscription_id
}

# GCP provider
provider "google" {
  project = var.gcp_project_id
  region  = "us-central1"
}

# Kubernetes provider
provider "kubernetes" {
  host                   = aws_eks_cluster.main.endpoint
  cluster_ca_certificate = base64decode(aws_eks_cluster.main.certificate_authority[0].data)
  token                  = data.aws_eks_cluster_auth.main.token
}

Provider Upgrade

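# Pin the desired provider version in versions.tf (required_providers), e.g.:
#   version = "~> 5.0"
# (terraform init does not accept -var; versions are controlled by these constraints)

# Upgrade providers to the newest versions allowed by the constraints
terraform init -upgrade

# Verify the selected provider versions
terraform version
terraform providers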

# Plan and apply
terraform plan
terraform apply

7. Module Management

Module Structure Creation

# Create module directory
mkdir -p modules/vpc
cd modules/vpc

# Create module files
touch main.tf variables.tf outputs.tf README.md

# Example module structure
cat > main.tf << 'EOF'
resource "aws_vpc" "main" {
  cidr_block           = var.cidr_block
  enable_dns_hostnames = true

  tags = {
    Name = var.name
  }
}

resource "aws_subnet" "private" {
  count             = length(var.private_subnets)
  vpc_id            = aws_vpc.main.id
  cidr_block        = var.private_subnets[count.index]
  availability_zone = var.azs[count.index]

  tags = {
    Name = "${var.name}-private-${count.index}"
  }
}
EOF

cat > variables.tf << 'EOF'
variable "name" {
  type = string
}

variable "cidr_block" {
  type = string
}

variable "private_subnets" {
  type = list(string)
}

variable "azs" {
  type = list(string)
}
EOF

cat > outputs.tf << 'EOF'
output "vpc_id" {
  value = aws_vpc.main.id
}

output "subnet_ids" {
  value = aws_subnet.private[*].id
}
EOF

Module Usage

# main.tf

module "vpc" {
  source = "./modules/vpc"

  name            = "production-vpc"
  cidr_block      = "10.0.0.0/16"
  private_subnets = ["10.0.1.0/24", "10.0.2.0/24", "10.0.3.0/24"]
  azs            = ["us-east-1a", "us-east-1b", "us-east-1c"]
}

resource "aws_instance" "web" {
  subnet_id = module.vpc.subnet_ids[0]
  # ... other configuration
}

Remote Module Usage

# From Terraform Registry
module "eks" {
  source  = "terraform-aws-modules/eks/aws"
  version = "~> 19.0"

  cluster_name    = "my-cluster"
  cluster_version = "1.27"
  vpc_id          = module.vpc.vpc_id
  subnet_ids      = module.vpc.subnet_ids
}

# From Git repository
module "monitoring" {
  source = "git::https://github.com/example/terraform-monitoring.git//modules/prometheus?ref=v1.0"

  prometheus_enabled = true
}

8. Upgrading Terraform

Version Compatibility Check

# Check current version
terraform version
# Terraform v1.5.0

# Check required version in code
grep -r "required_version" .
# terraform {
#   required_version = ">= 1.0"
# }

# Check provider versions
grep -r "required_providers" .

Upgrade Procedure

# Step 1: Backup state
terraform state pull > terraform.tfstate.backup

# Step 2: Check compatibility
# Visit: https://www.terraform.io/upgrade-guides

# Step 3: Update versions.tf
cat > versions.tf << 'EOF'
terraform {
  required_version = ">= 1.5"
  required_providers {
    aws = {
      source  = "hashicorp/aws"
      version = "~> 5.0"
    }
  }
}
EOF

# Step 4: Reinitialize
terraform init -upgrade

# Step 5: Validate
terraform validate

# Step 6: Plan (verify no changes)
terraform plan
# Expected: Plan: 0 to add, 0 to change, 0 to destroy.

# Step 7: Document upgrade
git add versions.tf
git commit -m "Upgrade Terraform to 1.5.0, AWS provider to 5.0"

9. Disaster Recovery

State File Recovery

# Scenario: State file corrupted or lost

# Step 1: Verify backup exists
ls -la terraform.tfstate.backup

# Step 2: Restore from backup
cp terraform.tfstate.backup terraform.tfstate

# Step 3: Verify state
terraform state list

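# Step 4: If the remote state object is corrupted, restore a previous S3 object version
# (requires bucket versioning, enabled in "Remote State Setup")
aws s3api list-object-versions --bucket terraform-state-123456 --prefix prod/terraform.tfstate
aws s3api get-object --bucket terraform-state-123456 --key prod/terraform.tfstate \
  --version-id <version_id> terraform.tfstate.recovered
# Push the recovered copy; add -force only if the serial/lineage check rejects a known-good file
terraform state push terraform.tfstate.recovered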

# Step 5: Validate restored state
terraform validate
terraform plan  # Should show: Plan: 0 to add, 0 to change, 0 to destroy.

Resource Recovery

# Scenario: Resource accidentally destroyed by Terraform

# Step 1: Check state list
terraform state list

# Step 2: Verify resource not in state
terraform state show aws_instance.web
# Error: No instance found

# Step 3: Recreate resource manually (if needed immediately)
aws ec2 run-instances --image-id ami-12345 --instance-type t2.micro

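# Step 4: Import resource back to state
# (terraform import requires the resource block shown in Step 5 to already exist in code)
terraform import aws_instance.web i-1234567890abcdef0

# Step 5: Confirm the resource configuration in code matches the imported resource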
resource "aws_instance" "web" {
  ami           = "ami-12345"
  instance_type = "t2.micro"
}

# Step 6: Verify import
terraform state show aws_instance.web
terraform plan  # Should show: Plan: 0 to add, 0 to change, 0 to destroy.

Rollback Procedure

# Scenario: Applied configuration caused issues

# Step 1: Check git history
git log --oneline

# Step 2: Review problematic commit
git show <commit_hash>

# Step 3: Revert code changes
git revert <commit_hash>

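# Step 4: Plan rollback (the reverted code is now the desired state)
terraform plan -out=rollback.tfplan

# Step 5: Review, then apply the rollback
terraform apply rollback.tfplan
# Complete teardown only: "terraform plan -destroy" destroys EVERY resource in this configuration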

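# Alternative: restore a previous state snapshot from backup
# (terraform.tfstate is git-ignored, so it cannot be restored with git checkout)
terraform state push terraform.tfstate.backup
terraform plan  # apply converges infrastructure to the code, not to the old state; review before applying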

10. Troubleshooting

State Lock Issues

# Error: Error acquiring the state lock
# Cause: Another user has lock (or lock abandoned)

# Check lock in DynamoDB
aws dynamodb scan \
  --table-name terraform-locks \
  --region us-east-1

# Force unlock (dangerous - use only if certain)
terraform force-unlock <lock_id>

# Example:
terraform force-unlock e7a56551-a2c0-42e8-9358-2354211f1234

Provider Authentication Errors

# Error: error configuring AWS Provider: ValidationError: 1 validation error(s) found

# Solution: Verify credentials
aws sts get-caller-identity
# If fails: Configure AWS credentials

# Check environment variables (never print the secret key itself)
echo "$AWS_ACCESS_KEY_ID"
[ -n "$AWS_SECRET_ACCESS_KEY" ] && echo "AWS_SECRET_ACCESS_KEY is set"

# Or use AWS CLI to configure
aws configure
# Enter: Access Key ID, Secret Access Key, Region

Resource Already Exists

# Error: Error: resource already exists in AWS

# Scenario: Resource created manually, now trying via Terraform

# Solution 1: Import resource
terraform import aws_instance.web i-1234567890abcdef0

# Solution 2: Remove from Terraform state
terraform state rm aws_instance.web
# Then create manually and re-import

# Solution 3: Check for naming conflicts
terraform plan -json | grep "violation"

Backend Configuration Issues

# Error: Failed to get existing workspaces
# Cause: Backend not properly configured

# Solution 1: Reinitialize (keep .terraform.lock.hcl so provider versions stay pinned)
rm -rf .terraform
terraform init

# Solution 2: Check backend config
cat terraform.tf | grep -A 10 "backend"

# Solution 3: Verify S3 bucket exists
aws s3 ls s3://terraform-state-123456/

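# Solution 4: Check IAM permissions
aws sts get-caller-identity  # works for IAM users and assumed roles (aws iam get-user fails for roles)
# Verify the identity has: s3:ListBucket, s3:GetObject, s3:PutObject,
# dynamodb:DescribeTable, dynamodb:GetItem, dynamodb:PutItem, dynamodb:DeleteItem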

Module Errors

# Error: Error downloading module
# Cause: Git repository not accessible or wrong ref

# Verify Git access
git ls-remote https://github.com/example/terraform-module.git

# Check module version
cat .terraform/modules/modules.json | jq '.modules[] | {key, source}'

# Force redownload
rm -rf .terraform/modules
terraform init -upgrade

11. Performance Tuning

Parallel Operations

# Default: 10 parallel operations
terraform apply

# Increase parallelism
terraform apply -parallelism=20

# Useful for large deployments with 100+ resources

State Locking Optimization

# Skip state locking (use only for read-only operations such as plan)
terraform plan -lock=false

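# Increase DynamoDB throughput for faster locking
# (the table was created as PAY_PER_REQUEST, so switch it to PROVISIONED billing)
aws dynamodb update-table \
  --table-name terraform-locks \
  --billing-mode PROVISIONED \
  --provisioned-throughput ReadCapacityUnits=10,WriteCapacityUnits=10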

Provider Performance

# Enable debug logging for performance analysis
TF_LOG=DEBUG terraform plan 2>&1 | tee terraform.log

# Analyze slow operations
grep "Duration:" terraform.log | sort -t= -k2 -rn | head -20

12. Security Hardening

Sensitive Data Protection

# Mark passwords and keys as sensitive
variable "db_password" {
  type      = string
  sensitive = true
}

output "db_endpoint" {
  value     = aws_db_instance.main.endpoint
  sensitive = true
}

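# Won't display in console output (values are still stored in plaintext in the
# state file, so keep state encrypted and access-controlled)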

State File Encryption

# S3 server-side encryption
aws s3api put-bucket-encryption \
  --bucket terraform-state-123456 \
  --server-side-encryption-configuration '{
    "Rules": [{
      "ApplyServerSideEncryptionByDefault": {
        "SSEAlgorithm": "aws:kms",
        "KMSMasterKeyID": "arn:aws:kms:us-east-1:123456789012:key/12345678"
      }
    }]
  }'

# Enable versioning for recovery
aws s3api put-bucket-versioning \
  --bucket terraform-state-123456 \
  --versioning-configuration Status=Enabled

Access Control

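# Grant the Terraform role access to state objects
# (an Allow statement alone does not block other principals that already have
# IAM access to the bucket; add an explicit Deny statement to restrict them)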
aws s3api put-bucket-policy --bucket terraform-state-123456 \
  --policy '{
    "Version": "2012-10-17",
    "Statement": [{
      "Effect": "Allow",
      "Principal": {
        "AWS": "arn:aws:iam::123456789012:role/terraform"
      },
      "Action": "s3:*",
      "Resource": "arn:aws:s3:::terraform-state-123456/*"
    }]
  }'

Quick Reference: Common Commands

Task       | Command
Initialize | terraform init
Validate   | terraform validate
Format     | terraform fmt -recursive
Plan       | terraform plan -out=tfplan
Apply      | terraform apply tfplan
Destroy    | terraform destroy
Import     | terraform import aws_instance.web i-12345
Output     | terraform output
State      | terraform state list
Refresh    | terraform apply -refresh-only  (terraform refresh is deprecated)
Workspace  | terraform workspace select prod
Lock info  | shown in the "Error acquiring the state lock" message; release with terraform force-unlock <lock_id>

Document Version: 1.0
Last Updated: January 31, 2026
Contact: Infrastructure & DevOps Team