google-cloud-platformterraformgce-instance-groupautoscalemode

How to configure autoscaler in managed instance group to autoscale when required?


I have successfully set up a managed instance group and load balancer using the guide available at guide. However, I am encountering an issue where, when I stress the CPU continuously for 5 minutes, there is no autoscaling triggered. Despite the load increasing significantly, the system does not scale as expected.

I would appreciate it if someone could help me identify what might be causing this behavior. Below is the Terraform script I used to configure the setup:

resource "google_compute_network" "vpc_network" {
  name = "${var.gcp_env}-${var.gcp_project}-vpc"
  auto_create_subnetworks  = false
}

resource "google_compute_subnetwork" "subnet1" {
  name = "${var.gcp_env}-${var.gcp_project}-subnet-1"
  region = "${var.gcp_region}"
  network = google_compute_network.vpc_network.self_link
  ip_cidr_range = "11.0.0.0/24"
}

resource "google_compute_global_address" "lb-ip" {
  name = "${var.gcp_env}-${var.gcp_project}-load-balancer-ip"
  lifecycle {
    prevent_destroy = false
  }
}

resource "google_compute_address" "mig-ip" {
  name = "${var.gcp_env}-${var.gcp_project}-mig-ip-1"
  lifecycle {
    prevent_destroy = false
  }
}

resource "google_compute_instance_template" "debian12-template" {
  name = "${var.gcp_env}-${var.gcp_project}-debian12-template"
  region = var.gcp_region
  machine_type = "e2-standard-2"
  disk {
    auto_delete = true
    boot = true
    device_name = "${var.gcp_env}-${var.gcp_project}-boot-disk"
    disk_size_gb = 30
    source_image = "projects/uat-aarogyadoot/global/images/uat-aarogyadoot-instance-image"
  }
  network_interface {
    network = google_compute_network.vpc_network.name
    subnetwork = google_compute_subnetwork.subnet1.name
    access_config {
      nat_ip = google_compute_address.mig-ip.address
    }
  }
  depends_on = [google_compute_address.mig-ip]
  tags = ["firewall-for-mig"]
}

resource "google_compute_region_instance_group_manager" "mig" {
  name = "${var.gcp_env}-${var.gcp_project}-managed-instance-group"
  version {
    instance_template = google_compute_instance_template.debian12-template.self_link
  }
  distribution_policy_zones = ["${var.gcp_region}-a", "${var.gcp_region}-b", "${var.gcp_region}-c"]
  base_instance_name = "${var.gcp_env}-${var.gcp_project}-instance"
  # target_size = 1
  region = var.gcp_region  
  named_port {
    name = "${var.gcp_env}-${var.gcp_project}-named-port"
    port = 80
  }
  ### This makes it stateful, I want stateless so I commented it.
  # stateful_internal_ip {
  #   interface_name = "nic0"
  #   delete_rule    = "ON_PERMANENT_INSTANCE_DELETION"
  # }
  update_policy {
    type                         = "PROACTIVE"
    minimal_action               = "REFRESH"
    instance_redistribution_type = "NONE"
    max_unavailable_fixed        = 3
    replacement_method= "RECREATE"
  }
}

resource "google_compute_region_autoscaler" "autoscaler" {
  name = "${var.gcp_env}-${var.gcp_project}-autoscaler"
  region = var.gcp_region
  # zone = "${var.gcp_region}-a"
  target = google_compute_region_instance_group_manager.mig.id
  depends_on = [google_compute_region_instance_group_manager.mig]
  autoscaling_policy {
    mode = "ON"
    cooldown_period = 60
    cpu_utilization {
      target = 0.75
    }
    max_replicas = 2
    min_replicas = 1
    scaling_schedules {
      name = "every-weekday-morning"
      min_required_replicas = 2
      schedule = "0 9 * * MON-SAT"
      time_zone = "Asia/Kolkata"
      duration_sec = 32400
      }
    scale_in_control {
      max_scaled_in_replicas {
        fixed = 1
      }
      time_window_sec = 1800
    }
  }
}
resource "google_compute_firewall" "firewall-for-mig" {
  name          = "${var.gcp_env}-${var.gcp_project}-firewall-for-mig"
  direction     = "INGRESS"
  network       = google_compute_network.vpc_network.self_link
  priority      = 1000
  source_ranges = ["0.0.0.0/0"]
  target_tags   = ["firewall-for-mig"]
  allow {
    ports    = ["80", "22"]
    protocol = "tcp"
  }
}

resource "google_compute_health_check" "health-check" {
  name               = "${var.gcp_env}-${var.gcp_project}-http-basic-check"
  check_interval_sec = 5
  healthy_threshold  = 2
  http_health_check {
    port               = 80
    port_specification = "USE_FIXED_PORT"
    proxy_header       = "NONE"
    request_path       = "/"
  }
  timeout_sec         = 5
  unhealthy_threshold = 2
}

resource "google_compute_backend_service" "backend-service" {
  name                            = "${var.gcp_env}-${var.gcp_project}-backend-service"
  connection_draining_timeout_sec = 0
  health_checks                   = [google_compute_health_check.health-check.id]
  load_balancing_scheme           = "EXTERNAL_MANAGED"
  port_name                       = "http"
  protocol                        = "HTTP"
  session_affinity                = "NONE"
  timeout_sec                     = 30
  enable_cdn = true
  backend {
    group           = google_compute_region_instance_group_manager.mig.instance_group
    balancing_mode  = "UTILIZATION"
    capacity_scaler = 1.0
  }
}

resource "google_compute_url_map" "url-map-backend" {
  name            = "${var.gcp_env}-${var.gcp_project}-alb"
  default_service = google_compute_backend_service.backend-service.id
}

resource "google_compute_target_http_proxy" "http-lb-proxy" {
  name    = "${var.gcp_env}-${var.gcp_project}-http-lb-proxy"
  url_map = google_compute_url_map.url-map-backend.id
}

Solution

  • The issue is with the below block, as you are using a MIG, you can not assign the same Public IP Address to more than one GCE instance, that is why the autoscaler is triggered but then it fails when creating the new instance with the same IP Address, so please just adjust this part and one dynamic Public IP will be assigned.

    access_config {
      // Ephemeral public IP
    }