databaseterraformaws-gluecreation

terraform database creation in athena


I am trying to create a database using terraform and this seems very complicated for a poor query...

Could you help me, please?

I have tried null_resource with local-exec and data "external" Python... I think I am looking the wrong way

ex which doesn't works in terraform 0.12

resource "null_resource" "create-endpoint" {
  provisioner "local-exec" {
  query = <<EOF
{
CREATE EXTERNAL TABLE `dashboard_loading_time`(
  `timestamp_iso` string, 
  `app_identification` struct<service:string,app_name:string,app_type:string,stage:string>, 
  `user` struct<api_gateway_key:struct<id:string,name:string>,mashery_key:struct<id:string,name:string>,employee:struct<id:string,name:string>>, 
  `action` struct<action_type:string,path:string>, 
  `result` struct<status:string,http_status:string,response:struct<response:string>>)
PARTITIONED BY ( 
  `year` int)
ROW FORMAT SERDE 
  'org.openx.data.jsonserde.JsonSerDe' 
STORED AS INPUTFORMAT 
  'org.apache.hadoop.mapred.TextInputFormat' 
OUTPUTFORMAT 
  'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
  's3://xxxxxxxxxxxxxxxxxxxxxxxxxxxxxxxx/dev'
}
EOF
    command = "aws athena start-query-execution --query-string "query""
  }
}

I would like to find the simplest way to do this using terraform.


Solution

  • If you wanna make it for athena, need to make glue resources.

    try below code with terraform.

    variable "service_name" {
      default = "demo-service"
    }
    
    variable "workspace" {
      default = "dev"
    }
    
    variable "columns" {
      default = {
        id         = "int"
        type       = "string"
        status     = "int"
        created_at = "timestamp"
      }
    }
    
    resource "aws_glue_catalog_database" "athena" {
      name = "${var.service_name}_db"
    }
    
    resource "aws_glue_catalog_table" "athena" {
      name          = "${var.service_name}_logs"
      database_name = "${aws_glue_catalog_database.athena.name}"
      table_type    = "EXTERNAL_TABLE"
    
      parameters = {
        EXTERNAL = "TRUE"
      }
    
      storage_descriptor {
        location      = "s3://${var.service_name}-${var.workspace}-data-pipeline/log/"
        input_format  = "org.apache.hadoop.mapred.TextInputFormat"
        output_format = "org.apache.hadoop.hive.ql.io.IgnoreKeyTextOutputFormat"
    
        ser_de_info {
          name                  = "jsonserde"
          serialization_library = "org.openx.data.jsonserde.JsonSerDe"
    
          parameters = {
            "serialization.format" = "1"
          }
        }
    
        dynamic "columns" {
          for_each = "${var.columns}"
    
          content {
            name = "${columns.key}"
            type = "${columns.value}"
          }
        }
      }
      partition_keys {
        name = "year"
        type = "string"
      }
      partition_keys {
        name = "month"
        type = "string"
      }
      partition_keys {
        name = "day"
        type = "string"
      }
      partition_keys {
        name = "hour"
        type = "string"
      }
    }
    

    refer to this repository : aws-serverless-data-pipeline-by-terraform