amazon-web-services aws-sdk aws-cli aws-glue

I am trying to get the list of databases from AWS Glue using the AWS SDK for Java 1.x with a policy that limits resource access, but I am getting an empty list


As mentioned in the title, I am trying to fetch the list of databases from AWS Glue.

I have plenty of databases listed in Glue. I am using a policy with limited resource access so that I can only see specific types of databases.

These are my policies with their respective permissions:

  1. Resource policy

{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "glue:GetConnections",
                "glue:GetTables",
                "glue:GetTableVersions",
                "glue:GetDatabases",
                "glue:GetTable",
                "glue:GetDatabase",
                "glue:GetJob",
                "glue:StartJobRun",
                "glue:BatchStopJobRun",
                "glue:GetCrawler"
            ],
            "Resource": [
                "arn:aws:glue:*:248135293344:connection/*",
                "arn:aws:glue:*:248135293344:job/*",
                "arn:aws:glue:*:248135293344:database/glue*",
                "arn:aws:glue:*:248135293344:catalog",
                "arn:aws:glue:*:248135293344:table/glue*"
            ]
        }
    ]
}
  2. S3 policy
{
    "Version": "2012-10-17",
    "Statement": [
        {
            "Sid": "VisualEditor0",
            "Effect": "Allow",
            "Action": [
                "s3:ListBucket",
                "s3:GetBucketAcl"
            ],
            "Resource": "arn:aws:s3:::*"
        },
        {
            "Sid": "VisualEditor1",
            "Effect": "Allow",
            "Action": "s3:ListAllMyBuckets",
            "Resource": "*"
        }
    ]
}

When I do a query through AWS CLI using the following command:

aws glue get-databases --query "DatabaseList[*].Name"

I get the following output: (screenshot showing the listed databases)

But when I try to do the query using AWS SDK for Java I get no results. These are my code snippets and results:

package alation.awsglue.mde.extractor;

import alation.aws.auth.AWSAuthentication;
import alation.awsglue.Main;
import com.amazonaws.auth.AWSCredentialsProvider;
import com.amazonaws.regions.Regions;
import com.amazonaws.services.glue.AWSGlue;
import com.amazonaws.services.glue.AWSGlueClientBuilder;
import com.amazonaws.services.glue.model.AWSGlueException;
import com.amazonaws.services.glue.model.Database;
import com.amazonaws.services.glue.model.GetDatabasesRequest;
import com.amazonaws.services.glue.model.GetDatabasesResult;
import org.apache.log4j.PropertyConfigurator;

import java.util.List;

/**
 * Reproduction of the problem: asks AWS Glue for its databases and prints their names.
 *
 * <p>Only a single {@code GetDatabases} call is made; the {@code NextToken} of the result
 * is never read, so whatever the first page contains is all that gets printed.
 */
public class sampleMDE {

    /**
     * Builds a Glue client for US_EAST_1, fetches one page of databases and prints each name,
     * or "No databases found." when that page is empty.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        System.setProperty("com.amazonaws.sdk.enableDefaultMetrics", "true");
        PropertyConfigurator.configure(Main.class.getClassLoader().getResource("log4j.properties"));

        // Step 1: create the client; any failure here ends the program.
        AWSGlue glue;
        try {
            AWSAuthentication auth = new AWSAuthentication("<your_key>", "<your_id>");
            AWSCredentialsProvider credentials = auth.getCredential();
            AWSGlueClientBuilder builder = AWSGlueClientBuilder.standard()
                .withCredentials(credentials)
                .withRegion(Regions.US_EAST_1);
            glue = builder.build();
        } catch (Exception e) {
            System.err.println("Failed to create Glue client: " + e.getMessage());
            e.printStackTrace();
            return;
        }

        // Step 2: one GetDatabases request with default parameters (no continuation token).
        try {
            GetDatabasesResult firstPage = glue.getDatabases(new GetDatabasesRequest());
            List<Database> found = firstPage.getDatabaseList();

            if (found.isEmpty()) {
                System.out.println("No databases found.");
            } else {
                for (Database db : found) {
                    System.out.println("Database name: " + db.getName());
                }
            }
        } catch (AWSGlueException e) {
            System.err.println("Error occurred while retrieving databases: " + e.getErrorMessage());
            e.printStackTrace();
        }
    }
}

Result


Solution

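  • I spoke with the AWS support team and learned that, when fetching databases, the initial pages of the response can sometimes be empty even though later pages contain results. You therefore need to paginate (keep requesting pages until the returned next token is null) in this situation and, as a general rule, in all cases. The following code snippet worked for me: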

    
    import alation.aws.auth.AWSAuthentication;
    import com.amazonaws.auth.AWSCredentialsProvider;
    import com.amazonaws.regions.Regions;
    import com.amazonaws.services.glue.AWSGlue;
    import com.amazonaws.services.glue.AWSGlueClientBuilder;
    import com.amazonaws.services.glue.model.AWSGlueException;
    import com.amazonaws.services.glue.model.Database;
    import com.amazonaws.services.glue.model.GetDatabasesRequest;
    import com.amazonaws.services.glue.model.GetDatabasesResult;
    
    import java.util.ArrayList;
    import java.util.List;
    
    /**
     * Prints the name of every AWS Glue database returned to the configured credentials.
     *
     * <p>{@code GetDatabases} is paginated, and a page may contain no databases while still
     * carrying a continuation token. All pages are therefore read before deciding that
     * nothing was found.
     */
    public class sampleMDE {
        /**
         * Builds a Glue client for US_EAST_1, collects the databases from all result pages and
         * prints each name, or "No databases found." when every page was empty.
         *
         * @param args unused
         */
        public static void main(String[] args) {
            System.setProperty("com.amazonaws.sdk.enableDefaultMetrics", "true");
            AWSGlue amazonGlueClient;
            try {
                // Initialize the Glue client.
                // "<ID>" / "<SECRET>" are placeholders; do not commit real credentials.
                AWSCredentialsProvider awsCredProvider =
                    new AWSAuthentication(
                        "<ID>",
                        "<SECRET>")
                        .getCredential();
                amazonGlueClient = AWSGlueClientBuilder.standard()
                    .withCredentials(awsCredProvider)
                    .withRegion(Regions.US_EAST_1)
                    .build();
            } catch (Exception e) {
                System.err.println("Failed to create Glue client: " + e.getMessage());
                e.printStackTrace();
                return;
            }
            try {
                List<Database> databases = fetchAllDatabases(amazonGlueClient);
                if (databases.isEmpty()) {
                    System.out.println("No databases found.");
                }
                for (Database database : databases) {
                    System.out.println("Database name: " + database.getName());
                }
            } catch (AWSGlueException e) {
                System.err.println("Error occurred while retrieving databases: " + e.getErrorMessage());
                e.printStackTrace();
            } finally {
                // Release the client's resources whether or not the listing succeeded.
                amazonGlueClient.shutdown();
            }
        }
    
        /**
         * Reads every page of {@code GetDatabases} and returns the combined list.
         *
         * @param glue client used to issue the requests
         * @return all databases from all pages, in the order returned; never {@code null}
         * @throws AWSGlueException if a request is rejected by the service
         */
        private static List<Database> fetchAllDatabases(AWSGlue glue) {
            List<Database> databases = new ArrayList<>();
            String nextToken = null;
            do {
                GetDatabasesRequest request = new GetDatabasesRequest()
                    .withNextToken(nextToken);
                GetDatabasesResult page = glue.getDatabases(request);
    
                // A page can legitimately be empty; guard against an absent list as well.
                List<Database> pageItems = page.getDatabaseList();
                if (pageItems != null) {
                    databases.addAll(pageItems);
                }
    
                nextToken = page.getNextToken();
                // Treat an empty token like null so a blank token can never cause an endless loop.
            } while (nextToken != null && !nextToken.isEmpty());
            return databases;
        }
    }