amazon-dynamodbaws-sdk-js-v3

@aws-sdk/client-dynamodb ScanCommand not retrieving all records in DynamoDB table


I have a Lambda service (type module) that is called from API Gateway, retrieving records from a DynamoDB table.

I'm using the ScanCommand as I want to read all records. When I look at the table in DynamoDb, the table has 1903 records. When I run the scan command, its only retrieving 1350 records.

The command within the Lambda function:


import { ScanCommand } from "@aws-sdk/client-dynamodb";
import { marshall, unmarshall } from "@aws-sdk/util-dynamodb";
import { dbClient } from "./db.js";

const tableName = "restaurants";

const getAllRestaurants = async () => {
  console.log("getAllRestaurants");
  try {
    const params = {
      TableName: tableName,
    };

    const { Items } = await dbClient.send(new ScanCommand(params));

    console.log(Items);
    return Items ? Items.map((item) => unmarshall(item)) : {};
  } catch (e) {
    console.error(e);
    throw e;
  }
};


export const handler = async function (event) {
  console.log("request:", JSON.stringify(event, undefined, 2));
  let body;

  try {
    switch (event.httpMethod) {
      case "GET":
        if (event.pathParameters !== null) {
          body = await getRestaurant(event);
        } else {
          body = await getAllRestaurants();
        }
        break;
      default:
        throw new Error(`Unsupported route: "${event.httpMethod}"`);
    }
    console.log(body);

    return {
      statusCode: 200,
      headers: {
        "Access-Control-Allow-Headers": "*",
        "Access-Control-Allow-Origin": "*",
      },
      body: JSON.stringify({
        message: `Successfully finished Restaurant operation: "${event.httpMethod}"`,
        body,
      }),
    };
  } catch (e) {
    console.error(e);
    return {
      statusCode: 500,
      body: JSON.stringify({
        message: "Failed to perform Restaurant operation.",
        errorMsg: e.message,
        errorStack: e.stack,
      }),
    };
  }
};
import { DynamoDBClient } from "@aws-sdk/client-dynamodb"
const REGION = "us-east-2"
const dbClient = new DynamoDBClient({ region: REGION })
export { dbClient }

I've removed some other functions (Create/Delete) from the code example. The function is working, no errors in the logs, but just missing data.

My package.json:

  "dependencies": {
    "@aws-sdk/client-dynamodb": "^3.352.0",
    "@aws-sdk/util-dynamodb": "^3.352.0"

The data, I can't see anything specific (special characters etc.), they look the same.

Here is a record I know is failing to be retrieved. I can see it within the console:


{
  "restaurantId": {
    "S": "5cb7449b-7a11-46f1-830b-82fc5b5a99a0"
  },
  "accessability": {
    "BOOL": true
  },
  "city": {
    "S": "Walnut Creek"
  },
  "coordinates": {
    "M": {
      "latitude": {
        "N": "37.90086769999999"
      },
      "longitude": {
        "N": "-122.0612685"
      }
    }
  },
  "cuisine": {
    "S": "Burmese"
  },
  "dateAdded": {
    "S": "Sat Jan 20 2024 21:25:11 GMT-0800"
  },
  "dateUpdated": {
    "S": "Thu Jul 04 2024 03:53:10 GMT+0000 (Coordinated Universal Time)"
  },
  "delivery": {
    "BOOL": true
  },
  "description": {
    "S": ""
  },
  "formattedAddress": {
    "S": "1616 N Main St, Walnut Creek, CA 94596, USA"
  },
  "formattedPhone": {
    "S": "(925) 261-8580"
  },
  "name": {
    "S": "Burma 2"
  },
  "openHours": {
    "L": [
      {
        "S": "Monday: 11:30 AM – 2:30 PM, 5:00 – 9:00 PM"
      },
      {
        "S": "Tuesday: 11:30 AM – 2:30 PM, 5:00 – 9:00 PM"
      },
      {
        "S": "Wednesday: 11:30 AM – 2:30 PM, 5:00 – 9:00 PM"
      },
      {
        "S": "Thursday: 11:30 AM – 2:30 PM, 5:00 – 9:00 PM"
      },
      {
        "S": "Friday: 11:30 AM – 2:30 PM, 5:00 – 10:00 PM"
      },
      {
        "S": "Saturday: 11:30 AM – 10:00 PM"
      },
      {
        "S": "Sunday: 11:30 AM – 9:00 PM"
      }
    ]
  },
  "overallScore": {
    "N": "89"
  },
  "priceGuide": {
    "N": "2"
  },
  "vegeterian": {
    "BOOL": true
  },
  "website": {
    "S": "https://www.burma2.com/"
  }
}

Any ideas why I'm missing 30%+ of the data?


Solution

  • DynamoDB paginates the results from Query/Scan operations. With pagination, the Query/Scan results are divided into "pages" of data that are 1 MB in size (or less). An application can process the first page of results, then the second page, and so on.

    https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/Query.Pagination.html

    /*
    This code uses a page size of 100 and will bring back all items in a table.
    It uses the "low-level" JS client, not the DocumentClient, thus the data is
    returned in DynamoDB's native JSON format. If you want plain JSON, you can
    either convert this to use the DocumentClient or use unmarshall util
    */
    
    const { DynamoDBClient, paginateScan } = require('@aws-sdk/client-dynamodb');
    
    const dynamodb = new DynamoDBClient({});
    
    async function fetchAll() {
        const config = {
            client: dynamodb,
            pageSize: 100
        };
        const input = {
            TableName: 'RetailDatabase'
        };
        const paginator = paginateScan(config, input);
        for await (const page of paginator) {
            page.Items.forEach(console.log);
        }
    }
    
    fetchAll()
        .then(() => console.log('done'))
        .catch(() => process.exit(1));
    

    https://github.com/aws-samples/aws-dynamodb-examples/blob/master/DynamoDB-SDK-Examples/node.js/WorkingWithScans/scan-fetch-all-pagination.js