amazon-web-services, aws-lambda, amazon-ecs, aws-cdk, amazon-eks

Shift Lambda infrastructure to ECS


This is how I have defined all my Python Lambda functions in TypeScript (all sensitive info is masked):

import * as cdk from 'aws-cdk-lib';
import { Stack, StackProps } from 'aws-cdk-lib';
import { AuthorizationType, CfnAuthorizer, CfnMethod, LambdaIntegration, Resource, RestApi } from 'aws-cdk-lib/aws-apigateway';
import { Effect, IManagedPolicy, ManagedPolicy, PolicyStatement, Role, ServicePrincipal } from 'aws-cdk-lib/aws-iam';
import * as lambda from 'aws-cdk-lib/aws-lambda';
import { Duration } from 'aws-cdk-lib/core';

import { Rule, Schedule } from 'aws-cdk-lib/aws-events';
import { Code, DockerImageFunction, Function, ILayerVersion, LayerVersion, Runtime } from 'aws-cdk-lib/aws-lambda';
import * as eventsources from 'aws-cdk-lib/aws-lambda-event-sources';
import * as s3 from 'aws-cdk-lib/aws-s3';
import { Secret } from 'aws-cdk-lib/aws-secretsmanager';
import { Queue } from 'aws-cdk-lib/aws-sqs';
import { StringParameter } from 'aws-cdk-lib/aws-ssm';
import { Construct } from 'constructs';
import { AMA, APIPaths, EventBridgeRules, Handlers, Lambdas, Methods, Package, SSM_PARAM } from '../constants';
import { createQueueWithDLQ, createSQSEventSource } from '../sqsUtils';
import { createLambdaFunction, getCommonBucketName } from '../utils';
import { CognitoStack } from './cognitoStack';
import { LambdaStack } from './lambdaStack';
import { RDSStack } from './rdsStack';
import { S3BucketStack } from './s3BucketStack';
import { VPCStack } from './vpcStack';

/** Rate at which the EventBridge rule for the embedding-log polling lambda fires. */
const EMBEDDING_LOG_POLLING_INTERVAL = Duration.minutes(5);

/**
 * Account/stage pairs for which embedding-log polling is switched on.
 *
 * AmaStack attaches the polling lambda as a target of its schedule rule only
 * when both `props.accountId` and `props.stage` match one of these entries.
 */
export const ACTIVE_EMBEDDING_LOG_STACKS = [
    { accountId: '<MASKED_ACCOUNT_ID_1>', stageName: 'alpha' },
    { accountId: '<MASKED_ACCOUNT_ID_2>', stageName: 'ui' },
    { accountId: '<MASKED_ACCOUNT_ID_3>', stageName: 'ui2' },
    { accountId: '<MASKED_ACCOUNT_ID_4>', stageName: 'aryan' },
    { accountId: '<MASKED_ACCOUNT_ID_4>', stageName: 'ama' },
];

/**
 * Duration handed to `createLambdaFunction` for the embedding-log polling lambda.
 * NOTE(review): presumably the function timeout — confirm against `createLambdaFunction` in ../utils.
 */
export const EMBEDDING_POLLING_LAMBDA_DURATION = Duration.seconds(30);

/**
 * CDK stack for the AMA service.
 *
 * The visible part of the constructor wires up:
 *  - a REST API with a Cognito authorizer and the AMA / redirection-engine resources,
 *  - the embedding-log queue (+ DLQ), its polling lambda and the schedule rule that triggers it,
 *  - the AI-form-update queue (+ DLQ),
 *  - the docker-based lambdas: AMA service, prompt engine, feedback, redirection engine, embedding engine.
 *
 * `createDockerLambdaFunction` and `configureMethodOnResource` are methods of this
 * class whose implementations are elided from this listing.
 */
export class AmaStack extends Stack {
    public readonly AmaServiceAPI: RestApi;
    public readonly Amaauthorizer: CfnAuthorizer;

    // Resources
    public readonly ParentFunctionResource: Resource;
    public readonly RedirectionEngineResource: Resource;
    public readonly aiFormFillupResource: Resource;

    // ama
    public readonly amaServiceLambda: Function;
    public readonly getAmaChats: Function;
    public readonly feedbackLambda: Function;
    public readonly redirectionEngine: Function;
    public readonly embeddingLambda: Function;
    public readonly aiFormFillup: Function;
    public readonly aiFormUpdate: Function;
    public readonly aiFormUpdateQueue: Queue;
    public readonly aiFormUpdateDlqQueue: Queue;

    // prompt engine
    public readonly promptEngine: Function;

    // embedding log queue
    public readonly embeddingLogQueue: Queue;
    public readonly embeddingLogDlqQueue: Queue;

    // layer
    public readonly layer: ILayerVersion;
    // Lambda that is triggered on a schedule and is allowed to send to the embedding-log queue.
    embeddingPollingLambda: cdk.aws_lambda.Function;

    /**
     * @param scope Parent construct.
     * @param id    Logical id of this stack.
     * @param props Stage/account information plus the sibling stacks this stack reads from.
     */
    constructor(scope: Construct, id: string, props: AMAProps) {
        super(scope, id, props);

        this.AmaServiceAPI = new RestApi(this, `AmaServiceAPI-${props.stage}`);

        // The shared lambda layer ARN is published in SSM per stage.
        const baseLayerArn = StringParameter.valueForStringParameter(this, `${SSM_PARAM}-${props.stage}`);
        this.layer = LayerVersion.fromLayerVersionArn(this, `AmaLayerFromArn-${props.stage}`, baseLayerArn);

        // NOTE(review): unsafeUnwrap() places the secret value in the synthesized template and in
        // the function's environment. Consider passing the secret ARN/name and resolving it at runtime.
        const commonEnvironmentVariables = {
            COMPANY_RDS_SECRET: props.rdsStack.DatabaseCluster.secret!.secretValue.unsafeUnwrap(),
        };

        this.Amaauthorizer = new CfnAuthorizer(this, `APIGatewayAuthorizer-${props.stage}`, {
            name: `Ama-authorizer-${props.stage}`,
            identitySource: 'method.request.header.Authorization',
            providerArns: [props.cognitoStack.cognitoPool.userPoolArn],
            restApiId: this.AmaServiceAPI.restApiId,
            type: AuthorizationType.COGNITO,
        });

        this.ParentFunctionResource = this.AmaServiceAPI.root.addResource(APIPaths.AMA);
        this.RedirectionEngineResource = this.AmaServiceAPI.root.addResource(APIPaths.REDIRECTION_ENGINE);

        const openAISecret = Secret.fromSecretNameV2(this, 'OpenAISecret', 'Open-AI-API-Key');
        const openAIKey = openAISecret.secretValueFromJson('OPENAI_API_KEY');
        const AMAFeedbackResource = this.ParentFunctionResource.addResource(APIPaths.AMA_FEEDBACK);

        // Managed policies shared by every lambda role created below.
        const managedPolicies = [
            ManagedPolicy.fromAwsManagedPolicyName('AWSLambdaExecute'),
            ManagedPolicy.fromAwsManagedPolicyName('AWSLambda_FullAccess'),
            ManagedPolicy.fromAwsManagedPolicyName('CloudWatchLambdaInsightsExecutionRolePolicy'),
            ManagedPolicy.fromAwsManagedPolicyName('AmazonBedrockFullAccess'),
            ManagedPolicy.fromAwsManagedPolicyName('AmazonS3FullAccess'),
        ];

        const embeddingQueueAndDlqQueue = createQueueWithDLQ(this, 'EmbeddingLogQueue', props.stage, {
            visibilityTimeout: Duration.seconds(120),
            maxReceiveCount: 3,
        });

        this.embeddingLogQueue = embeddingQueueAndDlqQueue.queue;
        this.embeddingLogDlqQueue = embeddingQueueAndDlqQueue.dlqQueue;

        this.embeddingPollingLambda = createLambdaFunction(
            this,
            Lambdas.EMBEDDING_LOG_POLLING,
            Package.BACKEND,
            Handlers.EMBEDDING_LOG_POLLING,
            props.stage,
            {
                // NOTE(review): unlike the docker lambdas below, this role name carries no stage
                // suffix — confirm whether createLambdaFunction appends it.
                roleName: `${Lambdas.EMBEDDING_LOG_POLLING}-Role`,
                managedPolicies: managedPolicies,
                resources: [this.embeddingLogQueue.queueArn],
                actions: ['sqs:SendMessage'],
            },
            {
                ...commonEnvironmentVariables,
                EMBEDDING_LOG_QUEUE: this.embeddingLogQueue.queueUrl,
            },
            EMBEDDING_POLLING_LAMBDA_DURATION,
            [this.layer],
            props.vpcStack.vpc,
        );

        const rule = new Rule(this, `EventBridge-${EventBridgeRules.AMA_LOG_POLLING}`, {
            // Interpolating the Duration object itself would render as "Duration.minutes(5)";
            // convert it to a plain number of minutes for a readable description.
            description: `Rule to trigger embedding polling lambda every ${EMBEDDING_LOG_POLLING_INTERVAL.toMinutes()} minutes`,
            schedule: Schedule.rate(EMBEDDING_LOG_POLLING_INTERVAL),
        });

        // The rule is always created, but it only gets a target in the allow-listed account/stage pairs.
        const pollingEnabled = ACTIVE_EMBEDDING_LOG_STACKS.some(
            (stack) => stack.accountId === props.accountId && stack.stageName === props.stage,
        );
        if (pollingEnabled) {
            rule.addTarget(new cdk.aws_events_targets.LambdaFunction(this.embeddingPollingLambda));
        }

        const aiFormUpdateQueueAndDldQueue = createQueueWithDLQ(this, 'AiFormUpdateQueue', props.stage, {
            visibilityTimeout: Duration.seconds(120),
            maxReceiveCount: 3,
        });
        this.aiFormUpdateQueue = aiFormUpdateQueueAndDldQueue.queue;
        this.aiFormUpdateDlqQueue = aiFormUpdateQueueAndDldQueue.dlqQueue;

        // NOTE(review): the role props for the docker lambdas pair S3 actions with Lambda function
        // ARNs as resources. Verify how createDockerLambdaFunction combines `actions` and `resources`.
        this.amaServiceLambda = this.createDockerLambdaFunction(
            Lambdas.AMA_LAMBDA,
            Handlers.AMA_SERVICE,
            props.stage,
            {
                roleName: `${Lambdas.AMA_LAMBDA}-${props.stage}-Role`,
                managedPolicies: managedPolicies,
                actions: ['s3:GetObject', 's3:GetObjectTagging', 's3:ListBucket'],
                resources: [props.lambdaStack.replicateUserProfile.functionArn],
            },
            {
                // BUCKET_PREFIX was previously listed twice; only the later value (the common
                // bucket name) ever took effect, so the dead AMA.BUCKET_PREFIX entry is removed.
                MODEL: AMA.MODEL,
                EMBEDDING: AMA.EMBEDDING,
                LANGCHAIN_TRACING_V2: 'true',
                LANGCHAIN_ENDPOINT: props.langchainEndpoint,
                LANGCHAIN_API_KEY: props.langchainApiKey,
                LANGCHAIN_PROJECT: `AMA-${props.stage}`,
                STAGE_NAME: props.stage,
                BUCKET_PREFIX: getCommonBucketName(props.accountId, props.stage),
                LLM_TOP_P: '0.2',
                OPENAI_API_KEY: openAIKey.unsafeUnwrap(),
                LLM_TEMPERATURE: '0.2',
                RETRIEVER_K: '3',
                ...commonEnvironmentVariables,
            },
        );

        openAISecret.grantRead(this.amaServiceLambda);

        this.promptEngine = this.createDockerLambdaFunction(
            Lambdas.PROMPT_ENGINE,
            Handlers.PROMPT_ENGINE,
            props.stage,
            {
                roleName: `${Lambdas.PROMPT_ENGINE}-${props.stage}-Role`,
                managedPolicies: managedPolicies,
                actions: ['s3:GetObject', 's3:GetObjectTagging', 's3:ListBucket'],
                resources: [props.lambdaStack.replicateUserProfile.functionArn],
            },
            {
                MODEL: AMA.MODEL,
                EMBEDDING: AMA.EMBEDDING,
            },
        );

        this.feedbackLambda = this.createDockerLambdaFunction(
            Lambdas.FEEDBACK_LAMBDA,
            Handlers.AMA_FEEDBACK,
            props.stage,
            {
                roleName: `${Lambdas.FEEDBACK_LAMBDA}-${props.stage}-Role`,
                managedPolicies: managedPolicies,
                actions: ['s3:GetObject', 's3:GetObjectTagging', 's3:ListBucket'],
                resources: [props.lambdaStack.replicateUserProfile.functionArn],
            },
            {
                BUCKET_PREFIX: AMA.BUCKET_PREFIX,
                MODEL: AMA.MODEL,
                // NOTE(review): literal values here, whereas the other lambdas read
                // props.langchainEndpoint / props.langchainApiKey — confirm this is intended.
                LANGCHAIN_ENDPOINT: '<MASKED_LANGCHAIN_ENDPOINT>',
                LANGCHAIN_API_KEY: '<MASKED_LANGCHAIN_API_KEY>',
            },
        );

        this.configureMethodOnResource(AMAFeedbackResource, Methods.POST, new LambdaIntegration(this.feedbackLambda));

        // The redirection engine receives the names of the AMA and prompt-engine lambdas via its
        // environment and lists their ARNs among its role resources.
        this.redirectionEngine = this.createDockerLambdaFunction(
            Lambdas.REDIRECTION_ENGINE,
            Handlers.REDIRECTION_ENGINE,
            props.stage,
            {
                roleName: `${Lambdas.REDIRECTION_ENGINE}-${props.stage}-Role`,
                managedPolicies: managedPolicies,
                actions: ['s3:GetObject', 's3:GetObjectTagging', 's3:ListBucket'],
                resources: [
                    props.lambdaStack.replicateUserProfile.functionArn,
                    this.amaServiceLambda.functionArn,
                    this.promptEngine.functionArn,
                ],
            },
            {
                AMA_LAMBDA: this.amaServiceLambda.functionName,
                PROMPT_ENGINE: this.promptEngine.functionName,
                BUCKET_PREFIX: AMA.BUCKET_PREFIX,
                MODEL: AMA.MODEL,
                EMBEDDING: AMA.EMBEDDING,
                OPENAI_API_KEY: openAIKey.unsafeUnwrap(),
                LANGCHAIN_TRACING_V2: 'true',
                LANGCHAIN_ENDPOINT: props.langchainEndpoint,
                LANGCHAIN_API_KEY: props.langchainApiKey,
                LANGCHAIN_PROJECT: `RedirectionEngine-${props.stage}`,
                STAGE_NAME: props.stage,
            },
        );

        this.embeddingLambda = this.createDockerLambdaFunction(
            Lambdas.EMBEDDING_ENGINE,
            Handlers.EMBEDDING_ENGINE,
            props.stage,
            {
                roleName: `${Lambdas.EMBEDDING_ENGINE}-${props.stage}-Role`,
                managedPolicies: managedPolicies,
                actions: ['s3:GetObject', 's3:GetObjectTagging', 's3:ListBucket'],
                resources: [props.lambdaStack.replicateUserProfile.functionArn],
            },
            {
                // Duplicate BUCKET_PREFIX removed here as well; the surviving entry is the one
                // that was already winning at runtime.
                MODEL: AMA.MODEL,
                EMBEDDING: AMA.EMBEDDING,
                LANGCHAIN_TRACING_V2: 'true',
                LANGCHAIN_ENDPOINT: props.langchainEndpoint,
                LANGCHAIN_API_KEY: props.langchainApiKey,
                LANGCHAIN_PROJECT: `AMA-${props.stage}`,
                STAGE_NAME: props.stage,
                BUCKET_PREFIX: getCommonBucketName(props.accountId, props.stage),
                PINECONE_API_KEY: '<MASKED_PINECONE_API_KEY>',
                LLM_TOP_P: '0.3',
                OPENAI_API_KEY: openAIKey.unsafeUnwrap(),
                LLM_TEMPERATURE: '0.3',
                RETRIEVER_K: '3',
                ...commonEnvironmentVariables,
            },
        );


        // ... [Implementation details for remaining methods]
    }

    // ... [Rest of the class implementation]
}

/**
 * Construction properties for AmaStack: deployment identity, LangChain settings,
 * and the sibling stacks whose resources AmaStack references.
 */
export interface AMAProps extends StackProps {
    /** Stage name; used as a suffix in construct ids, role names and LangChain project names. */
    readonly stage: string;
    /** NOTE(review): not read in the visible part of AmaStack — presumably the AWS region; confirm. */
    readonly region: string;
    /** Account id; matched against ACTIVE_EMBEDDING_LOG_STACKS and passed to getCommonBucketName. */
    readonly accountId: string;
    /** Exposed to the lambdas as the LANGCHAIN_ENDPOINT environment variable. */
    readonly langchainEndpoint: string;
    /** Exposed to the lambdas as the LANGCHAIN_API_KEY environment variable. */
    readonly langchainApiKey: string;
    /** Supplies `replicateUserProfile`, whose function ARN is listed in the lambda role resources. */
    readonly lambdaStack: LambdaStack;
    /** Supplies `cognitoPool`, whose user pool ARN backs the API Gateway authorizer. */
    readonly cognitoStack: CognitoStack;
    /** Supplies `DatabaseCluster`, whose secret is passed to lambdas as COMPANY_RDS_SECRET. */
    readonly rdsStack: RDSStack;
    /** Supplies the `vpc` handed to the embedding-log polling lambda. */
    readonly vpcStack: VPCStack;
    /** NOTE(review): not read in the visible part of AmaStack — usage to be confirmed. */
    readonly s3BucketStack: S3BucketStack;
}

/**
 * Role settings supplied alongside each lambda definition.
 * NOTE(review): the consuming helpers are not shown here; presumably `actions` and
 * `resources` are combined into a policy statement on the role — confirm.
 */
export interface IAMRoleProps {
    /** Name for the lambda's IAM role. */
    readonly roleName: string;
    /** Managed policies for the role. */
    readonly managedPolicies: IManagedPolicy[];
    /** IAM action names, e.g. 'sqs:SendMessage'. */
    readonly actions: string[];
    /** Resource ARNs accompanying `actions`. */
    readonly resources: string[];
}

I tried running everything on Lambda, but I am running into trouble with cold starts and want to shift to EKS/ECS. I cannot move everything to a web server, so how can I do this via the CDK with minimal code changes?

I want to shift the infrastructure to EKS/ECS. Please also describe the differences this is going to involve; I have worked mostly with Lambda and have no experience with ECS/EKS.


Solution

  • When you enable Provisioned Concurrency for a function, the Lambda service will initialize the requested number of execution environments so they can be ready to respond to invocations.

    Ref: https://aws.amazon.com/blogs/aws/new-provisioned-concurrency-for-lambda-functions/

    First of all, to address your Lambda cold start problem, you should configure provisioned concurrency.

    This is the number of pre-initialized execution environments allocated to your function. These execution environments are ready to respond immediately to incoming function requests. Provisioned concurrency is useful for reducing cold start latencies for functions. Configuring provisioned concurrency incurs additional charges to your AWS account.

    If you want to improve the performance of the Lambda function, increase its memory setting; Lambda allocates CPU power in proportion to the configured memory.

    want to shift to EKS/ECS. Can not shift all to web server, how can I do it via the CDK with minimal code changes.

    Whether to choose EKS or ECS will depend on your application, your expertise of managing them, control and pricing and portability(vendor lock-in).

    But seeing your question I think ECS would be better choice because:

    If you’re new to container orchestration and deployment, ECS is a good place to start because it is less expensive and requires little or no expertise in managing Kubernetes clusters. ECS is also a good choice if you are familiar with the AWS platform, because it offers tight integration with other Amazon services. With ECS you don’t have to pay for a control plane, so it can potentially be cheaper.

    If you choose EKS then:

    If you are looking for multi-cloud capabilities and portability of containerized workloads, EKS is the preferred choice because it doesn’t lock you into the Amazon cloud. EKS also provides additional features, more customization options, and fine-grained control over containerized applications. Keep in mind there is an extra charge per cluster compared to ECS because of the control plane cost AWS charges.

    Of course, EKS has also become much easier over the years, and AWS recently launched EKS Auto Mode, which automates cluster infrastructure. But if you move to EKS, you need to be familiar with deployments, monitoring, scaling, networking, and security.

    So my suggestion would be to use ECS first, keeping the vendor lock-in point in mind, and then move to EKS or Kubernetes if you need more customization and control.

    how can I do it via the CDK with minimal code changes.

    This is very subjective but a good starting point would be here (from aws docs)

    Also suggest the differences it it going to take,

    Check this too for sample: https://conermurphy.com/blog/aws-lambda-to-ecs-fargate-migration-guide