postgresqlamazon-rdsaws-dms

AWS DMS - PostgreSQL invalid input value for enum and daterange


I'm performing a logical replication from our PostgreSQL server to AWS RDS, using the database migration tool (DMS). Most tables were migrated successfully, but I'm having troubles with a couple of the tables.

When I run the task to load the table, I get this error on the logs of RDS:

ERROR:  invalid input value for enum property_type: ""
CONTEXT:  unnamed portal parameter $17 = ''
STATEMENT:  INSERT INTO "public"."loans"("id","account_id","loan_number","created_at","updated_at","folio","mers_min","mers_status","mers_status_date","application_number","servicer","servicer_loan_number","status","primary_borrower_last_name","primary_borrower_first_name","property_number_of_units","property_type","property_address_line1","property_address_line2","property_city","property_state","property_zip","property_county_code","property_country_code","property_census_tract_code","property_parcel_id","mortgage_type","qm_loan","purpose","ltv","amortization_type","amount","interest_rate","term","lien_priority","application_date","approval_date","rejected_date","closing_date","funding_date","purchase_date","source","officer","processor","underwriter","appraiser","property_usage","fha_case_number","var_payload","approval_type","approval_message","housing_expense_ratio","total_debt_expense_ratio","subordinate_financing_amount","combined_ltv","property_appraised_value","property_purchase_price","property_appraised_date","property_year_built","credit_score","au_type","au_recommendation","lender_product","heloc_indicator","reverse_indicator","property_pud_indicator","closer","additional_financing_amount","rejected_reason","mortgage_insurance_certificate_number","mortgage_insurance_coverage_amount","mortgage_insurance_premium","day_one_certainty","first_payment_date","maturity_date","principal_and_interest_payment_amount","is_portfolio","financing_concessions_amount","sales_concessions_amount","transaction_costs_amount","is_investment_quality","application_received_date","lender_program","refinance_cash_out_type") values ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14,$15,$16,$17,$18,$19,$20,$21,$22,$23,$24,$25,$26,$27,$28,$29,$30,$31,$32,$33,$34,$35,$36,$37,$38,$39,$40,$41,$42,$43,$44,$45,$46,$47,$48,$49,$50,$51,$52,$53,$54,$55,$56,$57,$58,$59,$60,$61,$62,$63,$64,$65,$66,$67,$68,$69,$70,$71,$72,$73,$74,$75,$76,$77,$78,$79,$80,$81,$82,$83,$84)

The type of column property_type is a nullable enum that takes as values Condominium, Cooperative, ManufacturedHome, SingleFamily, Townhouse and TwoToFourFamily.

I'm experiencing a similar issue with another table from the same database, where I get:

ERROR:  malformed range literal: ""
DETAIL:  Missing left parenthesis or bracket.
CONTEXT:  unnamed portal parameter $3 = ''
STATEMENT:  INSERT INTO "public"."selections_sampling_data"("id","sow_id","period","loan_number","field_data","selected_on","substituted_on","substitution_for_id","received_for_review_on","created_at","updated_at","selected_for","selection_reason","selections_sampling_strategy_id") values ($1,$2,$3,$4,$5,$6,$7,$8,$9,$10,$11,$12,$13,$14)

This one doesn't explicitly tell me which column generated the error, but parameter $3 should correspond with the period column, which is a daterange type.

In both cases, the DMS task seems to be querying for an empty string as value, which is not allowed by those data types. I don't know if this is an error with my schema, or how I configured the DMS task.

[Update]

My task configuration is the following:

{
    "StreamBufferSettings": {
        "StreamBufferCount": 3,
        "CtrlStreamBufferSizeInMB": 5,
        "StreamBufferSizeInMB": 8
    },
    "ErrorBehavior": {
        "FailOnNoTablesCaptured": true,
        "ApplyErrorUpdatePolicy": "LOG_ERROR",
        "FailOnTransactionConsistencyBreached": false,
        "RecoverableErrorThrottlingMax": 1800,
        "DataErrorEscalationPolicy": "SUSPEND_TABLE",
        "ApplyErrorEscalationCount": 0,
        "RecoverableErrorStopRetryAfterThrottlingMax": true,
        "RecoverableErrorThrottling": true,
        "ApplyErrorFailOnTruncationDdl": false,
        "DataTruncationErrorPolicy": "LOG_ERROR",
        "ApplyErrorInsertPolicy": "LOG_ERROR",
        "EventErrorPolicy": "IGNORE",
        "ApplyErrorEscalationPolicy": "LOG_ERROR",
        "RecoverableErrorCount": -1,
        "DataErrorEscalationCount": 0,
        "TableErrorEscalationPolicy": "STOP_TASK",
        "RecoverableErrorInterval": 5,
        "ApplyErrorDeletePolicy": "IGNORE_RECORD",
        "TableErrorEscalationCount": 0,
        "FullLoadIgnoreConflicts": true,
        "DataErrorPolicy": "LOG_ERROR",
        "TableErrorPolicy": "SUSPEND_TABLE"
    },
    "ValidationSettings": {
        "ValidationPartialLobSize": 0,
        "PartitionSize": 10000,
        "RecordFailureDelayLimitInMinutes": 0,
        "SkipLobColumns": false,
        "FailureMaxCount": 10000,
        "HandleCollationDiff": false,
        "ValidationQueryCdcDelaySeconds": 0,
        "ValidationMode": "ROW_LEVEL",
        "TableFailureMaxCount": 1000,
        "RecordFailureDelayInMinutes": 5,
        "MaxKeyColumnSize": 8096,
        "EnableValidation": true,
        "ThreadCount": 5,
        "RecordSuspendDelayInMinutes": 30,
        "ValidationOnly": false
    },
    "TTSettings": {
        "TTS3Settings": null,
        "TTRecordSettings": null,
        "EnableTT": false
    },
    "FullLoadSettings": {
        "CommitRate": 1000,
        "StopTaskCachedChangesApplied": false,
        "StopTaskCachedChangesNotApplied": false,
        "MaxFullLoadSubTasks": 2,
        "TransactionConsistencyTimeout": 600,
        "CreatePkAfterFullLoad": false,
        "TargetTablePrepMode": "DO_NOTHING"
    },
    "TargetMetadata": {
        "ParallelApplyBufferSize": 0,
        "ParallelApplyQueuesPerThread": 0,
        "ParallelApplyThreads": 0,
        "TargetSchema": "",
        "InlineLobMaxSize": 0,
        "ParallelLoadQueuesPerThread": 0,
        "SupportLobs": true,
        "LobChunkSize": 64,
        "TaskRecoveryTableEnabled": false,
        "ParallelLoadThreads": 0,
        "LobMaxSize": 0,
        "BatchApplyEnabled": true,
        "FullLobMode": true,
        "LimitedSizeLobMode": false,
        "LoadMaxFileSize": 0,
        "ParallelLoadBufferSize": 0
    },
    "BeforeImageSettings": null,
    "ControlTablesSettings": {
        "historyTimeslotInMinutes": 5,
        "HistoryTimeslotInMinutes": 5,
        "StatusTableEnabled": false,
        "SuspendedTablesTableEnabled": false,
        "HistoryTableEnabled": false,
        "ControlSchema": "",
        "FullLoadExceptionTableEnabled": false
    },
    "LoopbackPreventionSettings": null,
    "CharacterSetSettings": null,
    "FailTaskWhenCleanTaskResourceFailed": false,
    "ChangeProcessingTuning": {
        "StatementCacheSize": 50,
        "CommitTimeout": 1,
        "BatchApplyPreserveTransaction": true,
        "BatchApplyTimeoutMin": 1,
        "BatchSplitSize": 0,
        "BatchApplyTimeoutMax": 30,
        "MinTransactionSize": 1000,
        "MemoryKeepTime": 60,
        "BatchApplyMemoryLimit": 500,
        "MemoryLimitTotal": 1024
    },
    "ChangeProcessingDdlHandlingPolicy": {
        "HandleSourceTableDropped": true,
        "HandleSourceTableTruncated": true,
        "HandleSourceTableAltered": true
    },
    "PostProcessingRules": null
}

Solution

  • So, after digging through AWS documentation and contacting support, I learned that enum data types don't migrate using DMS. The task will fail no matter what.

    If your database has enum columns, you need to find another solution, i.e. barman and run it on an EC2 instance.