Using Microsoft.Azure.Cosmos 3.3, we are inserting messages into our Azure Cosmos DB NoSQL database in batches:
// Starts one CreateItemAsync call per item without awaiting it, so the inserts are
// issued back-to-back and awaited together after the loop.
foreach (var item in listOfItems)
{
// The item's TimestampDate value is passed as the partition key for the insert.
var task = container.CreateItemAsync(item, new PartitionKey(item.TimestampDate));
// What is stored in `tasks` is the continuation, not the create task itself, so a
// faulted or canceled create is not observed by Task.WhenAll below.
tasks.Add(task
.ContinueWith(t =>
{
// Only successfully completed inserts contribute to the accumulated request charge.
if (t.Status == TaskStatus.RanToCompletion)
{
// NOTE(review): continuations of different inserts may run concurrently, and `+=`
// on a shared variable is not atomic — confirm whether `cost` needs synchronization.
cost += t.Result.RequestCharge;
}
}));
}
// Waits until every continuation (and therefore every insert attempt) has finished.
await Task.WhenAll(tasks);
}
For testing, we ran it with 1 item and with 1000 items. In each case, our request charge is close to 30 RU to insert one 4 KB item, e.g.:
[
{
"MessageContent": {
"StationID": "12349902",
"GenerationDeltaTime": "37919",
"StationType": "5",
"ReferencePosition": {
"Position": {
"latitude": 481687966,
"longitude": 163300265,
"quadtree": "020220220002002200"
},
"Confidence": {
"SemiMajorConf": 600,
"SemiMinorConf": 300,
"SemiMajorOrient": 0
}
},
"Heading": {
"Value": 1069,
"Confidence": "28"
},
"Speed": {
"Value": 833,
"Confidence": "50"
},
"LongitudinalAcceleration": {
"Value": 161,
"Confidence": "\n 102\n "
},
"Curvature": {
"Value": 1023,
"Confidence": "unavailable"
},
"YawRate": {
"Value": 32767,
"Confidence": "unavailable"
},
"VehicleRole": "",
"DriveDirection": "unavailable",
"LogDeviceType": null,
"FixedStationID": null,
"MessageAction": null,
"Time": null,
"LocationGps": null,
"LocationQuadTree": null,
"CommunicationChannel": null,
"MessageType": "CAM"
},
"id": "550fa5b4-f433-46d5-acc5-4d1f0a623916",
"DeviceType": "Central",
"TimestampDate": "04-07-2023",
"header": {
"version": {
"major": 1,
"minor": 1
},
"name": "sc3-management",
"timestamp": "2023-07-04T09:44:15.066795Z",
"uuid": "e9afff0b-5508-4ab7-9f46-a607e42f267d"
},
"message": {
"uuid": "550fa5b4-f433-46d5-acc5-4d1f0a623916",
"timestamp": "2023-07-04T09:44:16.312795Z",
"direction": "Arx",
"rx_device": "rsu001",
"rx_location": {
"latitude": 48.171516,
"longitude": 16.3321347,
"quadtree": "120230113201332120"
},
"messageid": 2,
"protocolversion": 2,
"security": "Unsigned",
"tx_device": "12349902",
"tx_location": {
"latitude": 48.1687966,
"longitude": 16.3300265,
"quadtree": "120230113201332213"
},
"hash": "XRa+ywxEQKMM2weQN7wPNK6q8rTrdQqdE85LXdM6aq0=",
"message": [
{
"encoding": "Zxer",
"data": "eJyVVlFvmzAQfu+vQHnPMNAQkFyqqlm1SotWVVn27MKVeHJMZiBb/v0MpmAcE6Vv8Xff5/vucj6B7//tmXMEUdKC3828L2jmAE+LjPL8bvZz8zSPZvfJDX58WCc3joN3QDIQzU95OIiiKtKCbZU88bFrQoq4h7IkOTyvGspwUMGyIpWkyrPnB7dxjCRnwJqk7pAVp2Tf6XLgIFraClhFNnQPSbCMvRi7tpASSfkLEWQPlTSoMIm+kZKmjwWvCOUf1eneNqcDJIveVnscSALeQcimwUtR0qqvuosyKanqDJLbyAujZRyG2O2xEbHguUK9MAgQ8kOZcQB16qFLJC2/06xJ/ZUxeihHrMY+7Oma/C7EQExChGQhloBFSvmIEfRSIzCR9YegwFXLEj2pjutluVfVhQk7754GbwmrIfGjqHE7Bu0CrZAGmyOE5gvkJoPaXuoQ14bBnZwG7NrHDO9ovnsS8KeWqpNtClvdFnY0ZdDHv+mqka3mtcjna1TboaoRHgpj9ax6yMrWCvejXjDVDktiXB4AMnNAGkxljYJADsZwthC1bAvUkaccnGXDmaBHWFEBaftf4JqTI6GMvDFo/mEjrCuPquHfgefVzjA2in201JelWAIXlEMZzzyj6dmLaBW82AjpGMSLgFK+G9e40f3UlQbdVvEvmslA6PdUBdh2FeWEPaQpsG7dGtamaF3HQm9YcHbKdddNbSLHkX+K0azP3THNH7HSWhxJVQvTcI/rI2KAEwp9KRkza+OMPFvtaFcTltasrWJdZJfuN4j6bSfy95VUpv0OVZUF/jJcYneEWekXaj1njCo9c9Et2auWpVxYFzav7IP5rdBC8mPIbb6G/gNOGssB"
}
],
"rx_device_type": "Rsu",
"test_message": false
},
"MessageType": "CAM",
"_rid": "a5ZuAKPbo5IBAAAAAAAAAA==",
"_self": "dbs/a5ZuAA==/colls/a5ZuAKPbo5I=/docs/a5ZuAKPbo5IBAAAAAAAAAA==/",
"_etag": "\"8000178a-0000-0c00-0000-64a3eaf00000\"",
"_attachments": "attachments/",
"_ts": 1688464112
}
]
Note that we do have a partition key that changes every day, and each day's partition will hold about 8 million messages.
The same RU costs are visible through the portal: for 1000 messages, we get about 30 RU each.
We have added an index that excludes the inner hash/message, and have also tried removing the message altogether, but we can never get below about 28 RU.
If we query this object via the portal, we get 2.7 KB and it costs 2.87 RU.
I've used the capacity calculator to estimate the cost and get 7 RUs for a 4 KB insert: https://cosmos.azure.com/capacitycalculator/
So why is the Insert RU cost so high?
You need to verify and optimize your Indexing Policy.
The calculator uses Indexing Off by default, but your container probably has Indexing set to Automatic/Consistent.
If you set the calculator to Indexing Automatic and use your document as an example (load the JSON) or enter the number of properties (72), you get ~35 RU, which is similar to what you are getting for your operations:
If Indexing is Off (the default), then the cost is 7 RU:
Make sure to adjust your Indexing Policy on your Container to fit your query and operational needs.