I enabled the native blob soft delete feature in the Azure AI Search data source (Azure Blob Storage kind). I also enabled soft delete in ADLS. When I try to verify it, I see that the file hasn't been deleted from the index. After some additional research I found in the documentation that "Document keys for the documents in your index must be mapped to either be a blob property or blob metadata, such as 'metadata_storage_path'."
OK, but when I try to map metadata_storage_path to the document key I get an error: "Keys can only contain letters, digits, underscore (_), dash (-), or equal sign (=)." How can I map metadata_storage_path to the key as described in the documentation? The path always contains special characters.
My indexer:
{
"@odata.context": "*******.search.windows.net/$metadata#indexers/$entity",
"@odata.etag": "\"*****************\"",
"name": "tracking-changes-and-deletions-indexer",
"description": null,
"dataSourceName": "some",
"skillsetName": "experimental-skillset-test",
"targetIndexName": "tracking-changes-and-deletions-index",
"disabled": null,
"schedule": null,
"parameters": {
"batchSize": null,
"maxFailedItems": null,
"maxFailedItemsPerBatch": null,
"base64EncodeKeys": null,
"configuration": {
"dataToExtract": "contentAndMetadata",
"parsingMode": "text",
"imageAction": "none"
}
},
"fieldMappings": [
{
"sourceFieldName": "metadata_storage_path",
"targetFieldName": "title",
"mappingFunction": null
},
{
"sourceFieldName": "metadata_storage_last_modified",
"targetFieldName": "storage_last_modified",
"mappingFunction": null
}
],
"outputFieldMappings": [],
"cache": null,
"encryptionKey": null
}
And the index:
{
"name": "tracking-changes-and-deletions-index",
"defaultScoringProfile": null,
"fields": [
{
"name": "id",
"type": "Edm.String",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": true,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": "keyword",
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "content",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "title",
"type": "Edm.String",
"searchable": true,
"filterable": true,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": null,
"vectorSearchProfile": null,
"vectorEncoding": null,
"synonymMaps": []
},
{
"name": "embedding",
"type": "Collection(Edm.Single)",
"searchable": true,
"filterable": false,
"retrievable": true,
"stored": true,
"sortable": false,
"facetable": false,
"key": false,
"indexAnalyzer": null,
"searchAnalyzer": null,
"analyzer": null,
"normalizer": null,
"dimensions": 1536,
"vectorSearchProfile": "vector-profile-1717156292529",
"vectorEncoding": null,
"synonymMaps": []
}
],
"scoringProfiles": [],
"corsOptions": null,
"suggesters": [],
"analyzers": [],
"normalizers": [],
"tokenizers": [],
"tokenFilters": [],
"charFilters": [],
"encryptionKey": null,
"similarity": {
"@odata.type": "#Microsoft.Azure.Search.BM25Similarity",
"k1": null,
"b": null
},
"semantic": {
"defaultConfiguration": null,
"configurations": []
},
"vectorSearch": {
"algorithms": [
{
"name": "vector-config-1717156300344",
"kind": "hnsw",
"hnswParameters": {
"metric": "cosine",
"m": 4,
"efConstruction": 400,
"efSearch": 500
},
"exhaustiveKnnParameters": null
},
{
"name": "vector-config-1718463795492",
"kind": "exhaustiveKnn",
"hnswParameters": null,
"exhaustiveKnnParameters": {
"metric": "cosine"
}
}
],
"profiles": [
{
"name": "vector-profile-1717156292529",
"algorithm": "vector-config-1717156300344",
"vectorizer": "vectorizer-1717156312140",
"compression": null
}
],
"vectorizers": [
{
"name": "vectorizer-1717156312140",
"kind": "customWebApi",
"azureOpenAIParameters": null,
"customWebApiParameters": {
"httpMethod": "POST",
"uri": "***********************************",
"timeout": "PT3M50S",
"authResourceId": null,
"httpHeaders": {},
"authIdentity": null
},
"aiServicesVisionParameters": null,
"amlParameters": null
}
],
"compressions": []
}
}
I got an answer from the Azure support team: I need to explicitly map the ADLS metadata field to the key field using the base64Encode mapping function in the indexer, like: