I'm trying to write an OpenSearch query that excludes any results matching a list of terms. As far as I can tell from the OpenSearch documentation, this sort of behavior should be supported. However, of the two queries below, only the one that doesn't use a list (working_query) works. Unfortunately, googling and searching Stack Overflow for the error hasn't yielded any results that seem relevant. Thanks in advance!
# Query that is accepted: every match_phrase clause receives a single string.
working_query = {
    "bool": {
        # Required clauses: a phrase match on textContent (with slop) plus a
        # nested bool holding one optional phrase.
        "must": [
            {
                "match_phrase": {
                    "textContent": {"query": "must_phrase", "slop": 10},
                },
            },
            {
                "bool": {
                    "should": [
                        {"match_phrase": {"textContent": "should_phrase"}},
                    ],
                },
            },
        ],
        # Exclusion clause: a single folder identifier value.
        "must_not": [
            {
                "match_phrase": {
                    "folderIdentifier.identifierValue": "elem1",
                },
            },
        ],
    },
}
# Query that is rejected: the must_not clause hands match_phrase a list of
# values, and the server answers 400 x_content_parse_exception
# ("[bool] failed to parse field [must_not]").
broken_query = {
    "bool": {
        # Required clauses: a phrase match on textContent (with slop) plus a
        # nested bool holding one optional phrase.
        "must": [
            {
                "match_phrase": {
                    "textContent": {"query": "must_phrase", "slop": 10},
                },
            },
            {
                "bool": {
                    "should": [
                        {"match_phrase": {"textContent": "should_phrase"}},
                    ],
                },
            },
        ],
        # Exclusion clause: a list of folder identifier values (the part the
        # server refuses to parse).
        "must_not": [
            {
                "match_phrase": {
                    "folderIdentifier.identifierValue": ["elem1", "elem2"],
                },
            },
        ],
    },
}
Error:
---------------------------------------------------------------------------
RequestError Traceback (most recent call last)
Cell In[56], line 25
1 broken_query = {
2 "bool": {
3 "must": [
(...)
23 }
24 }
---> 25 test=search_docs_full(broken_query)
Cell In[11], line 29, in search_docs_full(text_query)
25 if last_hit != 0:
26 query['search_after'] = [last_hit]
---> 29 results = searchwrapper.client.search(index=index_name,body=query, request_timeout=1200)
30 if len(results['hits']['hits']) != 0:
31 previous_hit = last_hit
File ~/.local/lib/python3.11/site-packages/opensearchpy/client/utils.py:181, in query_params.<locals>._wrapper.<locals>._wrapped(*args, **kwargs)
178 if v is not None:
179 params[p] = _escape(v)
--> 181 return func(*args, params=params, headers=headers, **kwargs)
File ~/.local/lib/python3.11/site-packages/opensearchpy/client/__init__.py:1742, in OpenSearch.search(self, body, index, params, headers)
1739 if "from_" in params:
1740 params["from"] = params.pop("from_")
-> 1742 return self.transport.perform_request(
1743 "POST",
1744 _make_path(index, "_search"),
1745 params=params,
1746 headers=headers,
1747 body=body,
1748 )
File ~/.local/lib/python3.11/site-packages/opensearchpy/transport.py:448, in Transport.perform_request(self, method, url, params, body, timeout, ignore, headers)
446 raise e
447 else:
--> 448 raise e
450 else:
451 # connection didn't fail, confirm its live status
452 self.connection_pool.mark_live(connection)
File ~/.local/lib/python3.11/site-packages/opensearchpy/transport.py:409, in Transport.perform_request(self, method, url, params, body, timeout, ignore, headers)
406 connection = self.get_connection()
408 try:
--> 409 status, headers_response, data = connection.perform_request(
410 method,
411 url,
412 params,
413 body,
414 headers=headers,
415 ignore=ignore,
416 timeout=timeout,
417 )
419 # Lowercase all the header names for consistency in accessing them.
420 headers_response = {
421 header.lower(): value for header, value in headers_response.items()
422 }
File ~/.local/lib/python3.11/site-packages/opensearchpy/connection/http_requests.py:232, in RequestsHttpConnection.perform_request(self, method, url, params, body, timeout, allow_redirects, ignore, headers)
219 if (
220 not (200 <= response.status_code < 300)
221 and response.status_code not in ignore
222 ):
223 self.log_request_fail(
224 method,
225 url,
(...)
230 raw_data,
231 )
--> 232 self._raise_error(
233 response.status_code,
234 raw_data,
235 response.headers.get("Content-Type"),
236 )
238 self.log_request_success(
239 method,
240 url,
(...)
245 duration,
246 )
248 return response.status_code, response.headers, raw_data
File ~/.local/lib/python3.11/site-packages/opensearchpy/connection/base.py:316, in Connection._raise_error(self, status_code, raw_data, content_type)
313 except (ValueError, TypeError) as err:
314 logger.warning("Undecodable raw error response from server: %s", err)
--> 316 raise HTTP_EXCEPTIONS.get(status_code, TransportError)(
317 status_code, error_message, additional_info
318 )
RequestError: RequestError(400, 'x_content_parse_exception', '[1:325] [bool] failed to parse field [must_not]')
match_phrase doesn't support an array of values,
so you need to change this
{"match_phrase":{"folderIdentifier.identifierValue":['elem1','elem2']}}
to this (one match_phrase clause per value, combined with should)
"bool":{
"should":[
{"match_phrase":{"folderIdentifier.identifierValue":'elem1'}},
{"match_phrase":{"folderIdentifier.identifierValue":'elem2'}}]}