I use the rdflib Python library to model a graph of contacts, and perform SPARQL queries to retrieve who knows whom. This works fine when people are added as URIRef, but not when using BNode.
The example graph can be represented as follows:
bob - knows -> linda
alice - knows -> linda
tom - knows -> linda
tom - knows -> bob
Only Tom knows Bob, and no one knows Tom.
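I perform the following 2 queries: the first finds who knows Bob (this returns Tom), and the second finds who knows Tom (this should return no one).
When Tom is added as a URIRef, it works as expected. However, when Tom is added as a BNode, the second query returns 3 names!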
# Toggle for how Tom is modelled: True -> blank node, False -> URIRef.
use_blank_node = True # switch to see the undesired behavior happens only with blank node
# Predicate IRIs shared by the graph builder and both SPARQL queries.
pred_knows = URIRef("http://example.org/knows")  # links a person to a person they know
pred_named = URIRef("http://example.org/named")  # links a person to a name literal
def create_graph() -> Graph:
    """Build the sample contact graph.

    Bob is always a URIRef; Linda, Alice and Remy are blank nodes. Tom is a
    blank node when ``use_blank_node`` is true and a URIRef otherwise. Every
    person gets a ``pred_named`` literal, and the ``pred_knows`` edges are:
    Bob, Alice and Tom each know Linda, and Tom also knows Bob.

    Returns:
        The populated graph.
    """
    bob = URIRef("http://example.org/people/Bob")
    linda = BNode()  # a GUID is generated
    alice = BNode()
    if use_blank_node:
        tom = BNode()
    else:
        tom = URIRef("http://example.org/people/Tom")
    print(f"{str(tom)=}")
    remy = BNode()

    # (node, name) pairs, listed in the order the triples are inserted.
    names = (
        (bob, "Bob"),
        (alice, "Alice"),
        (tom, "Tom"),
        (linda, "Linda"),
        (remy, "Remy"),
    )
    # (subject, object) pairs for the "knows" relation.
    acquaintances = (
        (bob, linda),
        (alice, linda),
        (tom, linda),
        (tom, bob),
    )

    contacts = Graph()
    for person, label in names:
        contacts.add((person, pred_named, Literal(label)))
    for knower, known in acquaintances:
        contacts.add((knower, pred_knows, known))
    return contacts
# SPARQL query selecting every subject that knows Bob, together with its name.
# In the example graph only Tom knows Bob, so a single (node, name) row is expected.
find_tom_who_knows_bob_query = f"""SELECT DISTINCT ?knowsbob ?nameofwhoknowsbob
WHERE
{{ ?knowsbob <{pred_knows}> <http://example.org/people/Bob> ;
<{pred_named}> ?nameofwhoknowsbob .
}}"""
def find_who_know_tom(tom_id) -> str:
    """Return a SPARQL query selecting the names of everyone who knows Tom.

    Args:
        tom_id: The node identifying Tom. A BNode is written into the query
            as a blank node label (``_:<id>``); anything else is written as
            an IRI (``<id>``).

    Returns:
        The SPARQL query text.
    """
    # NOTE: per the SPARQL 1.1 Query Language spec, a blank node label in a
    # query is not expected to refer to a particular blank node in the data,
    # so the BNode form does not pin the match to Tom's node.
    if type(tom_id) is BNode:
        tom_query = f"_:{tom_id}"
    else:
        tom_query = f"<{tom_id}>"
    return f"""SELECT DISTINCT ?nameOfWhoKnowsTom
WHERE
{{ ?iriOfWhoKnowsTom <{pred_knows}> {tom_query} ;
<{pred_named}> ?nameOfWhoKnowsTom}}"""
def main():
    """Build the sample graph, look Tom up via Bob, then list who knows Tom."""
    graph = create_graph()
    banner = "=" * 60
    print(banner, "\n", graph.serialize(), "\n", banner)

    # First query: exactly one (node, name) row is expected, and it is Tom.
    bob_knowers = list(graph.query(find_tom_who_knows_bob_query))
    assert len(bob_knowers) == 1 and len(bob_knowers[0]) == 2
    tom_row = bob_knowers[0]
    tom_id = tom_row[0]
    print(f"{str(tom_id)=}")
    # The returned node type must match the way Tom was added to the graph.
    assert (type(tom_id) == BNode and use_blank_node) or (
        type(tom_id) == URIRef and use_blank_node is False
    )
    assert str(tom_row[1]) == "Tom"

    # Second query: feed the node found above back in to ask who knows Tom.
    query = find_who_know_tom(tom_id)
    print(query)
    tom_knowers = [str(row[0]) for row in graph.query(query)]
    print("They know Tom:", ", ".join(tom_knowers))
    # why is it not empty when use_blank_node = True
    # prints: "They know Tom: Bob, Alice, Tom"


if __name__ == "__main__":
    main()
My question: how do I correctly use SPARQL so that the query also works with blank nodes?
Blank nodes are similar to free variables. From SPARQL 1.1 Query Language:
An application writer should not expect blank node labels in a query to refer to a particular blank node in the data.
Your second query, instead of
SELECT ?nameOfWhoKnowsTom WHERE {
?iriOfWhoKnowsTom ex:knows _:N6fb3b031995c43cfbf3e257ec0c0eac0 ;
ex:named ?nameOfWhoKnowsTom .
}
should be like this one:
SELECT ?nameOfWhoKnowsTom WHERE {
?iriOfWhoKnowsTom ex:knows / ex:named "Tom" ;
ex:named ?nameOfWhoKnowsTom .
}
See also the blank-nodes tag info.