I'm relatively new to programming in general (business analytics student turned data analyst), and I'm testing a Python script that iterates over the rows of a CSV file and constructs a Cypher query for each row to load into Neo4j:
import pandas as pd
from neo4j import GraphDatabase
# Script purpose: read a CSV with pandas and, for each row, run a Cypher
# `merge` that creates a :Program node and sets its `type` property.
# NOTE(review): newer pandas releases reportedly expect None rather than -1
# for "no limit" here — confirm against the installed version.
pd.set_option('display.max_colwidth', -1)
# neo4j credentials
uri= "bolt://localhost:7687"
userName= "neo4j"
password= "password"
# Read columns 0-5 of the CSV, taking column names from row 0 (header=0).
df = pd.read_csv('C://Users/ABC/Documents/Test/Test/lineage_stored_procedure_dedup.csv',
sep=',', index_col=None, header=0,usecols=[0,1,2,3,4,5])
# NOTE(review): the result of this call is not assigned to anything, so
# df.columns appears to be left unchanged — confirm whether
# `df.columns = ...` was intended.
df.columns.str.replace(' ', '')
graphDB_Driver = GraphDatabase.driver(uri, auth=(userName, password))
with graphDB_Driver.session() as graphDB_Session:
# NOTE(review): `row` is never used in the loop body below; the body
# indexes `df` (the whole frame) instead.
for row in df.iterrows():
# df['Parent_Procedure'] and df['Parent_Object_Type'] select entire columns
# (pandas Series), so `cq` is a Series of strings, not a single query string.
cq = 'merge (p:Program{programName:"'+df['Parent_Procedure']+'"}) set p.type = "'+df['Parent_Object_Type']+'"'
# This is the line the traceback reports: run() evaluates `if not statement`,
# and truth-testing a Series raises
# "ValueError: The truth value of a Series is ambiguous".
res = graphDB_Session.run(cq)
graphDB_Driver.close()
I get the following error:
Traceback (most recent call last):
File "<ipython-input-91-01ba397763e3>", line 1, in <module>
runfile('C:/Users/ABC/Documents/Test/Test/StoredProcLoadScript.py', wdir='C:/Users/ABC/Documents/Test/Test')
File "C:\Users\ABC\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 827, in runfile
execfile(filename, namespace)
File "C:\Users\ABC\Anaconda3\lib\site-packages\spyder_kernels\customize\spydercustomize.py", line 110, in execfile
exec(compile(f.read(), filename, 'exec'), namespace)
File "C:/Users/ABC/Documents/Test/Test/StoredProcLoadScript.py", line 35, in <module>
res = graphDB_Session.run(cq)
File "C:\Users\ABC\Anaconda3\lib\site-packages\neo4j\__init__.py", line 429, in run
if not statement:
File "C:\Users\ABC\Anaconda3\lib\site-packages\pandas\core\generic.py", line 1555, in __nonzero__
self.__class__.__name__
ValueError: The truth value of a Series is ambiguous. Use a.empty, a.bool(), a.item(), a.any() or a.all().
I understand that this error usually means I used a Series where a single boolean was expected (for example, `and`/`or` instead of the bitwise operators `&`/`|`, as per the pandas documentation). But I don't understand where I would even need those operators in this code to begin with. I'd appreciate any and all help. Thank you.
# Connect to the Neo4j database server.
graphDB_Driver = GraphDatabase.driver(uri, auth=(userName, password))

# Cypher statements use query parameters ($name, $type, ...) instead of
# concatenating CSV values into the query text, so a quote or backslash in a
# procedure name cannot break or alter the statement.
MERGE_PROGRAM_QUERY = "merge (p:Program{programName: $name}) set p.type = $type"
MERGE_CALL_QUERY = (
    "match (p1:Program{programName: $parent}) "
    "match (p2:Program{programName: $called}) "
    "merge (p1)-[:CALLS_TO]->(p2)"
)

try:
    with graphDB_Driver.session() as graphDB_Session:
        # The three passes are kept separate and in this order on purpose:
        # when a procedure appears both as a parent and as a callee, the
        # `type` written by the later (callee) pass is the one that remains,
        # and both nodes must exist before the relationship pass matches them.

        # CREATE NODES (:Program{Parent_Procedure}),
        # set property 'type' = Parent_Object_Type
        for program_name, object_type in zip(
            df["Parent_Procedure"], df["Parent_Object_Type"]
        ):
            res1 = graphDB_Session.run(
                MERGE_PROGRAM_QUERY, {"name": program_name, "type": object_type}
            )
            print(res1)

        # CREATE NODES (:Program{Called_Procedure}),
        # set property 'type' = Called_Object_Type
        for program_name, object_type in zip(
            df["Called_Procedure"], df["Called_Object_Type"]
        ):
            res2 = graphDB_Session.run(
                MERGE_PROGRAM_QUERY, {"name": program_name, "type": object_type}
            )
            print(res2)

        # Create relationship - (Parent_Procedure)-[:CALLS_TO]->(Called_Procedure)
        for parent_name, called_name in zip(
            df["Parent_Procedure"], df["Called_Procedure"]
        ):
            res3 = graphDB_Session.run(
                MERGE_CALL_QUERY, {"parent": parent_name, "called": called_name}
            )
            print(res3)
finally:
    # Release the driver's connections even if one of the queries fails.
    graphDB_Driver.close()