Could you please help me solve an error regarding indices?
# Reproduction script for the TypeError quoted after this snippet.
# Two small example frames with identical columns; 'Index' is an ordinary
# column at this point, not yet the row index.
df18 =pd.DataFrame({'Index': [0,1,2,3,4,],
'Name': ['Anna', 'Jan', 'Pawel','Sonia', "Klaudia"],
'Age': [20,25,30,35,35],
'Birthday': ['2004-01-01','1999-01-01','1994-01-01','1989-01-01','1999-01-01',]
} )
df19 =pd.DataFrame({'Index': [0,1,2,3,4,],
'Name': ['Anna', 'Gabi', 'Diwa','Emil', "Klaudia"],
'Age': [20,30,30,35,35],
'Birthday': ['2004-01-01','1994-01-01','1994-01-01','1989-01-01','1999-01-01',]
})
# Cast the 'Index' column to int explicitly, then promote it to the row index
# of each frame.
df18["Index"] = df18["Index"].astype('int')
df19["Index"] = df19["Index"].astype('int')
df18 = df18.set_index('Index')
df19 = df19.set_index('Index')
import recordlinkage
# NOTE(review): the '' argument looks like the cause of the TypeError -- the
# working version further down calls recordlinkage.Index() with no arguments.
# Presumably '' is stored as if it were an indexing algorithm; confirm against
# the recordlinkage documentation.
indexer = recordlinkage.Index('')
# Register a blocking rule on the 'Age' column.
indexer.block('Age')
# Build candidate pairs between the two frames; this is the call that raises.
pairs = indexer.index(df18,df19)
TypeError: slice indices must be integers or None or have an __index__ method
I tried setting the index and converting its type to int, but nothing helped.
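The problem is not the DataFrame index but the empty string passed to `recordlinkage.Index('')`: it is treated as an indexing algorithm, which is what triggers the TypeError. Create the indexer with no arguments instead. You can do it this way: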
import pandas as pd
import recordlinkage

# Build each frame and promote the 'Index' column to the row index in a
# single expression.
df18 = pd.DataFrame(
    {
        'Index': [0, 1, 2, 3, 4],
        'Name': ['Anna', 'Jan', 'Pawel', 'Sonia', 'Klaudia'],
        'Age': [20, 25, 30, 35, 35],
        'Birthday': ['2004-01-01', '1999-01-01', '1994-01-01', '1989-01-01', '1999-01-01'],
    }
).set_index('Index')

df19 = pd.DataFrame(
    {
        'Index': [0, 1, 2, 3, 4],
        'Name': ['Anna', 'Gabi', 'Diwa', 'Emil', 'Klaudia'],
        'Age': [20, 30, 30, 35, 35],
        'Birthday': ['2004-01-01', '1994-01-01', '1994-01-01', '1989-01-01', '1999-01-01'],
    }
).set_index('Index')

# Create the indexer with no constructor arguments, then register a blocking
# rule on the 'Age' column.
indexer = recordlinkage.Index()
indexer.block('Age')

# Generate the candidate pairs between the two frames and display them.
pairs = indexer.index(df18, df19)
print(pairs)
which gives you
MultiIndex([(0, 0),
(2, 1),
(2, 2),
(3, 3),
(3, 4),
(4, 3),
(4, 4)],
names=['Index_1', 'Index_2'])