My scenario:
I can do that using Recursive CTE. However, My supervisor asked me to find an alternative way for that :(.
Recursive CTE Code:
with recursive cte as (
select ID, Email, MobileNo, DeviceId, IPAddress, id as tracking
from tableuser
where isfraudsterstatus = 1
union all
select u.id, u.email, u.mobileno, u.deviceid, u.ipaddress , concat_ws(',', cte.tracking, u.id)
from cte join
tableuser u
on u.email = cte.email or
u.mobileno = cte.mobileno or
u.deviceid = cte.deviceid or
u.ipaddress = cte.ipaddress
where find_in_set(u.id, cte.tracking) = 0
)
select *
from cte;
OUTPUT:
Hmmm can I do that using Python ? I am thinking about pandas
import numpy as np
import pandas as pd
import functools
df = pd.DataFrame({'userId':
[1, 2, 3, 4,],
'phone':
['01111', '01111', '53266', '7455'],
'email':
['aziz@gmail', 'aziz1@gmail', 'aziz1@gmail', 'aziz2@gmail'],
'deviceId':
['Ab123', 'Ab1234', 'Ab12345', 'Ab12345'],
'isFraud':
[1,0,0,0]})
Here's a solution. It basically calculates the transitive closure of the fraudster users:
df = pd.DataFrame({'userId':
[1, 2, 3, 4,],
'phone':
['01111', '01111', '53266', '7455'],
'email':
['aziz@gmail', 'aziz1@gmail', 'aziz1@gmail', 'aziz2@gmail'],
'deviceId':
['Ab123', 'Ab1234', 'Ab12345', 'Ab12345'],
'isFraud':
[1,0,0,0]})
def expand_fraud(no_fraud, fraud, col_name):
t = pd.merge(no_fraud, fraud, on = col_name)
if len(t):
print(f"Found Match on {col_name}")
df.loc[df.userId.isin(t.userId_x), "isFraud"] = 1
return True
return False
while True:
added_fraud = False
fraud = df[df.isFraud == 1]
no_fraud = df[df.isFraud == 0]
added_fraud |= expand_fraud(no_fraud, fraud, "deviceId")
added_fraud |= expand_fraud(no_fraud, fraud, "email")
added_fraud |= expand_fraud(no_fraud, fraud, "phone")
if not added_fraud:
break
print(df)
The output is:
userId phone email deviceId isFraud
0 1 01111 aziz@gmail Ab123 1
1 2 01111 aziz1@gmail Ab1234 1
2 3 53266 aziz1@gmail Ab12345 1
3 4 7455 aziz2@gmail Ab12345 1