I have an array of names and roles of people within a company: Example array:
names_and_titles = [
("Samantha Reyes", "Innovation", "Product Owner"),
("Ethan McAllister", "Data Scientist"),
("Priya Deshmukh", "Data Architect", "SMT"),
("Marcus Liu", "Stream 3"),
("Elena Petrova", "SMT", "Stream 3"),
]
I also have a csv file with all of the previous pairs that have been generated.
I want to create a list of paired individuals that have not been paired before (unique pair) which also do not share the same role e.g. "Marcus Liu" and "Elena Petrova" cannot be paired together as they share the same role "Stream 3".
This is my code for generating unique pairs and saving the generated pairs back into the CSV file.
import random
import pandas as pd
# An array of all names and titles
names_and_titles = [
("Samantha Reyes", "Innovation", "Product Owner"),
("Ethan McAllister", "Data Scientist"),
("Priya Deshmukh", "Data Architect", "SMT"),
("Marcus Liu", "Stream 3"),
("Elena Petrova", "SMT", "Stream 3"),
]
# Load previously generated pairs from the CSV file
def load_seen_pairs_from_csv(prev_pairs2):
df = pd.read_csv(prev_pairs2)
seen_pairs = set()
for _, row in df.iterrows():
pair = (row['name1'], row['name2'])
reverse_pair = (row['name2'], row['name1'])
seen_pairs.add(pair)
seen_pairs.add(reverse_pair)
return seen_pairs
# Save new pairs to the CSV file
def save_pairs_to_csv(prev_pairs2, pairs):
df = pd.DataFrame(pairs, columns=['name1', 'name2'])
df.to_csv(prev_pairs2, index=False, mode='a', header=False)
# Path to the CSV file containing previously generated pairs
csv_file_path = 'prev_pairs2.csv'
# Initialize the seen_pairs set with previously created pairs
seen_pairs = load_seen_pairs_from_csv(csv_file_path)
# Excluded pairs for pairing logic
excluded_pairs = []
def create_unique_pairs_with_debugging_and_fallback(names_and_titles, seen_pairs, excluded_pairs):
excluded_set = set(excluded_pairs) | set((pair[1], pair[0]) for pair in excluded_pairs) # Include reverse pairs
max_retries = 10000 # Increase the number of retries to improve pairing chances
retries = 0
while retries < max_retries:
random.shuffle(names_and_titles)
pairs = []
used_names = set() # Track names already paired in this run
skipped = [] # Track skipped individuals for debugging
for i in range(0, len(names_and_titles) - 1, 2):
person1, person2 = names_and_titles[i], names_and_titles[i + 1]
roles1 = set(person1[2:]) if len(person1) > 2 else set() # Handle blank roles
roles2 = set(person2[2:]) if len(person2) > 2 else set() # Handle blank roles
pair = (person1[0], person2[0])
reverse_pair = (person2[0], person1[0])
# Debugging: Log why pairs are skipped
if pair in excluded_set or reverse_pair in excluded_set:
print(f"Skipping pair due to exclusion: {person1[0]} - {person2[0]}")
continue
if pair in seen_pairs or reverse_pair in seen_pairs:
print(f"Skipping pair due to seen pair: {person1[0]} - {person2[0]}")
continue
if roles1 & roles2: # Skip if roles overlap
print(f"Skipping pair due to role overlap: {person1[0]} ({roles1}) - {person2[0]} ({roles2})")
continue
if person1[0] in used_names or person2[0] in used_names: # Avoid duplicate pairings in this run
print(f"Skipping pair due to duplicate usage: {person1[0]} - {person2[0]}")
continue
pairs.append((person1, person2))
used_names.update([person1[0], person2[0]]) # Mark names as used
# Add skipped individuals to the next round
skipped = [person for person in names_and_titles if person[0] not in used_names]
if not skipped: # If no one is skipped, pairing is complete
pairs_set = set((pair[0][0], pair[1][0]) for pair in pairs)
if not any((pair in seen_pairs or (pair[1], pair[0]) in seen_pairs) for pair in pairs_set):
seen_pairs.update(pairs_set)
return pairs
retries += 1
# If retries are exhausted, raise an error with debugging information
print("Unable to generate unique pairs with the given restrictions.")
print(f"Skipped individuals: {[person[0] for person in skipped]}")
raise ValueError("Unable to generate unique pairs with the given restrictions.")
# Generate unique pairs with debugging and fallback logic
unique_pairs = create_unique_pairs_with_debugging_and_fallback(names_and_titles, seen_pairs, excluded_pairs)
# Print the unique pairs with names only
for pair in unique_pairs:
print(f"{pair[0][0]} - {pair[1][0]}")
print("-----")
# Print the total number of pairs
print(f"Total pairs: {len(unique_pairs)}")
# Save the unique pairs to the CSV file
save_pairs_to_csv(csv_file_path, [(pair[0][0], pair[1][0]) for pair in unique_pairs])
I was expecting to be given a list of pairs who are unique and do not share the same role. However I am generating a list of pairs where a few pairings share the same role.
For those wondering this is the format of the prev_pairs2.csv file:
name1,name2
Ava Thompson,Noah Bennett
Liam Carter,Zara Mahmood
Maya Chen,Oliver Grant
Nina Kowalski,Tomás Rivera
Rajiv Mehta,Grace O'Connor
Daniel Okoro,Isla McKenzie
I believe your problem lies in the fact that:
roles1 = set(person1[2:]) if len(person1) > 2 else set() # Handle blank roles
roles2 = set(person2[2:]) if len(person2) > 2 else set() # Handle blank roles
Should be:
# check here --------v----xxx not needed
roles1 = set(person1[1:]) # if len(person1) > 1 else set() # Handle blank roles
roles2 = set(person2[1:]) # if len(person2) > 1 else set() # Handle blank roles
as python uses 0-base indexing, and your person
items look like:
#[0: [1: [2: [3:
('Name', 'Role1', 'Role2', ...)
The way it is currently written, you are omitting the first role for each person. Therefore, it is normal if you observe overlaps ;)
Edit: As stated per @simon in the comments, the if ... else ...
part is not even needed.