I have a DataFrame column, `country`, in which each row lists more than one country, separated by commas. I want to convert each country to its continent. In the past I have used country_converter for this, but when I try it here I get an error because there is more than one country per row.
How can I fix this?
!pip install country_converter --upgrade
import pandas as pd
import country_converter as coco
import pycountry_convert as pc
# Sample frame: every entry of the 'country' column is a single string that
# holds one or more comma-separated country names.
df = pd.DataFrame()
df['country'] = [
    'United States, Canada, England',
    'United Kingdom, Spain, South Korea',
    'Spain',
    'France, Sweden',
]

# CONVERT COUNTRY TO ISO COUNTRY
cc = coco.CountryConverter()

# Gather the column values into a plain list. Each element is still a whole
# row string (e.g. 'France, Sweden'), not an individual country name.
country = list(df['country'])

# Converting country names to ISO 3
iso_alpha = cc.convert(names=country, to='ISO3')
# CONVERT ISO COUNTRY TO CONTINENT
def country_to_continent(country_name):
    """Return the continent name that pycountry_convert reports for a country.

    The argument is fed through three pycountry_convert calls in sequence:
    ``country_name_to_country_alpha2``, ``country_alpha2_to_continent_code``
    and ``convert_continent_code_to_continent_name``.

    Nothing is caught here, so any exception raised by one of those calls
    propagates to the caller.
    """
    alpha2 = pc.country_name_to_country_alpha2(country_name)
    continent_code = pc.country_alpha2_to_continent_code(alpha2)
    return pc.convert_continent_code_to_continent_name(continent_code)
# Converting to continents: every entry of iso_alpha yields exactly one entry
# in the result, in the same order.
contenent = []
for iso in iso_alpha:
    try:
        contenent.append(country_to_continent(iso))
    except Exception:
        # Best-effort fallback for anything the lookup cannot resolve.
        # `Exception` (not a bare `except:`) keeps Ctrl-C and SystemExit
        # working while the loop runs.
        contenent.append('other')

# Add continents to the original dataframe.
df['Contenent'] = contenent
Assuming I understood you correctly, you want the result back in the DataFrame, so each row would contain one continent per country listed in that row, in the same order.
If so, you'll need to go through the column row by row, split each row's string into individual country names, convert each name separately, and then join the resulting continents back into a single string per row before putting them back into the DataFrame.
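A few things to note:
- Each row is split on ", " so that every country name is converted on its own.
- The result of `cc.convert` is checked for being a plain string rather than a list (the single-country row, e.g. 'Spain'), and that case is handled separately.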
Here is the code that works for me:
import pandas as pd
import country_converter as coco
import pycountry_convert as pc
# CONVERT ISO COUNTRY TO CONTINENT
def country_to_continent(country_name):
    """Look up the continent name for ``country_name`` via pycountry_convert.

    The value is passed to ``pc.country_name_to_country_alpha2``; that result
    goes to ``pc.country_alpha2_to_continent_code``; and that result goes to
    ``pc.convert_continent_code_to_continent_name``, whose return value is
    returned unchanged.

    No exception is handled here; a failure in any of the three calls
    reaches the caller.
    """
    return pc.convert_continent_code_to_continent_name(
        pc.country_alpha2_to_continent_code(
            pc.country_name_to_country_alpha2(country_name)
        )
    )
# ------ MAIN -------
# Sample frame: every entry of the 'country' column is a single string that
# holds one or more comma-separated country names.
df = pd.DataFrame()
df['country'] = [
    'United States, Canada, England',
    'United Kingdom, Spain, South Korea',
    'Spain',
    'France, Sweden',
]

# CONVERT COUNTRY TO ISO COUNTRY
# One converter instance is created up front and reused for every row.
cc = coco.CountryConverter()

# Build one comma-separated continent string per row, in row order.
cont_list = []
for arow in df['country']:
    # Split on the bare comma and strip each piece so that "A,B" and
    # "A ,  B" are parsed the same way as "A, B".
    country = [aname.strip() for aname in arow.split(",")]

    # Converting country names to ISO 3
    iso_alpha = cc.convert(names=country, to='ISO3')

    # The result may be a bare string instead of a list (the single-country
    # row); wrap it so one loop below handles both shapes.
    if isinstance(iso_alpha, str):
        iso_alpha = [iso_alpha]

    # Converting to continents.
    # NOTE(review): ISO3 codes are handed to country_to_continent, whose first
    # lookup is pc.country_name_to_country_alpha2 -- confirm that lookup
    # accepts ISO3 codes; anything it rejects ends up as 'other' below.
    contenent = []
    for iso in iso_alpha:
        try:
            contenent.append(country_to_continent(iso))
        except Exception:
            # Best-effort fallback for anything the lookup cannot resolve.
            # `Exception` (not a bare `except:`) keeps Ctrl-C and SystemExit
            # working while the loop runs.
            contenent.append('other')

    # Convert the per-row list back to a single string.
    cont_list.append(', '.join(contenent))

# Add continents to the original dataframe.
df['Contenent'] = cont_list
print(f"DF Contenent: \n{df['Contenent']}")