I have been trying to optimize the following two nested loops:
def startbars(query_name, commodity_name):
    """Fill the global ``h_list`` with values scaled so the largest is 8.0.

    Queries ``innovotable`` for the rows whose ``commodity`` equals
    *commodity_name* and whose *query_name* column is not the string 'NULL'.
    For every code in the global ``result`` and every year from 1961 through
    2030 that has a row, stores ``8.0 / max(values) * value`` in ``h_list``
    under the key ``wbcode + year``.

    query_name -- name of the column to read; spliced into the SQL text.
    commodity_name -- value compared against the ``commodity`` column.

    Returns nothing. The outcome is the rebound module-level ``h_list``,
    which stays empty when the query yields no rows or when the largest
    value is zero.
    """
    global h_list
    h_list = {}

    # NOTE(review): both arguments are concatenated straight into the SQL
    # text. If either can come from user input, validate query_name against
    # the known column names and bind commodity_name as a query parameter
    # using the placeholder style of the DB driver in use.
    query = (" SELECT wbcode, Year, " + query_name +
             "\nFROM innovotable WHERE commodity='" + commodity_name +
             "' and\n" + query_name + " != 'NULL' ")

    values = {}
    peak = None
    for row in cursor.execute(query):
        n = float(row[2])
        values[str(row[0]) + str(row[1])] = n
        # Track the maximum while reading instead of calling max() later.
        if peak is None or n > peak:
            peak = n

    # No rows, or a maximum of zero: there is nothing to scale by.
    if not peak:
        return

    # Both of these are loop invariants, so compute them exactly once.
    scale = 8.0 / peak
    year_suffixes = [str(year) for year in xrange(1961, 2031)]

    for iso in result:
        for suffix in year_suffixes:
            skey = iso + suffix
            # A missing (code, year) pair is skipped on its own; it must not
            # abandon the remaining years of the same code.
            if skey in values:
                h_list[skey] = scale * values[skey]
Any ideas? Thanks.
Your code isn't complete, which makes it hard to give good advice, but:
Also, you need to know how slow the current code is and how fast you need it to be; otherwise your optimisations may be misplaced.
Your data structures are all messed up. Maybe something like this would be faster:
def startbars(query_name, commodity_name):
    """Fill the global ``h_list`` with values scaled so the largest is 8.0.

    Rows are grouped as year -> wbcode -> value while they are read. Each
    row whose year lies in 1961..2030 and whose code is in the global
    ``result`` is then written to ``h_list`` under the key ``wbcode + year``
    as ``8.0 / max(values) * value``. The maximum is taken over every row
    the query returns.

    query_name -- column to read; must be in INNOVOTABLE_FIELD_NAMES.
    commodity_name -- value compared against the ``commodity`` column.

    Returns nothing. ``h_list`` stays empty when the query yields no rows or
    when the largest value is zero.

    Raises ValueError if query_name is not a known field name.
    """
    global h_list

    # An explicit raise, not an assert: the check guards a name that is
    # spliced into SQL, and asserts disappear when Python runs with -O.
    if query_name not in INNOVOTABLE_FIELD_NAMES:
        raise ValueError("unknown innovotable field: %r" % (query_name,))

    ## TODO: Replace with proper SQL query (commodity_name is still
    ## concatenated into the statement instead of being bound).
    query = (" SELECT wbcode, Year, " + query_name +
             "\nFROM innovotable WHERE commodity='" + commodity_name +
             "' and\n" + query_name + " != 'NULL' ")
    rows = cursor.execute(query)

    h_list = {}
    values_by_year = {}
    peak = None
    for row in rows:
        n = float(row[2])
        values_by_year.setdefault(int(row[1]), {})[str(row[0])] = n
        if peak is None or n > peak:
            peak = n

    # No rows, or a maximum of zero: there is nothing to scale by.
    if not peak:
        return

    constant = 8.0 / peak
    # Build the lookup set once so each membership test is O(1).
    wanted = frozenset(result)

    for an_year, values_by_code in values_by_year.iteritems():
        # Inclusive bounds 1961..2030, i.e. the years of xrange(1961, 2031).
        if an_year < 1961 or an_year > 2030:
            continue
        year_suffix = str(an_year)
        for wb_code, value in values_by_code.iteritems():
            if wb_code in wanted:
                h_list[wb_code + year_suffix] = constant * value
Or moving all checks into the first loop:
def startbars(query_name, commodity_name):
    """Fill the global ``h_list`` with values scaled so the largest is 8.0.

    Single-pass variant: each row is filtered as it is read (year within
    1961..2030, code present in the global ``result``) and the kept rows are
    written to ``h_list`` under the key ``wbcode + year`` as
    ``8.0 / max(values) * value``.

    query_name -- column to read; must be in INNOVOTABLE_FIELD_NAMES.
    commodity_name -- value compared against the ``commodity`` column.

    Returns nothing. ``h_list`` stays empty when the query yields no rows or
    when the largest value is zero.

    Raises ValueError if query_name is not a known field name.
    """
    global h_list

    # An explicit raise, not an assert: the check guards a name that is
    # spliced into SQL, and asserts disappear when Python runs with -O.
    if query_name not in INNOVOTABLE_FIELD_NAMES:
        raise ValueError("unknown innovotable field: %r" % (query_name,))

    ## TODO: Replace with proper SQL query (commodity_name is still
    ## concatenated into the statement instead of being bound).
    query = (" SELECT wbcode, Year, " + query_name +
             "\nFROM innovotable WHERE commodity='" + commodity_name +
             "' and\n" + query_name + " != 'NULL' ")
    rows = cursor.execute(query)

    h_list = {}
    # Build the lookup set once so each membership test is O(1).
    wanted = frozenset(result)
    kept = []
    peak = None
    for row in rows:
        n = float(row[2])
        # The maximum covers every returned row, not only the kept ones, so
        # the scale factor equals one computed from the whole result set.
        if peak is None or n > peak:
            peak = n
        an_year = int(row[1])
        # Inclusive bounds 1961..2030, i.e. the years of xrange(1961, 2031).
        if an_year < 1961 or an_year > 2030:
            continue
        wb_code = str(row[0])
        if wb_code not in wanted:
            continue
        kept.append((wb_code + str(an_year), n))

    # No rows, or a maximum of zero: there is nothing to scale by.
    if not peak:
        return

    constant = 8.0 / peak
    for skey, n in kept:
        h_list[skey] = constant * n