python nested-loops optimizer-hints

Optimize two simple nested loops


I have been trying to optimize the two following nested loops:

def startbars(query_name, commodity_name):
     """Rebuild the global ``h_list`` from one column of ``innovotable``.

     Selects ``wbcode``, ``Year`` and the column named by ``query_name`` for
     rows whose ``commodity`` equals ``commodity_name`` and whose column value
     is not the string ``'NULL'``. Each value is converted to ``float`` and,
     for every ``iso`` in ``result`` and every year from 1961 through 2030,
     stored in ``h_list`` as ``8.0 / max(all values) * value`` under the key
     ``iso + str(year)``.

     ``cursor`` and ``result`` are not defined in this function; they are
     read from the enclosing scope.
     """

     global h_list
     # nc: every value read from the query.
     # s: value keyed by str(wbcode) + str(Year).
     # h_list: the output mapping, replaced on every call.
     nc, s, h_list = [], {}, {}
     # NOTE(review): both arguments are concatenated straight into the SQL
     # text without escaping -- confirm that callers only pass trusted values.
     query = """ SELECT wbcode, Year, """+query_name+""" 
                 FROM innovotable WHERE commodity='"""+commodity_name+"""' and

                 """+query_name+""" != 'NULL' """
     rows = cursor.execute(query)
     for row in rows:
         n = float(row[2])
         s[str(row[0])+str(row[1])] = n
         nc.append(n)
     for iso in result:
         try:
             # xrange's stop value is exclusive, so the last year is 2030.
             for an_year in xrange(1961, 2031, 1):
                 skey = iso+str(an_year)
                 # max(nc) is re-evaluated on every iteration although nc no
                 # longer changes once the first loop has finished.
                 h_list[skey] = 8.0 / max(nc) * s[skey]
         except:
             # The try wraps the whole year loop, so the first exception
             # (for example a KeyError when s has no entry for skey) skips
             # the remaining years for this iso; the error is discarded.
             pass

Any ideas? Thanks.


Solution

  • Your code isn't complete, which makes it hard to give good advice, but:

    1. Inner loop doesn't depend on outer-loop, so pull it out of the outer loop.
    2. max(nc) is a constant after first loop, so pull it out of the loops.

    Also, you need to know how slow the current code is and how fast you need it to be; otherwise your optimisations may be misplaced.

    Your data structures are all messed up. Maybe something like this would be faster:

    def startbars(query_name, commodity_name):
        """Rebuild the global ``h_list`` with values scaled so the maximum is 8.0.

        Reads ``wbcode``, ``Year`` and the column ``query_name`` from
        ``innovotable`` for the given commodity, skipping rows whose column
        value is the string ``'NULL'``. Every value is scaled by
        ``8.0 / max(all values returned by the query)`` and stored in
        ``h_list`` under the key ``wbcode + str(year)``, but only for years
        1961 through 2030 and only for codes contained in ``result``.

        ``cursor``, ``result`` and ``INNOVOTABLE_FIELD_NAMES`` are read from
        the enclosing scope.

        Args:
            query_name: Name of the column to read; must be listed in
                ``INNOVOTABLE_FIELD_NAMES``.
            commodity_name: Value matched against the ``commodity`` column.

        Raises:
            ValueError: If ``query_name`` is not a known field name.

        If the query returns no rows, or the largest value is 0 (so no scale
        factor exists), ``h_list`` is left empty.
        """
        global h_list

        # A column name cannot be bound as a SQL parameter, so it has to be
        # interpolated. The whitelist check is therefore the only protection
        # against injection and must not be an ``assert`` (stripped by -O).
        if query_name not in INNOVOTABLE_FIELD_NAMES:
            raise ValueError("unknown innovotable field: %r" % (query_name,))

        # NOTE(review): the ``?`` placeholder assumes a qmark-paramstyle
        # driver (sqlite3, pyodbc) -- the ones whose execute() returns an
        # iterable cursor, as the loop below requires. Confirm for this DB.
        query = (
            "SELECT wbcode, Year, {0} FROM innovotable "
            "WHERE commodity = ? AND {0} != 'NULL'"
        ).format(query_name)
        rows = cursor.execute(query, (commodity_name,))

        h_list = {}
        values_by_year = {}
        all_values = []

        for row in rows:
            value = float(row[2])
            values_by_year.setdefault(int(row[1]), {})[str(row[0])] = value
            all_values.append(value)

        if not all_values:
            return
        peak = max(all_values)
        if peak == 0:
            return
        scale = 8.0 / peak

        # .items() works on both Python 2 and Python 3 dictionaries.
        for year, values_by_code in values_by_year.items():
            # Same span as xrange(1961, 2031): 2031 itself is excluded.
            if not 1961 <= year <= 2030:
                continue

            for code, value in values_by_code.items():
                if code in result:
                    h_list[code + str(year)] = scale * value
    

    Or moving all checks into the first loop:

    def startbars(query_name, commodity_name):
        """Rebuild the global ``h_list`` with values scaled so the maximum is 8.0.

        Reads ``wbcode``, ``Year`` and the column ``query_name`` from
        ``innovotable`` for the given commodity, skipping rows whose column
        value is the string ``'NULL'``. Rows are kept only when the year lies
        in 1961 through 2030 and the code is contained in ``result``. The
        kept values are scaled by ``8.0 / max(kept values)`` and stored in
        ``h_list`` under the key ``wbcode + str(year)``.

        ``cursor``, ``result`` and ``INNOVOTABLE_FIELD_NAMES`` are read from
        the enclosing scope.

        Args:
            query_name: Name of the column to read; must be listed in
                ``INNOVOTABLE_FIELD_NAMES``.
            commodity_name: Value matched against the ``commodity`` column.

        Raises:
            ValueError: If ``query_name`` is not a known field name.

        If no row passes the filters, or the largest kept value is 0 (so no
        scale factor exists), ``h_list`` is left empty.
        """
        global h_list

        # A column name cannot be bound as a SQL parameter, so it has to be
        # interpolated. The whitelist check is therefore the only protection
        # against injection and must not be an ``assert`` (stripped by -O).
        if query_name not in INNOVOTABLE_FIELD_NAMES:
            raise ValueError("unknown innovotable field: %r" % (query_name,))

        # NOTE(review): the ``?`` placeholder assumes a qmark-paramstyle
        # driver (sqlite3, pyodbc) -- the ones whose execute() returns an
        # iterable cursor, as the loop below requires. Confirm for this DB.
        query = (
            "SELECT wbcode, Year, {0} FROM innovotable "
            "WHERE commodity = ? AND {0} != 'NULL'"
        ).format(query_name)
        rows = cursor.execute(query, (commodity_name,))

        kept = []
        peak = None

        for row in rows:
            year = int(row[1])
            # Same span as xrange(1961, 2031): 2031 itself is excluded.
            if not 1961 <= year <= 2030:
                continue

            code = str(row[0])
            if code not in result:
                continue

            value = float(row[2])
            kept.append((code + str(year), value))
            if peak is None or value > peak:
                peak = value

        h_list = {}
        # peak is None when nothing was kept; a zero peak would divide by zero.
        if peak is None or peak == 0:
            return

        scale = 8.0 / peak
        for key, value in kept:
            h_list[key] = scale * value