pythonalgorithmperformanceyearmonth

Finding consecutive months in a list


I'm interested in finding the largest set in an unordered list of months, that can be returned as an ordered list of distinct, consecutive months.

For example:

consecutive_months(["December", "January", "February", "April"])

Would output:

"December", "January", "February"

And:

consecutive_months(["February", "December", "January"])

Would output:

"December", "January", "February"

The following works but I'm curious if anyone has ideas for a more elegant way:

MONTHS = ["January", "February", "March", 
          "April", "May", "June",
          "July", "August", "September", 
          "October", "November", "December"] 

def consecutive_months(lst_of_months):
    # create two years of months to handle going from Dec to Jan        
    results = []
    for m in set(lst_of_months):
        results.append((m,MONTHS.index(m)))
        results.append((m,MONTHS.index(m)+12))            
    results = sorted(results, key=lambda x: x[1])
    
    # find the longest series of consecutive months
    this_series = []    
    longest_series = []
    for this_month in results:
        if len(this_series) > 0:
            last_month = this_series[-1]
            if last_month[1] + 1 == this_month[1]:
                this_series.append(this_month)
            else:
                this_series = [this_month]
        else:
            this_series = [this_month]           
        if len(this_series) > len(longest_series):
            longest_series = [m for (m,i) in this_series]

    return longest_series

Here is a pastebin with sample inputs and expected outputs.


Solution

  • Here are two working approaches from a friend who also looked at the problem. The first is performant and uses the modulo operator so that the list doesn't need to copied onto itself.

    month_names = [
        'January', 'February',
        'March', 'April', 'May',
        'June', 'July', 'August',
        'September', 'October',
        'November', 'December'
    ]
    ​
    # Looks like: {'January': 0, 'February': 1...}
    month_name_to_index = {
      value: index
      for index, value
      in enumerate(month_names)
    }
    ​
    ​
    def consecutive_months(list_of_months_by_name):
      if not list_of_months_by_name:
        # If the list is empty, return None.
        return None
    ​
      month_was_seen = [False] * 12  # Looks like: [False, False, ...]
      
      for month_name in list_of_months_by_name: 
        month_was_seen[month_name_to_index[month_name]] = True
    ​
      # Seek to first missing month:
      for start_index in range(12):
        if not month_was_seen[start_index]:
          break
    ​
      # If there is no missing month, return the whole year.
      if month_was_seen[start_index]:
        return {"from": "January", "to": "December", "length": 12}
    ​
      # Starting from the first missing month, loop around the year
      # and keep track of the longest run using one boolean and four
      # integers.
      running = False
      longest_run_index = None
      longest_run_length = 0
      current_run_index = None
      current_run_length = None
      for offset in range(1, 13):
        index = (start_index + offset) % 12
        if month_was_seen[index]:
          # Continue a run or begin a run.
          if running:
            current_run_length += 1
            continue
          running = True
          current_run_index = index 
          current_run_length = 1
          continue
        if running:
          # End the run.
          running = False
          if current_run_length > longest_run_length:
            longest_run_index = current_run_index 
            longest_run_length = current_run_length
    ​
      return {
        "from": month_names[longest_run_index],
        "to": month_names[(longest_run_index + longest_run_length - 1) % 12],
        "length": longest_run_length
      }
    

    The second is a clever one-liner:

    MONTH_NAMES = [
        'January', 'February',
        'March', 'April', 'May',
        'June', 'July', 'August',
        'September', 'October',
        'November', 'December'
    ]
    ​
    def consecutive_months(list_of_months_by_name):
      return max(
        (
          len(segment)-segment.index(":")-1,
          (MONTH_NAMES*2)[
              int(segment[:segment.index(":")])+1
              :
              int(segment[:segment.index(":")]) + len(segment) - segment.index(":")
          ]
        )
        for segment in 
        "".join([
          "x" if month_name in list_of_months_by_name else f",{index}:"
          for index, month_name in enumerate(MONTH_NAMES*2)
        ]).split(",")
        if ":" in segment
      )[1] if set(MONTH_NAMES) - set(list_of_months_by_name) else MONTH_NAMES
    

    Both algorithms return the expected results for the test data. Thanks AV!