I'm interested in finding the largest set in an unordered list of months, that can be returned as an ordered list of distinct, consecutive months.
For example:
consecutive_months(["December", "January", "February", "April"])
Would output:
"December", "January", "February"
And:
consecutive_months(["February", "December", "January"])
Would output:
"December", "January", "February"
The following works but I'm curious if anyone has ideas for a more elegant way:
MONTHS = ["January", "February", "March",
"April", "May", "June",
"July", "August", "September",
"October", "November", "December"]
def consecutive_months(lst_of_months):
# create two years of months to handle going from Dec to Jan
results = []
for m in set(lst_of_months):
results.append((m,MONTHS.index(m)))
results.append((m,MONTHS.index(m)+12))
results = sorted(results, key=lambda x: x[1])
# find the longest series of consecutive months
this_series = []
longest_series = []
for this_month in results:
if len(this_series) > 0:
last_month = this_series[-1]
if last_month[1] + 1 == this_month[1]:
this_series.append(this_month)
else:
this_series = [this_month]
else:
this_series = [this_month]
if len(this_series) > len(longest_series):
longest_series = [m for (m,i) in this_series]
return longest_series
Here is a pastebin with sample inputs and expected outputs.
Here are two working approaches from a friend who also looked at the problem. The first is performant and uses the modulo operator so that the list doesn't need to copied onto itself.
month_names = [
'January', 'February',
'March', 'April', 'May',
'June', 'July', 'August',
'September', 'October',
'November', 'December'
]
# Looks like: {'January': 0, 'February': 1...}
month_name_to_index = {
value: index
for index, value
in enumerate(month_names)
}
def consecutive_months(list_of_months_by_name):
if not list_of_months_by_name:
# If the list is empty, return None.
return None
month_was_seen = [False] * 12 # Looks like: [False, False, ...]
for month_name in list_of_months_by_name:
month_was_seen[month_name_to_index[month_name]] = True
# Seek to first missing month:
for start_index in range(12):
if not month_was_seen[start_index]:
break
# If there is no missing month, return the whole year.
if month_was_seen[start_index]:
return {"from": "January", "to": "December", "length": 12}
# Starting from the first missing month, loop around the year
# and keep track of the longest run using one boolean and four
# integers.
running = False
longest_run_index = None
longest_run_length = 0
current_run_index = None
current_run_length = None
for offset in range(1, 13):
index = (start_index + offset) % 12
if month_was_seen[index]:
# Continue a run or begin a run.
if running:
current_run_length += 1
continue
running = True
current_run_index = index
current_run_length = 1
continue
if running:
# End the run.
running = False
if current_run_length > longest_run_length:
longest_run_index = current_run_index
longest_run_length = current_run_length
return {
"from": month_names[longest_run_index],
"to": month_names[(longest_run_index + longest_run_length - 1) % 12],
"length": longest_run_length
}
The second is a clever one-liner:
MONTH_NAMES = [
'January', 'February',
'March', 'April', 'May',
'June', 'July', 'August',
'September', 'October',
'November', 'December'
]
def consecutive_months(list_of_months_by_name):
return max(
(
len(segment)-segment.index(":")-1,
(MONTH_NAMES*2)[
int(segment[:segment.index(":")])+1
:
int(segment[:segment.index(":")]) + len(segment) - segment.index(":")
]
)
for segment in
"".join([
"x" if month_name in list_of_months_by_name else f",{index}:"
for index, month_name in enumerate(MONTH_NAMES*2)
]).split(",")
if ":" in segment
)[1] if set(MONTH_NAMES) - set(list_of_months_by_name) else MONTH_NAMES
Both algorithms return the expected results for the test data. Thanks AV!