I need to extract an attachment of type MSG (an Outlook item) from an email and save it to a location on disk, using Python.
The script I wrote works for nearly all file types, but not for Outlook items:
def parse_attachment(message_part):
    """Describe *message_part* as an attachment dict, or return None.

    A part is treated as an attachment when its Content-Disposition header
    starts with ``attachment`` or ``inline`` (case-insensitive).

    The returned dict always has the keys ``data`` (the decoded payload),
    ``content_type`` and ``size``.  ``name``, ``creation-date`` and
    ``modification-date`` are added when the matching Content-Disposition
    parameter is present, and a ``size`` parameter overrides the computed
    length with the header's string value.

    For multipart parts (for example an attached email) ``get_payload``
    with ``decode=True`` returns None; in that case ``data`` is None and
    ``size`` is 0 instead of raising ``TypeError``.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    dispositions = content_disposition.strip().split(";")
    # Strip the token so "attachment ; filename=x" is still recognised.
    if dispositions[0].strip().lower() not in ("attachment", "inline"):
        return None

    file_data = message_part.get_payload(decode=True)
    debug(message_part)
    attachment = {
        'data': file_data,
        'content_type': message_part.get_content_type(),
        # file_data is None for multipart payloads; len(None) would raise.
        'size': len(file_data) if file_data is not None else 0,
    }

    for param in dispositions[1:]:
        # partition() keeps any "=" inside the value and tolerates empty
        # parameters such as the one produced by a trailing ";".
        name, separator, value = param.partition("=")
        if not separator:
            continue
        name = name.lower().strip()
        value = value.strip().strip("\"")
        if name == "filename":
            attachment['name'] = value
        elif name in ("creation-date", "modification-date", "size"):
            attachment[name] = value
    return attachment
Attachments that are themselves emails have to be handled separately. If we use walk(), which is an all-purpose generator that iterates over all the parts and subparts of a message object tree in depth-first order, we end up parsing the parts of the attached email as well.
So we have to use get_payload() to get each individual part of the email instead. Here is how we can parse the email attachments:
def get_subject(msgobj):
    """Return the decoded Subject header of *msgobj*, or None if absent.

    RFC 2047 encoded words are decoded with their declared charset and the
    fragments are joined into one string with newline characters removed.
    The result is the native ``str`` type: text on Python 3, and a UTF-8
    encoded byte string on Python 2.

    Undecodable bytes are substituted with a replacement character and an
    unknown charset falls back to ASCII, so a malformed header never
    raises.
    """
    raw_subject = msgobj['Subject']
    if raw_subject is None:
        return None

    pieces = []
    for fragment, charset in decode_header(raw_subject):
        # decode_header() yields bytes for encoded headers on Python 3 and
        # for every fragment on Python 2 (where bytes is str).
        if isinstance(fragment, bytes):
            try:
                fragment = fragment.decode(charset or 'ascii', 'replace')
            except LookupError:
                # The header declares a charset Python does not know.
                fragment = fragment.decode('ascii', 'replace')
        pieces.append(fragment)

    subject = ''.join(pieces).replace('\n', '')
    if not isinstance(subject, str):
        # Python 2 only: hand back a UTF-8 byte string rather than unicode.
        subject = subject.encode('utf8')
    return subject
def get_msg_file_as_attachment(message_part):
    """Describe an attached email (message/rfc822 part) as an attachment.

    The first sub-message of *message_part* is serialised with
    ``as_string(unixfrom=True)`` and returned in a dict with the keys
    ``data``, ``content_type``, ``name`` and ``size``.  The name is the
    sub-message's subject followed by ``.eml``; when there is no subject
    (or it is empty) ``no_subject.eml`` is used instead of failing.
    """
    embedded_message = message_part.get_payload()[0]
    data = embedded_message.as_string(unixfrom=True)

    # get_subject() returns None when the Subject header is missing, and
    # None + '.eml' would raise TypeError.
    subject = get_subject(embedded_message)
    if not subject:
        subject = 'no_subject'

    return {
        'data': data,
        'content_type': message_part.get_content_type(),
        'name': subject + '.eml',
        'size': len(data),
    }
def parse_attachment(message_part):
    """Describe *message_part* as an attachment dict, or return None.

    A part is treated as an attachment when its Content-Disposition header
    starts with ``attachment`` or ``inline`` (case-insensitive).  Parts of
    type ``message/rfc822`` are delegated to
    ``get_msg_file_as_attachment``; every other part yields a dict with
    the keys ``data`` (decoded payload), ``content_type``, ``size`` and
    ``name`` (the value of ``get_filename()``, possibly None).

    For other multipart parts ``get_payload(decode=True)`` returns None;
    in that case ``data`` is None and ``size`` is 0 instead of raising
    ``TypeError``.
    """
    content_disposition = message_part.get("Content-Disposition", None)
    if not content_disposition:
        return None

    # Strip the token so "attachment ; filename=x" is still recognised.
    disposition_kind = content_disposition.strip().split(";")[0].strip().lower()
    if disposition_kind not in ("attachment", "inline"):
        return None

    content_type = message_part.get_content_type()
    if content_type.lower().strip() == 'message/rfc822':
        return get_msg_file_as_attachment(message_part)

    file_data = message_part.get_payload(decode=True)
    return {
        'data': file_data,
        'content_type': content_type,
        # file_data is None for multipart payloads; len(None) would raise.
        'size': len(file_data) if file_data is not None else 0,
        'name': message_part.get_filename(),
    }