#!/usr/bin/env python
r"""Strip a mailbox of attachments and extra junk.

Usage: mailarch [options] msgfile

Options:
    -h / --help
        Print this message and exit.

    -d DIR / --directory=DIR
        Accepted so that old command lines keep working; the value is
        not used.

msgfile is the path to the mbox file containing the MIME messages.  The
cleaned text is written to standard output.

Usage
    mailarch mbox > mi2004sc.txt
    python ~/play/python/mailarch.py ~/Library/Mail/Mailboxes/shortlist.mbox/mbox > /tmp/mi200YMs.txt
    python ~/play/python/mailarch.py ~/Library/Mail/POP-jchatjch@mail.earthlink.net/INBOX.mbox/mbox > /tmp/mi200YM.txt
    python ~/play/python/mailarch.py ~/Library/Mail/POP-jchatjch@mail.earthlink.net/Sent\ Messages.mbox/mbox > /tmp/mo200YM.txt
"""

# The code below runs unchanged on Python 2.6+ and Python 3; it therefore
# writes through sys.stdout/sys.stderr instead of using print.
#
# errno, mimetypes and os are not used below; they are retained so the
# module namespace stays as it was.  The old "import formatter" is dropped:
# it was never used and the module no longer exists in Python 3.10+.
import email
import email.errors
import errno
import getopt
import mailbox
import mimetypes
import os
import sys
import time

# Headers echoed for multipart messages, in output order.  The spelling
# here is the label that is printed; the lookup itself is case-insensitive.
_SUMMARY_HEADERS = ('From', 'To', 'CC', 'Date', 'Subject', 'Reply-To')

# The mailbox hands out each message as a byte stream.  Python 3 needs the
# bytes-aware parser for that; Python 2 only has message_from_file, which
# already works on byte strings.
_parse_message = getattr(email, 'message_from_binary_file',
                         email.message_from_file)


def usage(code, msg=''):
    """Write the module help text to stderr and exit.

    code -- process exit status passed to sys.exit().
    msg  -- optional extra line (string or exception) written after the help.
    """
    sys.stderr.write('%s\n' % (__doc__,))
    if msg:
        sys.stderr.write('%s\n' % (msg,))
    sys.exit(code)


def msgfactory(fp):
    """Parse one message from the file-like object fp.

    Returns the parsed email message, or the empty string '' when the
    parser raises MessageParseError.  Callers must compare against ''
    explicitly: a message without headers is falsy as well.
    """
    try:
        return _parse_message(fp)
    except email.errors.MessageParseError:
        return ''


def _emit(text=''):
    """Write text plus a newline to stdout, like a print statement.

    Mail can contain characters that stdout's encoding cannot represent;
    those are replaced instead of aborting the whole run.
    """
    line = '%s\n' % (text,)
    try:
        sys.stdout.write(line)
    except UnicodeEncodeError:
        encoding = getattr(sys.stdout, 'encoding', None) or 'ascii'
        sys.stdout.write(line.encode(encoding, 'replace').decode(encoding))


def _print_multipart(msg):
    """Print the summary headers and the textual parts of a multipart msg.

    When at least one text/plain part exists only the text/plain payloads
    are printed; otherwise every part whose main type is "text" is printed
    in full (headers included).  The content types of all parts that are
    not text/plain are listed on a marker line first.
    """
    for name in _SUMMARY_HEADERS:
        if name in msg:
            _emit('%s: %s' % (name, msg.get(name)))

    parts = list(msg.walk())
    content_types = [part.get_content_type() for part in parts]
    have_text = 'text/plain' in content_types
    # Same layout as before: one leading blank, then " <type>" per part.
    skipped = ' ' + ''.join(' ' + ctype for ctype in content_types
                            if ctype != 'text/plain')

    if have_text:
        _emit(" ** MIME Multipart - skipped others :%s\n" % (skipped,))
        for part in parts:
            if part.get_content_type() == 'text/plain':
                _emit(part.get_payload())
    else:
        _emit("!! No text/plain !!" + skipped)
        for part in parts:
            if part.get_content_maintype() == 'text':
                _emit(part.as_string(True))


def main():
    """Command-line entry point: dump a cleaned copy of an mbox to stdout.

    Parses sys.argv, opens the mbox named by the first positional argument
    and prints, for every message, a "=== msg N <From_ line>" separator
    followed by either the whole message (non-multipart) or its headers and
    text parts (multipart).  Exits through usage() on bad options or a
    missing argument, and with status 1 if the mailbox cannot be opened.
    """
    try:
        opts, args = getopt.getopt(sys.argv[1:], 'hd:',
                                   ['help', 'directory='])
    except getopt.error as msg:
        usage(1, msg)

    for opt, _arg in opts:
        if opt in ('-h', '--help'):
            usage(0)
        # -d/--directory is still parsed for compatibility, but nothing in
        # this script ever consumed its value.

    if not args:
        usage(1)
    msgfile = args[0]

    try:
        # create=False: never create an empty mailbox for a mistyped path.
        mbox = mailbox.mbox(msgfile, create=False)
    except (IOError, OSError, mailbox.Error) as err:
        sys.stderr.write("mailarch: cannot open %s: %s\n" % (msgfile, err))
        sys.exit(1)

    try:
        stamp = time.strftime("%a, %d %b %Y %H:%M:%S +0000", time.gmtime())
        _emit("Cleaning %s with mailarch.py on %s" % (msgfile, stamp))

        for counter, key in enumerate(mbox.iterkeys(), 1):
            # from_=True keeps the "From " separator line in the stream so
            # the parser records it and get_unixfrom() can report it.
            msg = msgfactory(mbox.get_file(key, from_=True))
            if msg == '':
                # msgfactory's parse-failure marker; there is no message
                # object to print, so note it and move on.
                _emit("\n=== msg %d !! unparseable, skipped !!\n" % (counter,))
                continue

            _emit("\n=== msg %d %s\n" % (counter, msg.get_unixfrom()))
            if msg.is_multipart():
                _print_multipart(msg)
            else:
                _emit(msg.as_string(True))
    finally:
        mbox.close()


if __name__ == '__main__':
    main()