Skip to content
Advertisement

Read email in python 3.7 using imaplib with HTML body and attachments in the email

I would really appreciate it if someone could help me with this issue.

I have implemented the below code to read “unread emails from gmail inbox”. I need to print “To”, “From”, “Subject”, “Body” and “save attachments in a specified location”

I have 2 issues here.

  1. If there is any email with attachments, it gives the error Body: [<email.message.Message object at 0x026D1050>, <email.message.Message object at 0x02776B70>]. It will print all the required things and saves attachments but DOESN’T print the body.

This works fine if no attachment is included.

  2. If there is an email body with any styling in it, like “bold/italic/underline/colour…etc”, it doesn’t print as it is.

Example: “Python” is printed as “Python=C2=A0i=”, and sometimes differently styled text is separated by “*”.

# Returns the payload of the first sub-part of `email_message`: the loop
# breaks on its first iteration, leaving `payload` bound to that sub-part.
# NOTE(review): as pasted, the body lines are not indented under the `def`,
# so this is not valid Python -- presumably the indentation was lost.
# NOTE(review): when that first sub-part is itself multipart,
# `payload.get_payload()` is a list of Message objects rather than text,
# which matches the `Body: [<email.message.Message object ...>]` output
# reported for emails with attachments.
def get_body(email_message):
for payload in email_message.get_payload():
     # print('Body:t', payload.get_payload())
     break
return(payload.get_payload())
# Log in to `server` over IMAP/SSL as `uname`/`pwd`, fetch one unseen message
# from the inbox, save its attachments and print its headers and body.
def read_email(server,uname,pwd):
    username = uname
    password = pwd
    mail = imaplib.IMAP4_SSL(server)
    mail.login(username, password)
    mail.select("inbox")
    try:
        # UID search for messages that do not have the \Seen flag.
        result, data = mail.uid('search', None, '(UNSEEN)')
        inbox_item_list = data[0].split()
        # Last UID in the result; raises IndexError (handled at the bottom)
        # when the search returned nothing.
        most_recent = inbox_item_list[-1]
        result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
        # NOTE(review): assumes the raw message bytes are valid UTF-8.
        raw_email = email_data[0][1].decode("UTF-8")
        email_message = email.message_from_string(raw_email)
        # Save every non-multipart part that carries a Content-Disposition
        # header, unless a file with that name already exists.
        for part in email_message.walk():
            if part.get_content_maintype() == 'multipart':
                continue
            if part.get('Content-Disposition') is None:
                continue
            filename = part.get_filename()
            # NOTE(review): `location` is not defined in this snippet --
            # presumably a module-level directory path.
            att_path = os.path.join(location, filename)

            if not os.path.isfile(att_path):
                fp = open(att_path, 'wb')
                fp.write(part.get_payload(decode=True))
                fp.close()
                print('Downloaded file:', filename)
        if email_message.is_multipart():
            # The loop body runs at most once: the `break` leaves after the
            # first sub-part, so the headers and body are printed one time.
            for payload in email_message.get_payload():
                print('To:tt', email_message['To'])
                print('From:t',     email_message['From'])
                print('Subject:', email_message['Subject'])
                print('Date:t',email_message['Date'])
                print('Body:t', get_body(email_message))
                break        
        else:
            # NOTE(review): the stray `]` makes this line a syntax error as pasted.
            print('Nothing'])               
    except IndexError:
        print("No new email")
# Poll the Gmail inbox forever.
# NOTE(review): `time.sleep(10)` sits at column 0, outside the loop body, so as
# pasted it is never reached and the loop polls without pausing -- presumably
# it was meant to be indented inside the `while`.
while True:
    read_email("imap.gmail.com", "s@gmail.com", "spassword")
time.sleep(10)

Many thanks

Advertisement

Answer

I am new to Python, and this is the complete working code I wrote to read unseen emails. You can print the elements according to your requirements. It works for Gmail and Office 365. This script runs every 10 seconds. It might also work for other email providers if you pass in their credentials. Hope this helps.

import email
import imaplib
import os
import html2text
import time
# Directory that two_way_email() writes attachments into; replace this
# placeholder with a real path before running the script.
detach_dir = 'locationWhereYouWantToSaveYourAttachments'


def _decode_part(part):
    """Return the content of one MIME *part* as ``str``.

    The Content-Transfer-Encoding (quoted-printable, base64, ...) is undone
    first, then the bytes are decoded with the part's declared charset.
    Undecodable bytes are replaced rather than raising.
    """
    raw = part.get_payload(decode=True)
    if raw is None:
        # Multipart containers have no payload of their own.
        return ''
    charset = part.get_content_charset() or 'utf-8'
    try:
        return raw.decode(charset, errors='replace')
    except LookupError:
        # The message declared a charset Python does not know.
        return raw.decode('utf-8', errors='replace')


def _find_body(email_message):
    """Locate the message body and return ``(text, is_html)``.

    The first ``text/plain`` part that is not an attachment wins. If there
    is none, the first non-attachment ``text/html`` part is returned with
    ``is_html`` set to ``True``. ``('', False)`` means no body was found.
    """
    html_fallback = None
    for part in email_message.walk():
        if part.is_multipart():
            continue
        if part.get_content_disposition() == 'attachment' or part.get_filename():
            continue
        content_type = part.get_content_type()
        if content_type == 'text/plain':
            return _decode_part(part), False
        if content_type == 'text/html' and html_fallback is None:
            html_fallback = _decode_part(part)
    if html_fallback is not None:
        return html_fallback, True
    return '', False


def _save_attachments(email_message, directory):
    """Write every named attachment of *email_message* into *directory*.

    Parts without a Content-Disposition header or without a filename are
    skipped, and a file that already exists is never overwritten.
    """
    for part in email_message.walk():
        if part.get_content_maintype() == 'multipart':
            continue
        if part.get('Content-Disposition') is None:
            continue
        filename = part.get_filename()
        if not filename:
            # e.g. "Content-Disposition: inline" on a body part.
            continue
        # The filename comes from the sender: keep only its last component
        # so it cannot point outside *directory*.
        filename = os.path.basename(filename.replace('\\', '/'))
        content = part.get_payload(decode=True)
        if not filename or content is None:
            continue
        att_path = os.path.join(directory, filename)
        if os.path.isfile(att_path):
            continue
        with open(att_path, 'wb') as fp:
            fp.write(content)
        print('Downloaded file:', filename)


def get_body(email_message):
    """Return the decoded body text of *email_message*.

    Works for single-part messages and for multipart messages of any
    nesting depth, including those with attachments. Returns the plain-text
    body when there is one, otherwise the raw HTML body, otherwise ``''``.
    """
    return _find_body(email_message)[0]


def two_way_email(server, uname, pwd):
    """Fetch the newest unseen inbox message and print it.

    Connects to *server* over IMAP/SSL, logs in as *uname* / *pwd*, fetches
    the last UID returned by an UNSEEN search, saves its attachments into
    ``detach_dir`` and prints its headers and body. Prints "No new email"
    when there is nothing to fetch. The connection is logged out on exit.
    """
    with imaplib.IMAP4_SSL(server) as mail:
        mail.login(uname, pwd)
        mail.select("inbox")
        result, data = mail.uid('search', None, '(UNSEEN)')
        inbox_item_list = data[0].split() if data and data[0] else []
        if not inbox_item_list:
            print("No new email")
            return
        most_recent = inbox_item_list[-1]
        # Per RFC 3501, fetching RFC822 should also mark the message \Seen,
        # so the next poll moves on to another message.
        result2, email_data = mail.uid('fetch', most_recent, '(RFC822)')
        if not email_data or not isinstance(email_data[0], tuple):
            print("No new email")
            return
        # Parse the raw bytes directly: the message is not guaranteed to be
        # valid UTF-8, and each part declares its own charset.
        email_message = email.message_from_bytes(email_data[0][1])

    _save_attachments(email_message, detach_dir)

    # NOTE(review): the 't' / 'tt' suffixes in the labels below are kept
    # verbatim; they look like tab escapes that lost their backslashes.
    print('To:tt', email_message['To'])
    print('From:t', email_message['From'])
    print('Subject:', email_message['Subject'])
    print('Date:t', email_message['Date'])

    body, is_html = _find_body(email_message)
    if is_html:
        converter = html2text.HTML2Text()
        converter.ignore_links = True
        body = converter.handle(body)

    if email_message.is_multipart():
        print('Body:t', body)
    else:
        print('Thread-Index:t', email_message['Thread-Index'])
        print(body)
# Poll both mailboxes forever, pausing 10 seconds between rounds.
# Replace the placeholder addresses and passwords before running.
while True:
    two_way_email("outlook.office365.com", "yourOffice365EmailAddressHere", "yourpassword")
    two_way_email("imap.gmail.com", "yourGmailAddressHere", "yourPassword")
    time.sleep(10)
User contributions licensed under: CC BY-SA
1 person found this helpful
Advertisement