none
How to parse an Outlook e-mail body? RSS feed

  • Question

  • Hi,

    I'm trying to parse the bodies of Outlook e-mails from a subfolder of the Inbox. In some cases the e-mail body contains a table, in others just text. I'm learning how to do this with the HTML Agility Pack, but I have no clue how to loop through the e-mails. For example, from an e-mail that contains a table, I want to write every cell of the table into a new column in Excel.

    For now I have the code below, but it just writes the body of the e-mail to a cell.

    Please give me some hints; any help is appreciated.

    Thanks.

    // Exports the items of the Inbox subfolder "Octavian" to a new Excel workbook, one row per item:
    // column 1 = item index, column 2 = subject, column 3 = sent date, column 4 = plain-text body.
    // The workbook is saved to fileTest in the finally block.
    Microsoft.Office.Interop.Outlook.Application app = null;
    Microsoft.Office.Interop.Outlook.NameSpace ns = null;
    Microsoft.Office.Interop.Outlook.MAPIFolder inboxFolder = null;
    Microsoft.Office.Interop.Outlook.MAPIFolder subFolder = null;
    
    Excel.Application oApp=null;
    Excel.Workbook oWB=null;
    Excel.Worksheet oSheet=null;
    string fileTest = "C:\\Users\\Daniel\\Desktop\\test.xlsx";
    
    // Remove any output file left from a previous run before a new workbook is saved to the same path.
    if (File.Exists(fileTest))
    {
        File.Delete(fileTest);
    }
    
    // Start Excel, add an empty workbook, and take its first worksheet as the output sheet.
    oApp = new Excel.Application();
    oWB = oApp.Workbooks.Add();
    oSheet = (Excel.Worksheet)oWB.Worksheets.get_Item(1);
    
    try
    {
        app = new Microsoft.Office.Interop.Outlook.Application();
        ns = app.GetNamespace("MAPI");
        // Logon is called with an empty profile name and password -- presumably this falls back to
        // the default profile; confirm against the NameSpace.Logon documentation.
        ns.Logon("", "",false,Missing.Value);
        inboxFolder = ns.GetDefaultFolder(Microsoft.Office.Interop.Outlook.OlDefaultFolders.olFolderInbox);
        subFolder = inboxFolder.Folders["Octavian"]; //folder.Folders[1]; also works
        Console.WriteLine("Folder Name: {0}, EntryId: {1}", subFolder.Name, subFolder.EntryID);
        Console.WriteLine("Num Items: {0}", subFolder.Items.Count.ToString());
    
        // The loop runs from 1 to Items.Count inclusive; the same index i is used as the worksheet row.
        for (int i = 1; i <= subFolder.Items.Count; i++)
        {
            // NOTE(review): every item is cast straight to MailItem. If the folder holds anything that is
            // not a mail item (e.g. a meeting request), this cast would presumably throw an
            // InvalidCastException, which the COMException handler below does not catch -- confirm.
            Microsoft.Office.Interop.Outlook.MailItem item = (Microsoft.Office.Interop.Outlook.MailItem)subFolder.Items[i];
    
            oSheet.Cells[i, 1] = i.ToString();
            oSheet.Cells[i, 2] = item.Subject;
            oSheet.Cells[i, 3] = item.SentOn.ToLongDateString();
            // Body is the plain-text body, so the whole message lands in a single cell.
            // To get at individual table cells, item.HTMLBody would be the starting point.
            oSheet.Cells[i, 4] = item.Body.ToString();
    
        }
    }
    catch (System.Runtime.InteropServices.COMException ex)
    {
        // Only COM errors are handled here; they are printed to the console and execution continues.
        Console.WriteLine(ex.ToString());
    }
    finally
    {
                    
        // Runs whether or not the try block succeeded: save the workbook, close it, and quit Excel.
        oWB.SaveAs(fileTest);
        oWB.Close();
        oApp.Quit();
        // Drop the references held by the local variables (subFolder is not cleared here).
        oApp = null;
        ns = null;
        inboxFolder = null;
        app = null;
        // NOTE(review): the closing brace of this finally block is missing from the pasted snippet.

    Saturday, October 22, 2016 9:41 PM

All replies

  • Just a hint.....

    The MailItem object has a property, HTMLBody. It returns the body of the email in HTML format.

    After getting the HTML, you can parse it with an HTML DOM object or any other HTML parser.


    Best Regards,
    Asadulla Javed,
    Jadavpore & Asansol

    Sunday, October 23, 2016 3:26 AM
    Answerer
  • Hello Daniel,

    The Outlook object model provides three main ways for working with item bodies:

    1. Body - a string representing the clear-text body of the Outlook item. 
    2. HTMLBody - a string representing the HTML body of the specified item.
    3. Word editor - the Microsoft Word Document Object Model of the message being displayed. The WordEditor property of the Inspector class returns an instance of the Document class from the Word object model which you can use to set up the message body.

    You can read more about all these ways in the Chapter 17: Working with Item Bodies article. It is up to you which way to choose to customize the message body.


    [custom.development]

    Sunday, October 23, 2016 8:36 PM
  • Hi Eugene ,

    Please help me with the exception I get:

    "An unhandled exception of type 'System.ArgumentNullException' occurred in System.Core.dll

    Additional information: Value cannot be null."

    And tell me if I am on the right track to parse the email body.

    Thank you !

     // Connects to Outlook, opens the Inbox subfolder "Test", creates an Excel workbook, then builds a
     // new mail item, loads its HTMLBody into HtmlAgilityPack and prints the InnerHtml of every <td> node.
     // NOTE(review): the closing brace of Main is missing from the pasted snippet.
     static void Main(string[] args)
            {
                Microsoft.Office.Interop.Outlook.Application app = null;
                Microsoft.Office.Interop.Outlook._NameSpace ns = null;
                Microsoft.Office.Interop.Outlook.MAPIFolder inboxFolder = null;
                Microsoft.Office.Interop.Outlook.MAPIFolder subFolder = null;
    
                Excel.Application oApp;
                Excel.Workbook oWB;
                Excel.Worksheet oSheet;
    
                app = new Microsoft.Office.Interop.Outlook.Application();
                ns = app.GetNamespace("MAPI");
                // NOTE(review): the first Logon argument is presumably a MAPI profile name rather than
                // an e-mail address -- confirm against the NameSpace.Logon documentation.
                ns.Logon("daniel.tou@XXXXX", "XXXXXX", false, true);
                inboxFolder = ns.GetDefaultFolder(Microsoft.Office.Interop.Outlook.OlDefaultFolders.olFolderInbox);
                subFolder = inboxFolder.Folders["Test"]; //folder.Folders[1]; also works
    
                // The Excel objects are created here but not used in the visible part of this snippet.
                oApp = new Excel.Application();
                oWB = oApp.Workbooks.Add();
                oSheet = (Excel.Worksheet)oWB.Worksheets.get_Item(1);
    
                // NOTE(review): CreateItem produces a brand-new mail item; nothing is read from subFolder.Items.
                // HTMLBody is then assigned the string returned by subFolder.ToString(), not the HTML of a
                // received e-mail. Presumably the intent was to read HTMLBody from each item in
                // subFolder.Items -- confirm.
                MailItem mailItem = app.CreateItem(OlItemType.olMailItem);
                mailItem.HTMLBody = subFolder.ToString();
    
                HtmlAgilityPack.HtmlDocument emailBody = new HtmlAgilityPack.HtmlDocument();
                emailBody.LoadHtml(mailItem.HTMLBody);
    
                // NOTE(review): likely source of the reported ArgumentNullException. SelectNodes is
                // documented to return null when no node matches the XPath, and calling the LINQ
                // ToArray() extension on null throws "Value cannot be null" -- confirm by checking
                // the SelectNodes result for null before calling ToArray().
                HtmlAgilityPack.HtmlNode[] nodes = emailBody.DocumentNode.SelectNodes("//td").ToArray();
    
                // Print the inner HTML of each table cell that was found.
                foreach (HtmlAgilityPack.HtmlNode item in nodes)
                {
                    Console.WriteLine(item.InnerHtml);
                }



    Wednesday, December 28, 2016 11:29 AM