none
Convert a HTML table with rowspans to datatable C#

    Question

  • I need to convert a Html Table to DataTable in C#. I used HtmlAgilityPack but it does not convert it well because of rowspans. The code I am currently using is:

     private static DataTable convertHtmlTableToDataTable()
        {
            WebClient webClient = new WebClient();
            string urlContent = webClient.DownloadString("http://example.com");
    
            string tableCode = getTableCode(urlContent);
    
            string htmlCode = tableCode.Replace(" ", " ");
    
            HtmlDocument doc = new HtmlDocument();
            doc.LoadHtml(htmlCode);
            var headers = doc.DocumentNode.SelectNodes("//tr/th");
            DataTable table = new DataTable();
    
            foreach (HtmlNode header in headers)
            {
                table.Columns.Add(header.InnerText);
            }
            foreach (var row in doc.DocumentNode.SelectNodes("//tr[td]"))
            {
                table.Rows.Add(row.SelectNodes("td").Select(td => td.InnerText).ToArray());
            }
            return table;
        }

    And this is a part of Html Table:

    <table class="tabel" cellspacing="0" border="0">
        <caption style="font-family:Verdana; font-size:20px;">SEMGRP</caption>
        <tr>
            <th class="celula" >Ora</th>
            <th  class="latime_celula celula">Luni</th>
            <th  class="latime_celula celula">Marti</th>
            <th  class="latime_celula celula">Miercuri</th>
            <th  class="latime_celula celula">Joi</th>
            <th  class="latime_celula celula">Vineri</th>
        </tr>
        <tr>
            <td class="celula" nowrap="nowrap">8-9</td>
            <td class="celula" rowspan="2">
                                    <table border="0" align="center">
                                        <tr>
                                            <td nowrap="nowrap" align="center">   
                                                Curs    
                                                <br />
                                                <a class="link_celula" href="afis_n0.php?id_tip=287&amp;tip=p">Prof</a> 
                                                <br />
                                                <a class="link_celula" href="afis_n0.php?id_tip=9&amp;tip=s">Sala</a>
                                                <br />
                                            </td>
                                        </tr>
                                    </table>
            </td>
            <td class="celula" rowspan="2">
                                    <table border="0" align="center">
                                        <tr>
                                            <td nowrap="nowrap" align="center">
                                                Curs    
                                                <br />
                                                <a class="link_celula" href="afis_n0.php?id_tip=287&amp;tip=p">Prof</a> 
                                                <br />
                                                <a class="link_celula" href="afis_n0.php?id_tip=12&amp;tip=s">Sala</a>  
                                                <br />
                                            </td>
                                        </tr>
                                    </table>
            </td>
            <td class="celula">&nbsp;</td>
            <td class="celula">&nbsp;</td>
            <td class="celula" rowspan="2">
                                    <table border="0" align="center">
                                        <tr>
                                            <td nowrap="nowrap" align="center">
                                            Curs
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=293&amp;tip=p">Prof</a>
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=9&amp;tip=s">Sala</a>
                                            <br />
                                            </td>
                                        </tr>
                                    </table>
            </td>
        </tr>
        <tr>
            <td class="celula" nowrap="nowrap">9-10</td>
            <td class="celula">&nbsp;</td>
            <td class="celula">&nbsp;</td>
        </tr>
        <tr>
            <td class="celula" nowrap="nowrap">10-11</td>
            <td class="celula" rowspan="2">
                                    <table border="0" align="center">
                                        <tr>
                                            <td nowrap="nowrap" align="center">   Curs
                                            <br /><a class="link_celula" href="afis_n0.php?id_tip=303&amp;tip=p">Prof</a>
                                            <br /><a class="link_celula" href="afis_n0.php?id_tip=9&amp;tip=s">Sala</a>
                                            <br />
                                            </td>
                                        </tr>
                                    </table>
            </td>
            <td class="celula" rowspan="2">
                                    <table border="0" align="center">
                                        <tr>
                                            <td nowrap="nowrap" align="center">   Curs
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=331&amp;tip=p">Prof</a>
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=14&amp;tip=s">Sala</a>  
                                            <br />
                                            </td>
                                        </tr>
                                    </table>
            </td>
            <td class="celula" rowspan="2">
                                    <table border="0" align="center">
                                        <tr>
                                            <td nowrap="nowrap" align="center">   Curs
                                            <br /><a class="link_celula" href="afis_n0.php?id_tip=330&amp;tip=p">Prof</a>   
                                            <br /><a class="link_celula" href="afis_n0.php?id_tip=9&amp;tip=s">Sala</a> 
                                            <br />
                                            </td>
                                        </tr>
                                    </table>
            </td>
            <td class="celula">&nbsp;</td>
            <td class="celula" rowspan="2">
                                    <table border="0" align="center">
                                        <tr>
                                            <td nowrap="nowrap" align="center">   Curs
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=293&amp;tip=p">Prof</a>
                                            <br />
                                            <a class="link_celula" href="afis_n0.php?id_tip=10&amp;tip=s">Sala</a>  <br />
                                            </td>
                                        </tr>
                                    </table>
            </td>
        </tr>
        <tr>
            <td class="celula" nowrap="nowrap">11-12</td>
            <td class="celula">&nbsp;</td>
        </tr>
        <tr>
    I tried some solutions but I did not find any good... Thanks for any help in advance.




    • Edited by StefanTP Saturday, April 22, 2017 11:34 PM
    Saturday, April 22, 2017 11:33 PM

All replies

  • No automated system is going to help you with this. The HTML table you posted isn't consistent enough to create a table. What you'll have to do is enumerate the TR elements in the table and then the TD elements within each row. For each one you'll have to decide how to convert that cell to an appropriate value for your column. Note that DataTable requires that each column type be specified so unless you can narrow down the column data then most of them will be strings. What you store in that string depends upon the TD element.In your example you have such a mix of data that you're going to have to apply some filtering rules such that it makes sense for you. As example 1 cell contains a couple newlines and links. You'll have to decided how to translate that based upon your needs.

    Also note that HTML does not require that all cells in a row be defined and this doesn't even get into the colspan attribute. You're going to have to handle all this based upon your input. We cannot really help here as we don't understand your requirements. I would recommend that you start by just getting the entire HTML text of the TD into a column. Then you can start cleaning up the column values.

    For questions related to HAP please post them in their forums. We can only help with C#-specific questions here.

    Michael Taylor
    http://www.michaeltaylorp3.net

    Monday, April 24, 2017 2:45 PM
    Moderator
  • Hi StefanTP,

    Thank you for posting here.

    For your question, please refer to the thread I done before. After get the list of the html. And then put the list to a datatable.

    And then I do the following to convert the list to the datatable for your reference.

      private void Form1_Load(object sender, EventArgs e)
            {
                WebClient webClient = new WebClient();
                string page = webClient.DownloadString("http://racing.hkjc.com/racing/Info/Meeting/Results/English/Local/20161116/HV/3");
    
                HtmlAgilityPack.HtmlDocument doc = new HtmlAgilityPack.HtmlDocument();
                doc.LoadHtml(page);
    
                List<List<string>> table = doc.DocumentNode.SelectSingleNode("//table[@class='tableBorder trBgBlue tdAlignC number12 draggable']")
                            .Descendants("tr")
                            .Where(tr => tr.Elements("td").Count() == 12)
                            .Select(tr => tr.Elements("td").Select(td => td.InnerText.Trim()).ToList())
                            .ToList();
    
                dataGridView1.ColumnCount = table[0].Count;
                int x = 0;
                foreach (var item in table[0])
                {
                    dataGridView1.Columns[x].Name = item;
                    x++;
                }
                table.RemoveAt(0);
                foreach (var item in table)
                {
                    string[] s = item.ToArray();
                    dataGridView1.Rows.Add(s);
                }

    This is what I get from the html. 

    If it could not solved your question, please provide the url which you download the html for us to test.

    Best Regards,

    Wendy


    MSDN Community Support
    Please remember to click "Mark as Answer" the responses that resolved your issue, and to click "Unmark as Answer" if not. This can be beneficial to other community members reading this thread. If you have any compliments or complaints to MSDN Support, feel free to contact MSDNFSF@microsoft.com.


    Tuesday, April 25, 2017 7:03 AM
    Moderator