locked
Split datatable based on row count and create new sheet and export to Excel RRS feed

  • Question

  • User-73514677 posted

    Hi.

    I am trying to export data from dataset to Excel 2003 version ( C#, VS 2010, ASP.NET Web Application ) . By default, excel can take 63000 rows in a single sheet. Post that, there will be out of Memory exception thrown.

    In the dataset, I have 4 data tables. I am currently using the below code to transfer data, which works if the row count is less.

    I am trying to achieve -  If the data row is more than 60000 , then move the next set of records to another data table and then pass the data table one by one to the function to export it.

    My below code, takes the whole of dataset and tries to create the excel. The below code is a reference from code project

    Below is the Excel Helper class 

     const int rowLimit = 63000;
            
            private static string getWorkbookTemplate()
            {
                var sb = new StringBuilder();
                sb.Append("<xml version>\r\n<Workbook xmlns=\"urn:schemas-microsoft-com:office:spreadsheet\"\r\n");
                sb.Append(" xmlns:o=\"urn:schemas-microsoft-com:office:office\"\r\n xmlns:x=\"urn:schemas- microsoft-com:office:excel\"\r\n xmlns:ss=\"urn:schemas-microsoft-com:office:spreadsheet\">\r\n");
                sb.Append(" <Styles>\r\n <Style ss:ID=\"Default\" ss:Name=\"Normal\">\r\n <Alignment ss:Vertical=\"Bottom\"/>\r\n <Borders/>");
                sb.Append("\r\n <Font/>\r\n <Interior/>\r\n <NumberFormat/>\r\n <Protection/>\r\n </Style>\r\n <Style ss:ID=\"BoldColumn\">\r\n <Font ");
                sb.Append("x:Family=\"Swiss\" ss:Bold=\"1\"/>\r\n </Style>\r\n <Style ss:ID=\"s62\">\r\n <NumberFormat");
                sb.Append(" ss:Format=\"@\"/>\r\n </Style>\r\n <Style ss:ID=\"Decimal\">\r\n <NumberFormat ss:Format=\"0.0000\"/>\r\n </Style>\r\n ");
                sb.Append("<Style ss:ID=\"Integer\">\r\n <NumberFormat ss:Format=\"0\"/>\r\n </Style>\r\n <Style ss:ID=\"DateLiteral\">\r\n <NumberFormat ");
                sb.Append("ss:Format=\"mm/dd/yyyy;@\"/>\r\n </Style>\r\n <Style ss:ID=\"s28\">\r\n");
                sb.Append("<Alignment ss:Horizontal=\"Left\" ss:Vertical=\"Top\" ss:ReadingOrder=\"LeftToRight\" ss:WrapText=\"1\"/>\r\n");
                sb.Append("<Font x:CharSet=\"1\" ss:Size=\"9\" ss:Color=\"#808080\" ss:Underline=\"Single\"/>\r\n");
                sb.Append("<Interior ss:Color=\"#FFFFFF\" ss:Pattern=\"Solid\"/> </Style>\r\n</Styles>\r\n {0}</Workbook>");
             
                return sb.ToString();
            }
    
            private static string replaceXmlChar(string input)
            {
                input = input.Replace("&", "&");
                input = input.Replace("<", "<");
                input = input.Replace(">", ">");
                input = input.Replace("\"", "&quot;");
                input = input.Replace("'", "&apos;");
                return input;
            }
    
            private static string getWorksheets(DataSet source)
            {
                var sw = new StringWriter();
                if (source == null || source.Tables.Count == 0)
                {
                    sw.Write("<Worksheet ss:Name=\"Sheet1\"><Table><Row><Cell  ss:StyleID=\"s62\"><Data ss:Type=\"String\"></Data></Cell></Row></Table></Worksheet>");
                    return sw.ToString();
                }
                foreach (DataTable dt in source.Tables)
                {
                    if (dt.Rows.Count == 0)
                        sw.Write("<Worksheet ss:Name=\"" + replaceXmlChar(dt.TableName) + "\"><Table><Row><Cell  ss:StyleID=\"s62\"><Data ss:Type=\"String\"></Data></Cell></Row></Table></Worksheet>");
                    else
                    {
                        //write each row data
                        var sheetCount = 0;
                        for (int i = 0; i < dt.Rows.Count; i++)
                        {
                            if ((i % rowLimit) == 0)
                            {
                                //add close tags for previous sheet of the same data table
                                if ((i / rowLimit) > sheetCount)
                                {
                                    sw.Write("</Table></Worksheet>");
                                    sheetCount = (i / rowLimit);
                                }
                                sw.Write("<Worksheet ss:Name=\"" +
    				replaceXmlChar(dt.TableName) +
                                         (((i / rowLimit) == 0) ? "" :
    				Convert.ToString(i / rowLimit)) + "\"><Table>");
                                //write column name row
                                sw.Write("<Row>");
                                foreach (DataColumn dc in dt.Columns)
                                    sw.Write(
                                        string.Format("<Cell ss:StyleID=\"BoldColumn\"><Data ss:Type=\"String\">{0}</Data></Cell>",
                                            replaceXmlChar(dc.ColumnName)));
                                sw.Write("</Row>\r\n");
                            }
                            sw.Write("<Row>\r\n");
                            foreach (DataColumn dc in dt.Columns)
                                sw.Write(
                                    string.Format(
                                        "<Cell ss:StyleID=\"s62\"><Data ss:Type=\"String\">{0}</Data></Cell>",
                                        		replaceXmlChar
    					(dt.Rows[i][dc.ColumnName].ToString())));
                            sw.Write("</Row>\r\n");
                        }
                        sw.Write("</Table></Worksheet>");
                    }
                }
    
                return sw.ToString();
            }
            public static string GetExcelXml(DataTable dtInput, string filename)
            {
                var excelTemplate = getWorkbookTemplate();
                var ds = new DataSet();
                ds.Tables.Add(dtInput.Copy());
                var worksheets = getWorksheets(ds);
                var excelXml = string.Format(excelTemplate, worksheets);
                return excelXml;
            }
    
            public static string GetExcelXml(DataSet dsInput, string filename)
            {
                var excelTemplate = getWorkbookTemplate();
                var worksheets = getWorksheets(dsInput);
                var excelXml = string.Format(excelTemplate, worksheets);
                return excelXml;
            }
    
            public static void ToExcel
    		(DataSet dsInput, string filename, HttpResponse response)
            {
                var excelXml = GetExcelXml(dsInput, filename);
                response.Clear();
                response.AppendHeader("Content-Type", "application/vnd.ms-excel");
                response.AppendHeader
    		("Content-disposition", "attachment; filename=" + filename);
                response.Write(excelXml);
                response.Flush();
                response.End();
            }
    
            public static void ToExcel
    		(DataTable dtInput, string filename, HttpResponse response)
            {
                var ds = new DataSet();
                ds.Tables.Add(dtInput.Copy());
                ToExcel(ds, filename, response);
            }

    The helper class is called as below:

        public DataSet getDataSetExportToExcel(OracleConnection con)
            {
                DataSet ds = new DataSet();
    
                ds.Tables.Add(getInfo("select * from table1", con));
                ds.Tables[0].TableName = "Table1";
    
                ds.Tables.Add(getInfo("select * from table2", con));
                ds.Tables[1].TableName = "Table2";
    
                ds.Tables.Add(getInfo("select * from table3", con));
                ds.Tables[2].TableName = "Table3";
    
                ds.Tables.Add(getInfo("select * from table4", con));
                ds.Tables[3].TableName = "Table4";
    
                return ds;
            }
    
      private System.Data.DataTable getInfo(string strQuery, OracleConnection con)
            {
                System.Data.DataTable dt = new System.Data.DataTable();
    
                using (OracleCommand cmd = new OracleCommand(strQuery))
                {
                    using (OracleDataAdapter da = new OracleDataAdapter())
                    {
                        cmd.CommandType = CommandType.Text;
                        cmd.Connection = con;
                        da.SelectCommand = cmd;
                        da.Fill(dt);
    
                    }
                }
                return dt;
            }
    
     DataSet ds = getDataSetExportToExcel(con);
    
     ExcelHelper.ToExcel(ds, filename1, Page.Response);

    How to split the data and check row count and create new sheets within the same excel ?

    Thanks
     

    Thursday, June 20, 2019 5:22 AM

All replies

  • User-943250815 posted

    Bet would be limit number of records right on SQL Query, but you can use Linq on each Datatable to return a limited number of records like

    https://stackoverflow.com/questions/20718484/take-top-10-or-20-rows-from-dynamic-datatable

    newDT = dt.AsEnumerable.Take(60000)

    Just in case, did you considered use ClosedXML or EPPlus to make Excel files, they are available as Nuget packages

    Thursday, June 20, 2019 1:19 PM
  • User-73514677 posted

    Hi.

    I tried ClosedXml, but was hitting the problem of Out of Memory Exception.

    Below is the code tried in ClosedXML

    public DataSet getDataSetExportToExcel(OracleConnection con)
            {
                DataSet ds = new DataSet();
    
                ds.Tables.Add(getInfo("select * from table1", con));
                ds.Tables[0].TableName = "Table1";
    
                ds.Tables.Add(getInfo("select * from table2", con));
                ds.Tables[1].TableName = "Table2";
    
                ds.Tables.Add(getInfo("select * from table3", con));
                ds.Tables[2].TableName = "Table3";
    
                ds.Tables.Add(getInfo("select * from table4", con));
                ds.Tables[3].TableName = "Table4";
    
                return ds;
            }
    
      private System.Data.DataTable getInfo(string strQuery, OracleConnection con)
            {
                System.Data.DataTable dt = new System.Data.DataTable();
    
                using (OracleCommand cmd = new OracleCommand(strQuery))
                {
                    using (OracleDataAdapter da = new OracleDataAdapter())
                    {
                        cmd.CommandType = CommandType.Text;
                        cmd.Connection = con;
                        da.SelectCommand = cmd;
                        da.Fill(dt);
    
                    }
                }
                return dt;
            }
           DataSet ds = getDataSetExportToExcel(con);
      using (XLWorkbook wb = new XLWorkbook())
              {
                  wb.Worksheets.Add(ds);
    
                  string myName = Server.UrlEncode("Test" + "_" +
                  DateTime.Now.ToShortDateString() + ".xlsx");
                  MemoryStream stream = GetStream(wb);
                  Response.Clear();
                  Response.Buffer = true;
                  Response.AddHeader("content-disposition",
                  "attachment; filename=" + myName);
                  Response.ContentType = "application/vnd.ms-excel";
                  Response.BinaryWrite(stream.ToArray());
                  Response.End();
              }
    
    public MemoryStream GetStream(XLWorkbook excelWorkbook)
            {
                MemoryStream fs = new MemoryStream();
                excelWorkbook.SaveAs(fs);
                fs.Position = 0;
                return fs;
            }

    My code is erroring out at the below line:

     wb.Worksheets.Add(ds);
    

    I am getting out of memory exception, as the dataset is too huge. Any suggestions ?

    Thanks

    Friday, June 21, 2019 8:37 AM
  • User-943250815 posted

    I´m assuming Linq expression "dt.AsEnumerable.Take(60000)" answer you main question.
    Just in case Excel limitations are

    Excel Rows Columns
    2003 65.536 256
    >= 2007 1.048.576 16.384


    Back to ClosedXML or EppPlus, (after some search), yes there are issues on memory use.
    IMHO there is no easy way other than try different approaches and test, take a look here https://stackoverflow.com/questions/32690851/export-big-amount-of-data-from-xlsx-outofmemoryexception
    And since you already making Excel file by yourself perhaps you can mix your code with some ideas on thread.

    Friday, June 21, 2019 1:29 PM
  • User-73514677 posted

    Hi.

    Thanks for the reply.

    Is this correct ?

        private System.Data.DataTable getInfo(string strQuery, OracleConnection con, string tblName)
            {
                System.Data.DataTable dt = new System.Data.DataTable();
    
                using (OracleCommand cmd = new OracleCommand(strQuery))
                {
                    using (OracleDataAdapter da = new OracleDataAdapter())
                    {
                        cmd.CommandType = CommandType.Text;
                        cmd.Connection = con;
                        da.SelectCommand = cmd;
                        da.Fill(dt);
    
                    }
                }
                dt.TableName = tblName;
    
                if (dt.Rows.Count > 10000)
                {
                    SplitsData(dt, tblName);
                }
    
                return dt;
            } 
    
     private DataSet SplitsData(DataTable dt, string StrTableReference)
            {
    
                int tableSize = 10000;               // for example
                DataSet allTables = new DataSet();
    
                for (int i = 0; i < dt.Rows.Count; i += tableSize)
                {
                    DataTable tbl = dt.Clone(); // same columns, empty
                    for (int ii = 0; ii < tableSize; ii++)
                    {
                        if (i + ii == dt.Rows.Count) break;
                        tbl.ImportRow(dt.Rows[i + ii]);
                    }
    
                    allTables.Tables.Add(tbl);
                }
                return allTables;
            }
    
        

    I am trying to achieve is - If the datatable has more than 10000 rows, then move the next set of 10000 rows to another datatable and so forth and keep the first 10000 rows in the datatable.

    Example: Table1 has tableName  "Tbl" and contains 45000 rows, then the split should be:

    Tbl - 1 to 10000 rows  ( If tbl has 45000 rows, after split keep only the first 10000 in this and delete other rows)

    Tbl1 - 10001 - 20001 ( move rows and column name. datatable name should be Tbl1 ) 

    Tbl2 - … so forth

    Thanks

    Friday, June 21, 2019 1:58 PM
  • User-943250815 posted

    Instead of checking your code, I propose another short way to achieve what you looking for, you have to import (using in C#) System.Data and System.Data.SqlClient.
    At the end you shoud have a DataSet with a set of Datatables with no more than 10000 recs each.
    For more about CopyToDataTable https://docs.microsoft.com/en-us/dotnet/api/system.data.datatableextensions.copytodatatable?view=netframework-4.8

    Dim zDS As New DataSet
    Using zDT As New System.Data.DataTable
    Dim zConnStr As String = <your connection string> Dim zSqlStr As String = <your query string> Using zSqlDA As New SqlDataAdapter(zSqlStr, zConnStr) zSqlDA.Fill(zDT) ' Fill datatable End Using Dim n As Integer = 0 Dim zDS As New DataSet While zDT.AsEnumerable.Skip(n).Take(10000).Count > 0 Dim zIE As IEnumerable(Of DataRow) = zDT.AsEnumerable.Skip(n).Take(10000).AsEnumerable ' Collect 10000 recs Dim zDTtmp As DataTable = zIE.CopyToDataTable() ' Place all in Datatable zDS.Tables.Add(zDTtmp) ' Add to Datasource n = n + 10000 End While
    End Using

    Friday, June 21, 2019 5:52 PM
  • User-73514677 posted

    Hi.

    I am using VS 2010 and C#, Is this option available in C#  for 2010 version?

    Sunday, June 23, 2019 7:24 AM
  • User-943250815 posted

    Since you targeting .Net Framework 3.5 or above, yes it should be available

    Sunday, June 23, 2019 11:43 AM
  • User-73514677 posted

    Thanks.

    I have tried the below code:

     private static List<DataTable> SplitTable(DataTable originalTable, int batchSize, string tblName)
            { 
    List<DataTable> tables = new List<DataTable>();
                DataTable new_table = new DataTable();
                new_table = originalTable.Clone();
                int j = 0;
                int k = 1;
                if (originalTable.Rows.Count < batchSize)
                {
                    new_table.TableName = tblName + "_" + j;
                    new_table = originalTable.Copy();
                    tables.Add(new_table.Copy());
                }
                else
                {
                    for (int i = 0; i < originalTable.Rows.Count; i++)
                    {
                        new_table.NewRow();
                        new_table.ImportRow(originalTable.Rows[i]);
                        if ((i + 1) == originalTable.Rows.Count)
                        {
                            new_table.TableName = tblName + "_" + j;
                            tables.Add(new_table.Copy());
                            new_table.Rows.Clear(); k++;
                        }
                        else if (++j == batchSize)
                        {
                            new_table.TableName = tblName + "_" + j;
                            tables.Add(new_table.Copy());
                            new_table.Rows.Clear();
                            k++; 
                            j = 0;
                        }
                    }
                }
                return tables;
            }

    Few issues here:

    1) If the datatable contains 53140 rows, after execution of this code I have datatables 0 to 6 with each datatable containing 10000 rows and last one containing 3139. There is one row missing. Not sure, where is the issue.

    2) Each datatable is having tablename as Table_10000 and the last one as Table_3139. I want the TableName as Table_1 , Table_2.. like this

    Thanks

    Monday, June 24, 2019 8:19 AM
  • User-943250815 posted

    Tested converted code (proposed), instead of "AsEnumerable" use "AsEnumerable()" and "Count" to "Count()" it will work, tables will be named Table1, Table2, Table3.....

    private DataSet SplitsData1(DataTable zDT)
    {
    int n = 0;
    DataSet zDS = new DataSet();
    while ((zDT.AsEnumerable().Skip(n).Take(10000).Count() > 0))
    {
    IEnumerable<DataRow> zIE = zDT.AsEnumerable().Skip(n).Take(10000).AsEnumerable();
    // Collect 10000 recs
    DataTable zDTtmp = zIE.CopyToDataTable();
    // Place all in Datatable
    zDS.Tables.Add(zDTtmp);
    // Add to Datasource
    n = (n + 10000);
    }
    return zDS;
    }

    About your last code, to looks somthing is missing, at end all tables have same name....
    Try again code above, it should work (tested here)

    Monday, June 24, 2019 1:03 PM