none
While Exporting Excel data into c# datatable using DocumentFormat Assembly ignores blank cells RRS feed

  • Question

  • I need to export the excel data to database table for that I am using DocumentFormat.OpenXML assembly for that purpose. what I am doing, I collect the excel data and exported into c# datatable then through SqlBulkCopy datatable data copied into database table my question is that I want to have empty cells also in my datatable see e.g.

    CustomarName Code City

    xyz                  c001  

                           c002    Mumbai

    so i want three column and 2 rows with empty value also.

    but its not working with doumentFormat assembly, can anybody see into that.

    please guide me.


    Zishan Ansari .Net Developer

    Saturday, June 30, 2012 5:22 AM

Answers

  • Hi ZeeShan,

    Welcome to the MSDN forum!

    How is it going with the problem currently?

    Apart from Quist's suggestion, I built a solution as follow, and you may give a try:

            private void button1_Click(object sender, EventArgs e)
            {            
                string fileName = @"D:\00-Excel\TestBlankCell.xlsx";
                this.dataGridView1.DataSource=ReadExcelFileDOM(fileName);
            }
    
            DataTable ReadExcelFileDOM(string filename)
            {
                DataTable table;
               
                using (SpreadsheetDocument myDoc = SpreadsheetDocument.Open(filename, true))
                {
                    WorkbookPart workbookPart = myDoc.WorkbookPart;
                    Sheet worksheet = workbookPart.Workbook.Descendants<Sheet>().First();
                    WorksheetPart worksheetPart =
                     (WorksheetPart)(workbookPart.GetPartById(worksheet.Id));
                    SheetData sheetData =
                        worksheetPart.Worksheet.Elements<SheetData>().First();
                    List<List<string>> totalRows = new List<List<string>>();
                    int maxCol = 0;
    
                    foreach (Row r in sheetData.Elements<Row>())
                    {
                        // Add the empty row.
                        string value = null;
                        while (totalRows.Count < r.RowIndex - 1)
                        {
                            List<string> emptyRowValues = new List<string>();
                            for (int i = 0; i < maxCol; i++)
                            {
                                emptyRowValues.Add("");
                            }
                            totalRows.Add(emptyRowValues);
                        }
    
    
                        List<string> tempRowValues = new List<string>();
                        foreach (Cell c in r.Elements<Cell>())
                        {
                            #region get the cell value of c.
                            if (c != null)
                            {
                                value = c.InnerText;
    
                                // If the cell represents a numeric value, you are done. 
                                // For dates, this code returns the serialized value that 
                                // represents the date. The code handles strings and Booleans
                                // individually. For shared strings, the code looks up the 
                                // corresponding value in the shared string table. For Booleans, 
                                // the code converts the value into the words TRUE or FALSE.
                                if (c.DataType != null)
                                {
                                    switch (c.DataType.Value)
                                    {
                                        case CellValues.SharedString:
                                            // For shared strings, look up the value in the shared 
                                            // strings table.
                                            var stringTable = workbookPart.
                                                GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
    
                                            // If the shared string table is missing, something is 
                                            // wrong. Return the index that you found in the cell.
                                            // Otherwise, look up the correct text in the table.
                                            if (stringTable != null)
                                            {
                                                value = stringTable.SharedStringTable.
                                                    ElementAt(int.Parse(value)).InnerText;
                                            }
                                            break;
    
                                        case CellValues.Boolean:
                                            switch (value)
                                            {
                                                case "0":
                                                    value = "FALSE";
                                                    break;
                                                default:
                                                    value = "TRUE";
                                                    break;
                                            }
                                            break;
                                    }
                                }
    
                                Console.Write(value + "  ");
                            }
                            #endregion
    
                            // Add the cell to the row list.
                            int i = Convert.ToInt32(c.CellReference.ToString().ToCharArray().First() - 'A');
    
                            // Add the blank cell in the row.
                            while (tempRowValues.Count < i)
                            {
                                tempRowValues.Add("");
                            }
                            tempRowValues.Add(value);
                        }
    
                        // add the row to the totalRows.
                        maxCol = processList(tempRowValues, totalRows, maxCol);
    
                        Console.WriteLine();
                    }
    
                    table = ConvertListListStringToDataTable(totalRows, maxCol);
                }
                return table;
            }
    
            /// <summary>
            /// Add each row to the totalRows.
            /// </summary>
            /// <param name="tempRows"></param>
            /// <param name="totalRows"></param>
            /// <param name="MaxCol">the max column number in rows of the totalRows</param>
            /// <returns></returns>
            int processList(List<string> tempRows, List<List<string>> totalRows, int MaxCol)
            {
                if (tempRows.Count > MaxCol)
                {
                    MaxCol = tempRows.Count;
                }
    
                totalRows.Add(tempRows);
                return MaxCol;
            }
    
            DataTable ConvertListListStringToDataTable(List<List<string>> totalRows, int maxCol)
            {
                DataTable table = new DataTable();
                for (int i = 0; i < maxCol; i++)
                {
                    table.Columns.Add();
                }
                foreach (List<string> row in totalRows)
                {
                    while (row.Count < maxCol)
                    {
                        row.Add("");
                    }
                    table.Rows.Add(row.ToArray());
                }
                return table;
            }     

    NOTE: Since a blank cell is not recorded in the worksheet.xml file, I add some logic to judge if there are empty cells in each row and if there are empty row for the whole "table".

    Reference from:

    Retrieving the Values of Cells in Excel 2010 Workbooks by Using the Open XML SDK 2.0
    http://msdn.microsoft.com/en-us/library/ff921204.aspx

    Thanks.


    Yoyo Jiang[MSFT]
    MSDN Community Support | Feedback to us

    Wednesday, July 4, 2012 9:10 AM
    Moderator

All replies

  • Hi Zeeshan,

    Thank you for posting in the MSDN Forum.

    If you open your file using the Open XML SDK 2.0 Productivity Tool and traverse the XML down to the cell level you will see that only the cells that have data are going to be in that file.

    Your options are to insert blank data in the range of cells you are going to traverse or programmatically figure out a cell was skipped and adjust your index appropriately.

    Please see this web page for help.
    http://stackoverflow.com/questions/3837981/reading-excel-open-xml-is-ignoring-blank-cells  

    Hope it helps.

    Best regards,
    Quist

    Quist Zhang [MSFT]
    MSDN Community Support | Feedback to us

    Tuesday, July 3, 2012 9:50 AM
    Moderator
  • Hi ZeeShan,

    Welcome to the MSDN forum!

    How is it going with the problem currently?

    Apart from Quist's suggestion, I built a solution as follow, and you may give a try:

            private void button1_Click(object sender, EventArgs e)
            {            
                string fileName = @"D:\00-Excel\TestBlankCell.xlsx";
                this.dataGridView1.DataSource=ReadExcelFileDOM(fileName);
            }
    
            DataTable ReadExcelFileDOM(string filename)
            {
                DataTable table;
               
                using (SpreadsheetDocument myDoc = SpreadsheetDocument.Open(filename, true))
                {
                    WorkbookPart workbookPart = myDoc.WorkbookPart;
                    Sheet worksheet = workbookPart.Workbook.Descendants<Sheet>().First();
                    WorksheetPart worksheetPart =
                     (WorksheetPart)(workbookPart.GetPartById(worksheet.Id));
                    SheetData sheetData =
                        worksheetPart.Worksheet.Elements<SheetData>().First();
                    List<List<string>> totalRows = new List<List<string>>();
                    int maxCol = 0;
    
                    foreach (Row r in sheetData.Elements<Row>())
                    {
                        // Add the empty row.
                        string value = null;
                        while (totalRows.Count < r.RowIndex - 1)
                        {
                            List<string> emptyRowValues = new List<string>();
                            for (int i = 0; i < maxCol; i++)
                            {
                                emptyRowValues.Add("");
                            }
                            totalRows.Add(emptyRowValues);
                        }
    
    
                        List<string> tempRowValues = new List<string>();
                        foreach (Cell c in r.Elements<Cell>())
                        {
                            #region get the cell value of c.
                            if (c != null)
                            {
                                value = c.InnerText;
    
                                // If the cell represents a numeric value, you are done. 
                                // For dates, this code returns the serialized value that 
                                // represents the date. The code handles strings and Booleans
                                // individually. For shared strings, the code looks up the 
                                // corresponding value in the shared string table. For Booleans, 
                                // the code converts the value into the words TRUE or FALSE.
                                if (c.DataType != null)
                                {
                                    switch (c.DataType.Value)
                                    {
                                        case CellValues.SharedString:
                                            // For shared strings, look up the value in the shared 
                                            // strings table.
                                            var stringTable = workbookPart.
                                                GetPartsOfType<SharedStringTablePart>().FirstOrDefault();
    
                                            // If the shared string table is missing, something is 
                                            // wrong. Return the index that you found in the cell.
                                            // Otherwise, look up the correct text in the table.
                                            if (stringTable != null)
                                            {
                                                value = stringTable.SharedStringTable.
                                                    ElementAt(int.Parse(value)).InnerText;
                                            }
                                            break;
    
                                        case CellValues.Boolean:
                                            switch (value)
                                            {
                                                case "0":
                                                    value = "FALSE";
                                                    break;
                                                default:
                                                    value = "TRUE";
                                                    break;
                                            }
                                            break;
                                    }
                                }
    
                                Console.Write(value + "  ");
                            }
                            #endregion
    
                            // Add the cell to the row list.
                            int i = Convert.ToInt32(c.CellReference.ToString().ToCharArray().First() - 'A');
    
                            // Add the blank cell in the row.
                            while (tempRowValues.Count < i)
                            {
                                tempRowValues.Add("");
                            }
                            tempRowValues.Add(value);
                        }
    
                        // add the row to the totalRows.
                        maxCol = processList(tempRowValues, totalRows, maxCol);
    
                        Console.WriteLine();
                    }
    
                    table = ConvertListListStringToDataTable(totalRows, maxCol);
                }
                return table;
            }
    
            /// <summary>
            /// Add each row to the totalRows.
            /// </summary>
            /// <param name="tempRows"></param>
            /// <param name="totalRows"></param>
            /// <param name="MaxCol">the max column number in rows of the totalRows</param>
            /// <returns></returns>
            int processList(List<string> tempRows, List<List<string>> totalRows, int MaxCol)
            {
                if (tempRows.Count > MaxCol)
                {
                    MaxCol = tempRows.Count;
                }
    
                totalRows.Add(tempRows);
                return MaxCol;
            }
    
            DataTable ConvertListListStringToDataTable(List<List<string>> totalRows, int maxCol)
            {
                DataTable table = new DataTable();
                for (int i = 0; i < maxCol; i++)
                {
                    table.Columns.Add();
                }
                foreach (List<string> row in totalRows)
                {
                    while (row.Count < maxCol)
                    {
                        row.Add("");
                    }
                    table.Rows.Add(row.ToArray());
                }
                return table;
            }     

    NOTE: Since a blank cell is not recorded in the worksheet.xml file, I add some logic to judge if there are empty cells in each row and if there are empty row for the whole "table".

    Reference from:

    Retrieving the Values of Cells in Excel 2010 Workbooks by Using the Open XML SDK 2.0
    http://msdn.microsoft.com/en-us/library/ff921204.aspx

    Thanks.


    Yoyo Jiang[MSFT]
    MSDN Community Support | Feedback to us

    Wednesday, July 4, 2012 9:10 AM
    Moderator