none
Reading Data from excel RRS feed

  • Question

  • I have a WPF application, where I have a functionality to read data from excel.
    I was doing this using OLEDB and it was working great, until I found out there was a 255 limit for columns and the data would be truncated unless data > 255 characters is not present in the first 8 rows. Fix for this issue is to update the registry which would mean to update all users registry. So I don't want to go with that approach.

    OLEDB code:

    string strSQL = "SELECT * FROM [Sheet1$]";
     OleDbCommand cmd = new OleDbCommand(strSQL, conn);
     DataSet ds1 = new DataSet();
     OleDbDataAdapter da = new OleDbDataAdapter(cmd);
     da.Fill(ds1);

    As an alternative, I tried Interop.Excel . However, it seems to be slower that OLEDB. The excel sheets that was taking 2 secs to load take about 15 sec to load using Interop.Excel.

    System.Data.DataTable tempTable = new System.Data.DataTable();
    tempTable.TableName = "ResultData";
    Excel.Application app = new Excel.Application();
    Excel.Workbook book = null;
    Excel.Range range = null;
    try
    {
     app.Visible = false;
     app.ScreenUpdating = false;
     app.DisplayAlerts = false;

     book = app.Workbooks.Open(inputFilePath, Missing.Value, Missing.Value, Missing.Value
                                                  , Missing.Value, Missing.Value, Missing.Value, Missing.Value
                                                 , Missing.Value, Missing.Value, Missing.Value, Missing.Value
                                                , Missing.Value, Missing.Value, Missing.Value);
      foreach (Excel.Worksheet sheet in book.Worksheets)
      {
        Logger.LogException("Values for Sheet " + sheet.Index, System.Reflection.MethodBase.GetCurrentMethod().ToString());
        // get a range to work with
        range = sheet.get_Range("A1", Missing.Value);
        // get the end of values to the right (will stop at the first empty cell)
        range = range.get_End(Excel.XlDirection.xlToRight);
        // get the end of values toward the bottom, looking in the last column (will stop at first empty cell)
        range = range.get_End(Excel.XlDirection.xlDown);

         // get the address of the bottom, right cell
         string downAddress = range.get_Address(
         false, false, Excel.XlReferenceStyle.xlA1,
         Type.Missing, Type.Missing);

          // Get the range, then values from a1
          range = sheet.get_Range("A1", downAddress);
                        object[,] values = (object[,])range.Value2;

          //Get the Column Names 
          for (int k = 0; k < values.GetLength(1); )
          {
             tempTable.Columns.Add(Convert.ToString(values[1, ++k]).Trim());
          }

          for (int i = 2; i <= values.GetLength(0); i++)//first row contains the column names, so start from the next row.
          {
          System.Data.DataRow dr = tempTable.NewRow();
             for (int j = 1; j <= values.GetLength(1); j++)//columns
            {
               dr[j - 1] = values[i, j];
             }
                            tempTable.Rows.Add(dr);
                        }
                    }

    Are there other alternatives that I can use which is as fast as OLEDB? The columns and rows are not fixed in the excel sheet.

    Tuesday, September 22, 2015 12:42 AM

Answers

  • Hi,

    The another way we can improve the performance for reading data from Excel is using Open XML. Here is an helpful link for this topic:
    How to: Retrieve the values of cells in a spreadsheet document (Open XML SDK)

    And if the workbook is large, the Open XML also provide SAX to improve the performance. Here are some helpful links for your reference:
    How to: Parse and read a large spreadsheet document (Open XML SDK)

    Parsing and Reading Large Excel Files with the Open XML SDK

    Regards & Fei


    We are trying to better understand customer views on social support experience, so your participation in this interview project would be greatly appreciated if you have time. Thanks for helping make community forums a great place.
    Click HERE to participate the survey.

    Wednesday, September 23, 2015 6:05 AM
    Moderator
  • Thank you for your response.Here is the final code that I used:

     public static DataSet ExtractExcelSheetValuesToDataTable(string xlsxFilePath, string sheetName)
            {
                DataTable dt = new DataTable();
                DataSet ds = new DataSet();
                using (SpreadsheetDocument myWorkbook = SpreadsheetDocument.Open(xlsxFilePath, true))
                {
                    //Access the main Workbook part, which contains data
                    WorkbookPart workbookPart = myWorkbook.WorkbookPart;
                    WorksheetPart worksheetPart = null;
                    if (!string.IsNullOrEmpty(sheetName))
                    {
                        Sheet ss = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName).SingleOrDefault<Sheet>();
                        worksheetPart = (WorksheetPart)workbookPart.GetPartById(ss.Id);
                    }
                    else
                    {
                        worksheetPart = workbookPart.WorksheetParts.FirstOrDefault();
                    }
                    SharedStringTablePart stringTablePart = workbookPart.SharedStringTablePart;
                    if (worksheetPart != null)
                    {
                        Row lastRow = worksheetPart.Worksheet.Descendants<Row>().LastOrDefault();
                        Row firstRow = worksheetPart.Worksheet.Descendants<Row>().FirstOrDefault();
                        if (firstRow != null)
                        {
                            foreach (Cell c in firstRow.ChildElements)
                            {
                                string value = GetValue(c, stringTablePart);
                                dt.Columns.Add(value);
                            }
                        }
                        if (lastRow != null)
                        {
                            for (int i = 2; i <= lastRow.RowIndex; i++)
                            {
                                DataRow dr = dt.NewRow();
                                bool empty = true;
                                Row row = worksheetPart.Worksheet.Descendants<Row>().Where(r => i == r.RowIndex).FirstOrDefault();
                                int j = 0;
                                if (row != null)
                                {
                                    foreach (Cell c in row.Descendants<Cell>())
                                    {
                                        int? colIndex = GetColumnIndex(((DocumentFormat.OpenXml.Spreadsheet.CellType)(c)).CellReference);
                                        if (colIndex > j)
                                        {
                                            dr[j] = "";
                                            j++;
                                        }
                                        //Get cell value
                                        string value = GetValue(c, stringTablePart);
                                        //if (!string.IsNullOrEmpty(value))
                                        //    empty = false;
                                        dr[j] = value;
                                        j++;
                                        if (j == dt.Columns.Count)
                                            break;
                                    }
                                   
                                    //foreach (Cell c in row.ChildElements)
                                    //{
                                    //    //Get cell value
                                    //    string value = GetValue(c, stringTablePart);
                                    //    //if (!string.IsNullOrEmpty(value))
                                    //    //    empty = false;
                                    //    dr[j] = value;
                                    //    j++;
                                    //    if (j == dt.Columns.Count)
                                    //        break;
                                    //}
                                    //if (empty)
                                    //    break;
                                    dt.Rows.Add(dr);
                                }
                            }
                        }
                    }
                }
                ds.Tables.Add(dt);
                return ds;
            }
            public static string GetValue(Cell cell, SharedStringTablePart stringTablePart)
            {
                if (cell.ChildElements.Count == 0) return null;
                //get cell value
                string value = cell.ElementAt(0).InnerText;//CellValue.InnerText;
                //Look up real value from shared string table
                if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))
                    value = stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
                return value;
            }

            private static int? GetColumnIndex(string cellReference)
            {
                if (string.IsNullOrEmpty(cellReference))
                {
                    return null;
                }

                //remove digits
                string columnReference = Regex.Replace(cellReference.ToUpper(), @"[\d]", string.Empty);

                int columnNumber = -1;
                int mulitplier = 1;

                //working from the end of the letters take the ASCII code less 64 (so A = 1, B =2...etc)
                //then multiply that number by our multiplier (which starts at 1)
                //multiply our multiplier by 26 as there are 26 letters
                foreach (char c in columnReference.ToCharArray().Reverse())
                {
                    columnNumber += mulitplier * ((int)c - 64);

                    mulitplier = mulitplier * 26;
                }

                //the result is zero based so return columnnumber + 1 for a 1 based answer
                //this will match Excel's COLUMN function
                return columnNumber;
            }

    http://www.prowareness.com/blog/reading-data-from-excel-document-using-openxml/

    Monday, September 28, 2015 12:57 AM

All replies

  • Hi,

    The another way we can improve the performance for reading data from Excel is using Open XML. Here is an helpful link for this topic:
    How to: Retrieve the values of cells in a spreadsheet document (Open XML SDK)

    And if the workbook is large, the Open XML also provide SAX to improve the performance. Here are some helpful links for your reference:
    How to: Parse and read a large spreadsheet document (Open XML SDK)

    Parsing and Reading Large Excel Files with the Open XML SDK

    Regards & Fei


    We are trying to better understand customer views on social support experience, so your participation in this interview project would be greatly appreciated if you have time. Thanks for helping make community forums a great place.
    Click HERE to participate the survey.

    Wednesday, September 23, 2015 6:05 AM
    Moderator
  • Thank you for your response.Here is the final code that I used:

     public static DataSet ExtractExcelSheetValuesToDataTable(string xlsxFilePath, string sheetName)
            {
                DataTable dt = new DataTable();
                DataSet ds = new DataSet();
                using (SpreadsheetDocument myWorkbook = SpreadsheetDocument.Open(xlsxFilePath, true))
                {
                    //Access the main Workbook part, which contains data
                    WorkbookPart workbookPart = myWorkbook.WorkbookPart;
                    WorksheetPart worksheetPart = null;
                    if (!string.IsNullOrEmpty(sheetName))
                    {
                        Sheet ss = workbookPart.Workbook.Descendants<Sheet>().Where(s => s.Name == sheetName).SingleOrDefault<Sheet>();
                        worksheetPart = (WorksheetPart)workbookPart.GetPartById(ss.Id);
                    }
                    else
                    {
                        worksheetPart = workbookPart.WorksheetParts.FirstOrDefault();
                    }
                    SharedStringTablePart stringTablePart = workbookPart.SharedStringTablePart;
                    if (worksheetPart != null)
                    {
                        Row lastRow = worksheetPart.Worksheet.Descendants<Row>().LastOrDefault();
                        Row firstRow = worksheetPart.Worksheet.Descendants<Row>().FirstOrDefault();
                        if (firstRow != null)
                        {
                            foreach (Cell c in firstRow.ChildElements)
                            {
                                string value = GetValue(c, stringTablePart);
                                dt.Columns.Add(value);
                            }
                        }
                        if (lastRow != null)
                        {
                            for (int i = 2; i <= lastRow.RowIndex; i++)
                            {
                                DataRow dr = dt.NewRow();
                                bool empty = true;
                                Row row = worksheetPart.Worksheet.Descendants<Row>().Where(r => i == r.RowIndex).FirstOrDefault();
                                int j = 0;
                                if (row != null)
                                {
                                    foreach (Cell c in row.Descendants<Cell>())
                                    {
                                        int? colIndex = GetColumnIndex(((DocumentFormat.OpenXml.Spreadsheet.CellType)(c)).CellReference);
                                        if (colIndex > j)
                                        {
                                            dr[j] = "";
                                            j++;
                                        }
                                        //Get cell value
                                        string value = GetValue(c, stringTablePart);
                                        //if (!string.IsNullOrEmpty(value))
                                        //    empty = false;
                                        dr[j] = value;
                                        j++;
                                        if (j == dt.Columns.Count)
                                            break;
                                    }
                                   
                                    //foreach (Cell c in row.ChildElements)
                                    //{
                                    //    //Get cell value
                                    //    string value = GetValue(c, stringTablePart);
                                    //    //if (!string.IsNullOrEmpty(value))
                                    //    //    empty = false;
                                    //    dr[j] = value;
                                    //    j++;
                                    //    if (j == dt.Columns.Count)
                                    //        break;
                                    //}
                                    //if (empty)
                                    //    break;
                                    dt.Rows.Add(dr);
                                }
                            }
                        }
                    }
                }
                ds.Tables.Add(dt);
                return ds;
            }
            public static string GetValue(Cell cell, SharedStringTablePart stringTablePart)
            {
                if (cell.ChildElements.Count == 0) return null;
                //get cell value
                string value = cell.ElementAt(0).InnerText;//CellValue.InnerText;
                //Look up real value from shared string table
                if ((cell.DataType != null) && (cell.DataType == CellValues.SharedString))
                    value = stringTablePart.SharedStringTable.ChildElements[Int32.Parse(value)].InnerText;
                return value;
            }

            private static int? GetColumnIndex(string cellReference)
            {
                if (string.IsNullOrEmpty(cellReference))
                {
                    return null;
                }

                //remove digits
                string columnReference = Regex.Replace(cellReference.ToUpper(), @"[\d]", string.Empty);

                int columnNumber = -1;
                int mulitplier = 1;

                //working from the end of the letters take the ASCII code less 64 (so A = 1, B =2...etc)
                //then multiply that number by our multiplier (which starts at 1)
                //multiply our multiplier by 26 as there are 26 letters
                foreach (char c in columnReference.ToCharArray().Reverse())
                {
                    columnNumber += mulitplier * ((int)c - 64);

                    mulitplier = mulitplier * 26;
                }

                //the result is zero based so return columnnumber + 1 for a 1 based answer
                //this will match Excel's COLUMN function
                return columnNumber;
            }

    http://www.prowareness.com/blog/reading-data-from-excel-document-using-openxml/

    Monday, September 28, 2015 12:57 AM