none
Reading multiple large files with multiple threads to speed up an RSS feed

  • Question

  • I am pasting my code below, where I read multiple files in a loop. I am reading many files, and the file sizes are huge, so it takes a long time. Please tell me the best way to read multiple large files simultaneously and append their data to a StringBuilder.

    I could use Parallel.ForEach, but I am worried the data may overlap, and if I use a lock statement then using multiple threads makes no sense. Please look at my code and suggest the best way to speed up reading multiple large XML files.

    /// <summary>
    /// Builds an XML fragment containing, for every selected broker in the grid,
    /// a &lt;Broker Code="..."&gt; element wrapping the rows of that broker's bogey
    /// file whose "StandardValue" column is non-empty.
    /// </summary>
    /// <returns>The concatenated broker XML fragments; empty string when none qualify.</returns>
    private string GetXMLFromBrokerBogey()
    {
        StringBuilder sbBrokerBogey = new StringBuilder();

        // Snapshot of the grid, ordered by broker name.
        var GridRows = (from row in dgvBrokers.ToDataTable("dtdgvBrokers").AsEnumerable()
                        orderby row["BrokerName"] ascending
                        select row);
        DataTable dtdgvBrokers = GridRows.AsDataView().ToTable();

        for (int i = 0; i < dtdgvBrokers.Rows.Count; i++)
        {
            string strbrokercode = dtdgvBrokers.Rows[i]["Brokers"].ToString();

            // BUG FIX: FirstOrDefault can return null and "Select" can be null;
            // the original dereferenced both unconditionally.
            DataRow match = dtdgvBrokers.AsEnumerable()
                .FirstOrDefault(x => x.Field<string>("Brokers").Equals(strbrokercode));
            bool selectedBroker = match != null && match.Field<string>("Select") == "True";
            if (!selectedBroker)
            {
                continue;
            }

            // File name pattern: "<Ticker>~<BrokerCode>~Bogey.xml" under DBPath\Ticker.
            string brokerBogeypath = Path.Combine(
                ConfigurationManager.AppSettings["DBPath"],
                Ticker,
                "###~$$$~Bogey.xml".Replace("###", Ticker).Replace("$$$", strbrokercode).Trim());
            if (!File.Exists(brokerBogeypath))
            {
                continue;
            }

            // 'using' guarantees disposal even when ReadXml throws; the original
            // leaked the DataSets on any exception.
            using (DataSet ds = new DataSet())
            {
                ds.ReadXml(brokerBogeypath);

                // BUG FIX: the code reads ds.Tables[2], which requires at least
                // THREE tables; the old guard (Count > 1) allowed an
                // IndexOutOfRangeException for two-table files.
                if (ds.Tables.Count > 2)
                {
                    // Keep only rows that actually carry a StandardValue.
                    DataRow[] rows = ds.Tables[2].Copy().AsEnumerable()
                        .Where(myRow => !string.IsNullOrEmpty(myRow.Field<string>("StandardValue")))
                        .ToArray();

                    // CopyToDataTable throws InvalidOperationException on an empty
                    // sequence, so skip brokers with no qualifying rows.
                    if (rows.Length > 0)
                    {
                        using (DataSet ds2 = new DataSet())
                        {
                            ds2.Tables.Add(rows.CopyToDataTable());
                            string strBrokerXML = ds2.GetXml();
                            sbBrokerBogey.Append("<Broker Code=" + '"' + strbrokercode + '"' + ">" + strBrokerXML + "</Broker>");
                        }
                    }
                }
            }
        }
        return sbBrokerBogey.ToString();
    }

    Wednesday, August 14, 2019 12:27 PM

Answers

  • Hi,

    If reading the data from disk is taking a long time, using multiple threads will not speed up your process. Using multiple threads will help you if your processor is the bottleneck, and not your disk. I'm not sure what the bottleneck is in your case.

    If you want to do some multithreading however, you can use System.Threading.Task to wrap your file operations.

    The following example uses a list of Task objects to track all running operations. Next it will call Task.WhenAll to wait for all the operations to finish. Each Task will keep its result, so in the end we can combine all the results without using a lock.

    /// <summary>
    /// Starts one read task per file, awaits them all, then concatenates the
    /// results in the original file order. No lock is needed because each task
    /// owns its own result.
    /// </summary>
    // BUG FIX: 'async void' cannot be awaited and lets exceptions go unobserved;
    // 'static async Task Main' (C# 7.1+) is the supported async entry point.
    static async Task Main(string[] args)
    {
        List<Task<string>> fileOperations = new List<Task<string>>();
        List<string> fileNames = new List<string> { "file1.xml", "file2.xml", "file3.xml" };

        foreach (string fileName in fileNames)
        {
            fileOperations.Add(ReadYourXmlFile(fileName));
        }

        // WhenAll returns the task results in the order the tasks were added,
        // so the combined output is deterministic.
        string[] results = await Task.WhenAll(fileOperations);

        StringBuilder result = new StringBuilder();
        foreach (string partial in results)
        {
            result.Append(partial);
        }
    }
    
    /// <summary>
    /// Wraps the per-file work in a thread-pool task so several files can be
    /// processed concurrently; the delegate's return value becomes the task's
    /// result, so no shared state or locking is required.
    /// </summary>
    static Task<string> ReadYourXmlFile(string fileName)
    {
        Func<string> processFile = delegate
        {
            //
            // Do something with your fileName.
            //
            return "your result string";
        };

        return Task.Run(processFile);
    }

    Kind regards,

    Johnny Hooyberghs


    • Edited by Hooyberghs Johnny Wednesday, August 14, 2019 12:58 PM
    • Marked as answer by Sudip_inn Wednesday, August 14, 2019 2:13 PM
    Wednesday, August 14, 2019 12:56 PM

All replies

  • Hi,

    If reading the data from disk is taking a long time, using multiple threads will not speed up your process. Using multiple threads will help you if your processor is the bottleneck, and not your disk. I'm not sure what the bottleneck is in your case.

    If you want to do some multithreading however, you can use System.Threading.Task to wrap your file operations.

    The following example uses a list of Task objects to track all running operations. Next it will call Task.WhenAll to wait for all the operations to finish. Each Task will keep its result, so in the end we can combine all the results without using a lock.

    /// <summary>
    /// Starts one read task per file, awaits them all, then concatenates the
    /// results in the original file order. No lock is needed because each task
    /// owns its own result.
    /// </summary>
    // BUG FIX: 'async void' cannot be awaited and lets exceptions go unobserved;
    // 'static async Task Main' (C# 7.1+) is the supported async entry point.
    static async Task Main(string[] args)
    {
        List<Task<string>> fileOperations = new List<Task<string>>();
        List<string> fileNames = new List<string> { "file1.xml", "file2.xml", "file3.xml" };

        foreach (string fileName in fileNames)
        {
            fileOperations.Add(ReadYourXmlFile(fileName));
        }

        // WhenAll returns the task results in the order the tasks were added,
        // so the combined output is deterministic.
        string[] results = await Task.WhenAll(fileOperations);

        StringBuilder result = new StringBuilder();
        foreach (string partial in results)
        {
            result.Append(partial);
        }
    }
    
    /// <summary>
    /// Wraps the per-file work in a thread-pool task so several files can be
    /// processed concurrently; the delegate's return value becomes the task's
    /// result, so no shared state or locking is required.
    /// </summary>
    static Task<string> ReadYourXmlFile(string fileName)
    {
        Func<string> processFile = delegate
        {
            //
            // Do something with your fileName.
            //
            return "your result string";
        };

        return Task.Run(processFile);
    }

    Kind regards,

    Johnny Hooyberghs


    • Edited by Hooyberghs Johnny Wednesday, August 14, 2019 12:58 PM
    • Marked as answer by Sudip_inn Wednesday, August 14, 2019 2:13 PM
    Wednesday, August 14, 2019 12:56 PM
  • Basically, I need to read multiple XML files in parallel instead of one by one.
    Wednesday, August 14, 2019 2:14 PM
  • I have done the job this way — is it a good approach, or should I follow your approach? Please guide me.

    here is my code

            private string GetXMLFromBrokerBogey()
            {
                List<BrokerBogeyInfo> lstFiles = new List<BrokerBogeyInfo>();
                object _lock = new object();
    
                bool selectedBroker = false;
                string brokerBogeypath = "", strbrokercode = "";
                StringBuilder sbBrokerBogey = new StringBuilder();
                string strBrokerXML = "";
    
                var GridRows = (from row in dgvBrokers.ToDataTable("dtdgvBrokers").AsEnumerable()
                                orderby row["BrokerName"] ascending
                                select row);
    
                DataTable dtdgvBrokers = GridRows.AsDataView().ToTable();
                //sbBrokerBogey.Append("<Root>");
    
    
                for (int i = 0; i <= dtdgvBrokers.Rows.Count - 1; i++)
                {
                    strbrokercode = dtdgvBrokers.Rows[i]["Brokers"].ToString();
                    brokerBogeypath = Path.Combine(ConfigurationManager.AppSettings["DBPath"].ToString(),
                    Ticker, "###~$$$~Bogey.xml".Replace("###", Ticker).Trim()
                    .Replace("$$$", strbrokercode).Trim());
    
                    selectedBroker = (dtdgvBrokers.AsEnumerable().FirstOrDefault(x => x.Field<string>("Brokers").Equals(strbrokercode)).Field<string>("Select")).Equals("True") ? true : false;
    
                    if (selectedBroker)
                    {
                        if (File.Exists(brokerBogeypath))
                        {
                            lstFiles.Add(new BrokerBogeyInfo { filepath = brokerBogeypath, brokercode = strbrokercode });
                        }
                    }
                }
    
                Parallel.ForEach(lstFiles, (file) =>
                {
                    lock (_lock)
                    {
                        if (File.Exists(file.filepath))
                        {
                            DataSet ds = new DataSet();
                            ds.ReadXml(file.filepath);
    
                            if (ds.Tables.Count > 1)
                            {
                                DataSet ds1 = new DataSet();
                                DataSet ds2 = new DataSet();
                                ds1.Tables.Add(ds.Tables[2].Copy());
    
                                IEnumerable<DataRow> result = ds1.Tables[0]
                                .AsEnumerable()
                                .Where(myRow => !string.IsNullOrEmpty(myRow.Field<string>("StandardValue")));
                                ds2.Tables.Add(result.CopyToDataTable());
                                strBrokerXML = ds2.GetXml();
                                sbBrokerBogey.Append("<Broker Code=" + '"' + file.brokercode + '"' + ">" + strBrokerXML + "</Broker>");
    
                                ds1.Dispose();
                                ds1 = null;
    
                                ds2.Dispose();
                                ds2 = null;
                            }
    
                            ds.Dispose();
                            ds = null;
                        }
                    }
                });
    
                lstFiles.Clear();
                lstFiles.TrimExcess();
                lstFiles = null;
    
                return sbBrokerBogey.ToString();
            }

    Friday, August 16, 2019 7:23 AM