none
Problem reading a text file from the end to the beginning C#

    Question

  •  
    I have written a Windows app in C# that needs to read a text file over the network, starting from the end of the file and reading backwards toward the beginning (looking for the last occurrence of a couple of strings in one line of text). I do not want to read the entire file, as it is very large, sits on a heavily loaded server, and is updated with hundreds of lines of text every second. Since I am reading backwards, I do a seek, then a read, then decode the byte array into a string, split the string into an array of lines, and then iterate and search through the string array. My problem is that the first (index 0) element of my string array will almost always be an incomplete line, so to ensure 100% reliability I need to adjust the next seek so that I read the entire line that was truncated on the previous read (ideally it would be the final element of the next string array after the read, decode, and split). Everything works great except that I can't seem to adjust the Seek to always read the whole truncated line. Most of the time it works, but not 100% of the time, and therefore it is useless :). I must be doing something wrong with the counting of the bytes or "\r\n"s. Any help would be appreciated (to make this more performant or reliable). Code is below...
     
    1. using (FileStream fs = new FileStream(sFullPathAndFileName, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))
    2. {
    3.    fs.Seek(iBBFileSize+iBytesPerRead, SeekOrigin.Begin);
    4.    byte[] BBLog = new byte[posBytesPerRead];
    5.    System.Text.ASCIIEncoding enc = new System.Text.ASCIIEncoding();
    6.    //lTotalBytes is a negative number used to offset to the position to start read
    7.    //iBytesPerRead is a negative number
    8.    //posBytesPerRead is the positive version of iBytesPerRead
    9.    //iBR represents bytes read
    10.    while (found == 0 && fs.Position>posBytesPerRead)
    11.    {
    12.      iBR = fs.Read(BBLog, 0, posBytesPerRead);
    13.      lTotalBytes -= iBR;
    14.      BBString = enc.GetString(BBLog);
    15.      BBLines = BBString.Split(new string[] { "\r\n" }, StringSplitOptions.None);
    16.      iBBLineCount = BBLines.Length;
    17.      //Removed string array iteration/parsing
    18.      iFirstlinelength = BBLines[0].Length;
    19.      lTotalBytes += iFirstlinelength;
    20.      fs.Seek(iBBFileSize + lTotalBytes + (2*iBBLineCount), SeekOrigin.Begin);
    21.    }
    Thursday, June 05, 2008 11:38 AM

Answers

  • Hi

    I wrote a small application that searches a text file backward; it works in my general testing. Please check the code snippet below.

    // Click handler for button1: starts the backward search on the sample file.
    private void button1_Click(object sender, EventArgs e)
    {
        find_text_file_backward("F:\\test1.txt");
    }

     

       void find_text_file_backward(string filepath)

            {

                if (!File.Exists(filepath))

                    return;

                FileInfo fi = new FileInfo(filepath);

     

                long fileSize = fi.Length;

                int BufSize = 1024;         //assume there is no line in the file is longer than 1024 bytes

                byte [] buffer = null;         

     

                string toFind = "bug"; //the string want to find in the text

                int hasReaded = 0; //how many bytes has be readed

     

                using (FileStream fs = new FileStream(filepath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))

                {

                    while (hasReaded < fileSize)

                    {

                        string text = "";

                        long offset = 0;

     

                        if ((fileSize - hasReaded) < BufSize)

                            BufSize = System.Convert.ToInt32(fileSize - hasReaded);

     

                        buffer = new byte[BufSize];

                       

                   

                            if ((BufSize + hasReaded) < fileSize)

                                offset = (-1) * BufSize - hasReaded;

                            else

                                offset = (-1) * fileSize;

     

                            fs.Seek(offset, SeekOrigin.End);

                            int reads = fs.Read(buffer, 0, BufSize);                      

     

                            string asciiText = System.Text.ASCIIEncoding.ASCII.GetString(buffer);

     

                            if (!asciiText.Contains("\r\n"))                       

                                hasReaded = (int)fileSize;                       

                            text = asciiText;

                                        

     

                        int idx = -1;

                        int tmp = 0;

                        while ((tmp = text.IndexOf("\r\n", tmp)) >= 0)

                        {

                            idx = tmp;

                            tmp++;

                        }

                        if (idx > -1)

                            text = text.Substring(idx);

                        else

                            text = "";

     

                        hasReaded += text.Length;

     

                        if (text.Contains(toFind))

                        {

                            long position = fileSize - hasReaded + text.IndexOf(toFind);

                            MessageBox.Show("find in the last occurence: " + position.ToString());

                            return;

                        }

                    }

     

                }

                MessageBox.Show("find failed");

        }

    Regards,

    Xun



    Please remember to mark the replies as answers if they help and unmark them if they provide no help.
    • Edited by jack 321 Tuesday, June 10, 2008 3:04 AM edit a bug
    • Marked as answer by jack 321 Tuesday, June 10, 2008 10:28 AM
    Monday, June 09, 2008 11:20 AM

All replies

  •  I originally coded my final seek without the (2*iBBLineCount) part, but added it to account for cr/lf characters.  After I realized that the cr/lf characters were included in the length of my strings, I reverted back to the original.  It still doesn't work properly, although it does seem to allow me to start reading where I stopped previously (if I read 1,000,000 or 10,000 bytes).  When I set the bytes to read at 1000, I get some strange results.  Since I am reading a number of bytes, I almost always have an incomplete line as the first item in my BBLines array.  It seems logical that I would be able to subtract the length from the amount of bytes I have read thus far and have it re-read the final x number of bytes, giving me a complete line of text.  Strange.
    Monday, June 09, 2008 8:05 AM
  • Hi stoogots2,

    Sorry, would you please clarify one thing: since hundreds of lines are appended to the text file every second, how can you check for the last occurrence of the specified strings?

    Have you locked the file to read-only before you read it?

    Regards,

    Xun


    Please remember to mark the replies as answers if they help and unmark them if they provide no help.
    Monday, June 09, 2008 8:38 AM
  • Hi

    I wrote a small application that searches a text file backward; it works in my general testing. Please check the code snippet below.

    // Click handler for button1: starts the backward search on the sample file.
    private void button1_Click(object sender, EventArgs e)
    {
        find_text_file_backward("F:\\test1.txt");
    }

     

       void find_text_file_backward(string filepath)

            {

                if (!File.Exists(filepath))

                    return;

                FileInfo fi = new FileInfo(filepath);

     

                long fileSize = fi.Length;

                int BufSize = 1024;         //assume there is no line in the file is longer than 1024 bytes

                byte [] buffer = null;         

     

                string toFind = "bug"; //the string want to find in the text

                int hasReaded = 0; //how many bytes has be readed

     

                using (FileStream fs = new FileStream(filepath, FileMode.Open, FileAccess.Read, FileShare.ReadWrite))

                {

                    while (hasReaded < fileSize)

                    {

                        string text = "";

                        long offset = 0;

     

                        if ((fileSize - hasReaded) < BufSize)

                            BufSize = System.Convert.ToInt32(fileSize - hasReaded);

     

                        buffer = new byte[BufSize];

                       

                   

                            if ((BufSize + hasReaded) < fileSize)

                                offset = (-1) * BufSize - hasReaded;

                            else

                                offset = (-1) * fileSize;

     

                            fs.Seek(offset, SeekOrigin.End);

                            int reads = fs.Read(buffer, 0, BufSize);                      

     

                            string asciiText = System.Text.ASCIIEncoding.ASCII.GetString(buffer);

     

                            if (!asciiText.Contains("\r\n"))                       

                                hasReaded = (int)fileSize;                       

                            text = asciiText;

                                        

     

                        int idx = -1;

                        int tmp = 0;

                        while ((tmp = text.IndexOf("\r\n", tmp)) >= 0)

                        {

                            idx = tmp;

                            tmp++;

                        }

                        if (idx > -1)

                            text = text.Substring(idx);

                        else

                            text = "";

     

                        hasReaded += text.Length;

     

                        if (text.Contains(toFind))

                        {

                            long position = fileSize - hasReaded + text.IndexOf(toFind);

                            MessageBox.Show("find in the last occurence: " + position.ToString());

                            return;

                        }

                    }

     

                }

                MessageBox.Show("find failed");

        }

    Regards,

    Xun



    Please remember to mark the replies as answers if they help and unmark them if they provide no help.
    • Edited by jack 321 Tuesday, June 10, 2008 3:04 AM edit a bug
    • Marked as answer by jack 321 Tuesday, June 10, 2008 10:28 AM
    Monday, June 09, 2008 11:20 AM
  • wow, great answer, just i'm in a similar predicament but i'm looking for a different result.  how would i take the last say 20 strings in a text file and assign them to a tempstrings variable? i have a log file that i only need the last 20 strings that were created every second or so.

    searchbackwards, null to null filling in strings.

    no idea, i get the code above kinda, but not enough to manipulate it for my own means like this.  any tips would be appreciated.


    Tuesday, July 22, 2008 12:16 AM
  • nm, found another solution.
    Tuesday, July 22, 2008 4:31 AM
  •  This might help you. Use it as a reader:

    // Example: read a file line by line from its last line to its first.
    // The BackwardReader constructor requires the path of the file to read.
    BackwardReader br = new BackwardReader("F:\\test1.txt");
    try
    {
        while (!br.SOF)
        {
            string line = br.Readline();
            // process `line` here
        }
    }
    finally
    {
        // Release the file handle even if processing a line throws.
        br.Close();
    }

    /// <summary>
    /// Reads a text file one line at a time, starting with the last line and moving
    /// toward the beginning of the file. Bytes are decoded as ASCII. A line ends at
    /// "\n", optionally preceded by "\r"; the terminator is not part of the returned
    /// text. The file is opened with FileShare.ReadWrite so another process may keep
    /// writing to it.
    /// </summary>
    public class BackwardReader : System.IDisposable
    {
        private const byte CarriageReturn = (byte)'\r';
        private const byte LineFeed = (byte)'\n';

        // Number of bytes fetched per read while scanning backward for a line break.
        private const int ChunkSize = 4096;

        private readonly string path;
        private FileStream fs = null;

        /// <summary>Opens <paramref name="path"/> for reading and positions the reader at its end.</summary>
        /// <param name="path">Path of the file to read.</param>
        public BackwardReader(string path)
        {
            this.path = path;
            fs = new FileStream(path, FileMode.Open, FileAccess.Read, FileShare.ReadWrite);
            fs.Seek(0, SeekOrigin.End);
        }

        /// <summary>
        /// Returns the line that ends at the current position and moves the position to
        /// the start of that line, so the next call returns the preceding line.
        /// </summary>
        /// <returns>The line text without its terminator; "" for an empty line or when already at the start of the file.</returns>
        /// <exception cref="EndOfStreamException">The file became shorter than the region being read.</exception>
        public string Readline()
        {
            // Exclusive end of the line to return.
            long end = fs.Position;

            // Step over the terminator that closes this line, if there is one.
            if (end > 0 && ByteAt(end - 1) == LineFeed)
            {
                end--;
                if (end > 0 && ByteAt(end - 1) == CarriageReturn)
                {
                    end--;
                }
            }

            long start = FindLineStart(end);

            byte[] line = new byte[checked((int)(end - start))];
            ReadFully(start, line, line.Length);

            // Leave the stream at the start of the line: this is where the next call
            // resumes and what SOF inspects.
            fs.Seek(start, SeekOrigin.Begin);

            return ASCIIEncoding.ASCII.GetString(line);
        }

        /// <summary>True when the reader has reached the start of the file.</summary>
        public bool SOF
        {
            get
            {
                return fs.Position == 0;
            }
        }

        /// <summary>Closes the underlying file.</summary>
        public void Close()
        {
            fs.Close();
        }

        /// <summary>Closes the underlying file; allows the reader to be used in a using statement.</summary>
        public void Dispose()
        {
            Close();
        }

        // Returns the byte stored at the given offset, or -1 if the offset is at or past the end.
        private int ByteAt(long offset)
        {
            fs.Seek(offset, SeekOrigin.Begin);
            return fs.ReadByte();
        }

        // Returns the offset just after the last "\n" located before 'end', or 0 when there is none.
        private long FindLineStart(long end)
        {
            byte[] chunk = new byte[ChunkSize];
            long chunkEnd = end;

            while (chunkEnd > 0)
            {
                int size = chunkEnd > ChunkSize ? ChunkSize : (int)chunkEnd;
                long chunkStart = chunkEnd - size;
                ReadFully(chunkStart, chunk, size);

                for (int i = size - 1; i >= 0; i--)
                {
                    if (chunk[i] == LineFeed)
                    {
                        return chunkStart + i + 1;
                    }
                }

                chunkEnd = chunkStart;
            }

            return 0;
        }

        // Fills buffer[0..count) with the bytes starting at 'offset'. Stream.Read may
        // return fewer bytes than requested, so it is called until the request is met.
        private void ReadFully(long offset, byte[] buffer, int count)
        {
            fs.Seek(offset, SeekOrigin.Begin);

            int filled = 0;
            while (filled < count)
            {
                int got = fs.Read(buffer, filled, count - filled);
                if (got == 0)
                {
                    throw new EndOfStreamException("The file is shorter than expected: " + path);
                }

                filled += got;
            }
        }
    }

    }

    Tuesday, August 05, 2008 9:31 AM