none
find only whole word, not string within word. VS 2008 vb RRS feed

  • Question

  • I am trying to create a function that searches each line of a richtextbox and returns that row if one of many search phrases are found.   The app works wonderfully when used with longer words or muli word phrases as the search criteria.

    The problem I have is if the search word/criteria contains a small string such as  bae   or  sp  then I get alot of false positives.  Anytime bae or sp is found within any word it returns the results.  I need the function to stop looking within the words for results and instead look at the word as a whole for a match.

    Below you can see that I am taking each line of text in the richtextbox, removing most non-alpha numberic characters and replacing them with spaces.  Then I have it remove any double spaces with single spaces.  For the search term list I have it take the text before the delimiter ;  and do the same.

    Trouble I am having is,  if the search term is  SP   and the search phrase is  "This is a space shuttle lauch"
    it will return the sentence since the sentence contains  the word "space" which starts with sp.  I hope this makes sense.
    Public Function CleanString(ByVal strSource As String) As String
            On Error GoTo CleanStringErr
            strSource = Replace(strSource, vbTab, " ")
            strSource = Replace(strSource, vbCrLf, " ")
            Do While (InStr(strSource, "  "))
                strSource = Replace(strSource, "  ", " ")
            Loop
            CleanString = Trim(strSource)
            Exit Function
    CleanStringErr:
            ' Insert error-handling code here
        End Function
    
    
        Function TxwCompare(ByVal SrchString As String, ByVal InputFromFileString As String) As String
            Dim a As String, aa As String, b As String, bb As String, i As Integer
            a = LCase(SrchString) : b = LCase(InputFromFileString) : aa = "" : bb = ""
            For i = 1 To Len(a)
                'a through z
                If Asc(Mid$(a, i, 1)) > 96 And Asc(Mid$(a, i, 1)) < 123 Then aa = aa & Mid$(a, i, 1)
                '0 through 9
                If Asc(Mid$(a, i, 1)) > 47 And Asc(Mid$(a, i, 1)) < 58 Then aa = aa & Mid$(a, i, 1)
                'change anthing else to a space
                If Asc(Mid$(a, i, 1)) > -1 And Asc(Mid$(a, i, 1)) < 48 Then aa = aa & " "
                If Asc(Mid$(a, i, 1)) > 57 And Asc(Mid$(a, i, 1)) < 97 Then aa = aa & " "
                If Asc(Mid$(a, i, 1)) > 122 Then aa = aa & " "
            Next i
            aa = CleanString(aa)
            'Unremark next line to see the result of the stripped search.
            'MsgBox(aa)
            For i = 1 To Len(b)
                If Asc(Mid$(b, i, 1)) > 96 And Asc(Mid$(b, i, 1)) < 123 Then bb = bb & Mid$(b, i, 1)
                If Asc(Mid$(b, i, 1)) > 47 And Asc(Mid$(b, i, 1)) < 58 Then bb = bb & Mid$(b, i, 1)
                If Asc(Mid$(b, i, 1)) > -1 And Asc(Mid$(b, i, 1)) < 48 Then bb = bb & " "
                If Asc(Mid$(b, i, 1)) > 57 And Asc(Mid$(b, i, 1)) < 97 Then bb = bb & " "
                If Asc(Mid$(b, i, 1)) > 122 Then bb = bb & " "
            Next i
            bb = CleanString(bb)
            'Unremark next line to see the result of the stripped input.
            'MsgBox(bb)
            'If InStr(1, bb, aa) > 0 Then TxwCompare = InputFromFileString Else TxwCompare = ""
            If bb.Contains(aa) Then TxwCompare = InputFromFileString Else TxwCompare = ""
        End Function
    
    
    Private Sub SubmitBTN_Click(ByVal sender As System.Object, ByVal e As System.EventArgs) Handles SubmitBTN.Click
            RichTextBox2.Clear()
            RichTextBox3.Clear()
            altlist.Clear()
            prilist.Clear()
            Word_List_Reload()
            Dim frm As New WaitBox
            frm.Show()
            Dim startindex As Integer = 0
            For Each L As String In RichTextBox1.Lines
                For Each S As String In WordList
                    Dim SS = Split(S, ";")
                    If TxwCompare(SS(0), L).Length > 0 Then
                        If SS.Length > 1 Then
                            RichTextBox2.AppendText(L & vbCrLf)
                            RichTextBox3.AppendText(SS(0) & vbCrLf)
                            altlist.Add(SS(0))
                            prilist.Add(SS(1))
                        Else : RichTextBox2.AppendText(L & vbCrLf)
                            RichTextBox3.AppendText(SS(0) & vbCrLf)
                            altlist.Add(SS(0))
                            prilist.Add("UNKNOWN")
                        End If
    
                        Exit For
                    End If
                Next
            Next
            frm.Close()
            RichTextBox1.Clear()
        End Sub
    Monday, July 20, 2009 3:14 PM

Answers

  • Alright guys... This is what I came up with.

    It uses the least amount of code from what I originally posted.

    I added 

    Imports

     

    System.Text.RegularExpressions

    to the top of the form

    and then changed line

    If bb.Contains(aa) Then TxwCompare = InputFromFileString Else TxwCompare = ""

    to

    If

     

    Regex.IsMatch(bb, "\b" & aa & "\b") Then TxwCompare = InputFromFileString Else TxwCompare = ""

    I am not sure how resource intensive this is compared to the other 2 correct answers given.  But I at least understand the mechanics behind this method  :)

    Thanks to both who helped.  +1 to both of you!

    • Marked as answer by Txwalker Monday, July 20, 2009 5:39 PM
    Monday, July 20, 2009 4:23 PM

All replies

  • Hi TXWalker,

    i would solve it in this way:

    1. Take one line from the rtb
    2. Check if Line contains search word by .IndexOF Method
    3. check if character after found word is space

    Like:

        Private Function ContainsSearchWord(ByVal sLine As String, ByVal sSearch As String) As Boolean
            Dim iPos As Integer = sLine.IndexOf(sSearch)
            If iPos > 0 Then
                If sLine.Substring(iPos + sSearch.Length, 1) = " " Then
                    Return True
                Else
                    Return False
                End If
            Else
                Return False
            End If
        End Function



    Mark the thread as answered if the answer helps you. This helps others who have the same problem !
    Monday, July 20, 2009 3:28 PM
  • that would still leave an issue if the search term was found on the end of the word and not the whole word correct?

    I am looking into regex and word boundries right now...  not sure if it works with a phrase as well as a singular word though.

    Monday, July 20, 2009 3:41 PM
  • you can use RichTextBox.Find function to simplify your life. One of the overloads is WholeWord which will stop what you are seeing. There is also None, which will do what you are seeing, MatchCase to only case matches.

    In the code below search is the term to search for


    Dim index As Integer = 0
            While index <> -1
                index = Me.RichTextBox1.Find(search, index, RichTextBoxFinds.WholeWord)
                If index <> -1 Then
                    Dim lineindex As Integer = Me.RichTextBox1.GetLineFromCharIndex(index)
                    RichTextBox1.SelectionStart = RichTextBox1.GetFirstCharIndexOfCurrentLine()
                    RichTextBox1.SelectionLength = RichTextBox1.Lines(lineindex).Length
                    ListBox1.Items.Add(RichTextBox1.SelectedText)
                    index += 1
                End If
            End While
    Monday, July 20, 2009 3:42 PM
  • yes, you are right. A little modification will do the job:

        Private Function ContainsSearchWord(ByVal sLine As String, ByVal sSearch As String) As Boolean
            Dim iPos As Integer = sLine.IndexOf(sSearch)
            Select Case iPos
                Case Is < 0
                    Return False
                Case 0
                    If iPos + sSearch.Length = sLine.Length Then
                        Return True
                    End If
                    If sLine.Substring(iPos + sSearch.Length, 1) = " " Then
                        Return True
                    Else
                        Return False
                    End If
                Case Is > 0
                    
                    If sLine.Substring(iPos - 1, 1) = " " Then
                        If iPos + sSearch.Length = sLine.Length Then
                            Return True
                        Else
                            If sLine.Substring(iPos + sSearch.Length, 1) = " " Then
                                Return True
                            Else
                                Return False
                            End If
                        End If
                    Else
                        Return False
                    End If
                    
            End Select
        End Function

    Mark the thread as answered if the answer helps you. This helps others who have the same problem !
    Monday, July 20, 2009 3:50 PM
  • Alright guys... This is what I came up with.

    It uses the least amount of code from what I originally posted.

    I added 

    Imports

     

    System.Text.RegularExpressions

    to the top of the form

    and then changed line

    If bb.Contains(aa) Then TxwCompare = InputFromFileString Else TxwCompare = ""

    to

    If

     

    Regex.IsMatch(bb, "\b" & aa & "\b") Then TxwCompare = InputFromFileString Else TxwCompare = ""

    I am not sure how resource intensive this is compared to the other 2 correct answers given.  But I at least understand the mechanics behind this method  :)

    Thanks to both who helped.  +1 to both of you!

    • Marked as answer by Txwalker Monday, July 20, 2009 5:39 PM
    Monday, July 20, 2009 4:23 PM
  • Hi Heslacher,

    using method IndexOf is very good idea, but iPos is legal for 0 (zero value) too - this mean position of substring in a string.
    Better is test for iPos to -1 (0xffff) value. This at your sample used method for location in STRING start index of finded SUBSTRING returned result (if found) as position pointed by TEXT method compare only. So returned result have got same value as  non-case and/or case sensitive finding substrings in string.

    small sc path for first whole word occurence from first position of a string - and it's finish:
    (sample is divided for filtered conditions for first and all next substring occurence w/out latest. If substring in occurence is as last word(-s) on position located, this possibility is filtered at next ElseIf condition tree )

        Private Function ContainsSearchWord(ByVal sLine As String, ByVal sSearch As String) As Boolean
            Dim iPos As Integer = sLine.IndexOf(sSearch)
            ' -1 value indicated, that substring (param. sSearch) not found
            If iPos >= 0 Then
                If (sLine.Substring(iPos + sSearch.Length, 1) = " ") And (iPos + sSearch.Length<len(sLine)) Then
                    Return True
                ElseIf (sLine.Substring(iPos + sSearch.Length, 1) = "") And (iPos + sSearch.Length=len(sLine)) Then
                    Return True
                End If
            Else
                Return False
            End If
        End Function

     
    If I need find all occurrence of sSearch in string sLine as whole words with possibilitz as match case (case sensitive) and with all chars code inside the substring too, this construction is wrong (better is seaching method for binary, non text)
    In this case will needed use diferrent method or function for searching. For example Instr  function and/or regular expression method of searching will be OK.

    Cosmetics replacement in next :

        Private Function ContainsSearchWord(ByVal sLine As String, ByVal sSearch As String) As Boolean
            Dim iPos As Integer = InStr(1,sLine,sSearch,CompareMethod.Binary)
            ' 0 value indicated, that substring (param. sSearch) not found
            If iPos > 0 Then
                If sLine.Substring((iPos - 1) + sSearch.Length, 1) = " " Then
                    Return True
                ElseIf sLine.Substring((iPos - 1) + sSearch.Length, 1) = "" Then
                    Return True
                End If
            Else
                Return False
            End If
        End Function

    Or as a Regular Expression sample, please visit MSDN page with described paragraphs or use Google or different Web navigator for find a regular expression samples on the Web.
    -MV-

    Tuesday, September 15, 2009 9:06 AM
  • Some words arround existing lines in a RTB given from character position of a RTB text field (it work with realy position on a rtb text) :

    Private Function FindLine(ByVal txtSource As String) As String()
      strLine =
    Nothing
      ReDim Preserve strLine(0)
      Dim CrrLine As Integer = 0
      Dim J As Integer = 0
      For i As Integer = 1 To Len(txtSource)
             LineIdx = RichTextBox1.GetLineFromCharIndex(i)
             If LineIdx > CrrLine Then
                  CrrLine = LineIdx
                  J += 1
                  ReDim Preserve strLine(J)
             End If
             If ((Asc(Mid(txtSource, i, 1)) <> Asc(vbLf)) _
                 Xor (Asc(Mid(txtSource, i, 1)) <> Asc(vbCr)) _
                 Xor (Asc(Mid(txtSource, i, 1)) <> Asc(vbCrLf))) Then
                      strLine(J) += Mid(txtSource, i, 1)
             End If
      Next
      Return strLine
    End Function

    Is nedeed study, how to ... but reading is very fast. function retured field of strings
    -MV-

    Tuesday, September 15, 2009 9:23 AM
  • So and one sample for better description :
    We have a string for examples with any user-defined function.
    1. We needed search if header of user-defined function is found as one record of inside the hash table
    2. If user-defined function or header is at contens of hash table, make a colorchange of this header

    first step will directed to any new function with some input parameters and returned numbers value, represents the start position of header occurence in explored string. For sample on this also :

    Private Function FindTrue(ByVal wrd As String, Optional ByVal shift As Long = 0) As Integer
            Dim teststr As String = ""
            teststr = RichTextBox1.Text.Replace(vbCrLf, " ").Replace("(", " ").Replace(")", " ").Replace("/", " ")
            teststr = teststr.Replace("*", " ").Replace("+)", " ").Replace("-", " ")
            teststr = teststr.Replace(".", " ").Replace(",", " ")
            Return teststr.IndexOf(wrd, CInt(shift))
    End Function

    - in first is replaced all unare signs, mathematical signs and enter command if present to space only. Index of words position stay same as in priors, but more words (as possible if present more of user-defined headers or programm keywords)

    And now is necesary a procedure or function create, so routine for calling of FindTrue function.
    source code (some lines from this for examlple only  KeysWordsOrder - string field created bz split function as :
    rtfb - is mean new RichTextBox object session
    .......
       rtfb.SelectionStart = 0
       rtfb.SelectionLength = 0
       rtfb.SelectionColor = System.Drawing.Color.Black
       startpt = 1
       Dim KeysWordsOrder() As String = Nothing
       KeysWordsOrder = Split((rtfb.text.replace(vbLf, " ").replace(".", " ").replace("(", " ").replace(")", " ")), " ")
    .......

    On last line is remaked explored string to normalized first - all words are boundared strongly w/spaces only - at secondary is called VB Split function for create one dimensional field from string with normalized delimiters  inside)

    Sample of calling FindTrue function described on top of this chapter. Part of function seach for each word from one dimensional field, stored under KeysWordsOrder field object id and compared this if in hash table key content exist. If key occurence is passed, this word (user-defined function header or different keyword from programm language for example) is catched and selected. This selected word is immediately with new selection color filled and process continued for next occurence same key in a keyfield. is repeated so long, if latest word from keyword field wasn't compared with all hash tables for key in content existing.


    For lngIdx As Long = 0 To UBound(KeysWordsOrder)
          Dim lngStart As Long = startpt - 1
          Dim fnd As Integer = -1
          Do
              If FindTrue(wrd, lngStart) > -1 Then
                  fnd = FindTrue(wrd, lngStart)
                  rtfb.SelectionStart = InStr(startpt, rtfb.Text, wrd, CompareMethod.Binary) - 1
                  rtfb.SelectionLength = Len(wrd)
                  startpt = rtfb.SelectionStart + Len(wrd) + 1
                  If Not ((rtfb.SelectionColor = System.Drawing.Color.DarkGreen) _
                          Or (rtfb.SelectionColor = System.Drawing.Color.Gray) _
                          Or (rtfb.SelectionColor = System.Drawing.Color.Blue) _
                          Or (rtfb.SelectionColor = System.Drawing.Color.Red) -
                          Or (rtfb.SelectionColor = System.Drawing.Color.Green)) Then
                       rtfb.SelectionColor = System.Drawing.Color.Gray
                  End If
                  rtfb.SelectionStart = 0
                  rtfb.SelectionLength = 0
                  rtfb.SelectionColor = System.Drawing.Color.Black
                  rtfb.Update()
                  lngStart = fnd + Len(wrd)
                  If lngStart > Len(RichTextBox1.Text) Then
                        lngStart = 0
                        Exit Do
                  End If
               End If
         Loop While FindTrue(wrd, lngStart) > -1
         startpt = 1
     Next

      -MV-

    Wednesday, September 23, 2009 2:54 PM