locked
EDIT TEXT IN PDF USING VBA RRS feed

  • Question

  • The following is a wonderful piece of VBA code to extract text from a PDF file. I would be grateful if somebody could let me know how I can replace the searched word with a new word, for example by adding this line: [jso.getPageNthWord(i, j = "New Word")].

    The original code is as follows:

    Sub SearchWordInPDF()

        '----------------------------------------------------------------------------------------
        'Finds a specific WORD in a PDF document (one word ONLY -> a search for two words, for
        'example, finds nothing because the PDF is scanned word by word).
        'The macro opens the PDF, finds the first appearance of the specified word, scrolls
        'so that it is visible and highlights it. When the word is found the PDF is left open
        'on purpose, so the user can see the selection.
        '
        'Inputs (read from the worksheet "PDF Search" of this workbook):
        '   C12 -> the word to search for (compared case-insensitively).
        '   C14 -> the full path of the PDF file.
        '
        'The code uses late binding, so no reference to external library is required.
        'However, the code works ONLY with Adobe Professional, so don't try to use it with
        'Adobe Reader because you will get an "ActiveX component can't create object" error.
        '
        'Written by:    Christos Samaras
        'Date:          04/05/2014
        'e-mail:        xristos.samaras@gmail.com
        'site:          http://www.myengineeringworld.net
        '--------------------------------------------------------------------------------------

        'Declaring the necessary variables.
        Dim WordToFind  As String
        Dim PDFPath     As String
        Dim FileMissing As Boolean
        Dim app         As Object
        Dim AVDoc       As Object
        Dim pdDoc       As Object
        Dim jso         As Object
        Dim i           As Long
        Dim j           As Long
        Dim Word        As Variant

        'Specify the text you want to search.
        'WordToFind = "Engineering"
        'Using a range:
        WordToFind = ThisWorkbook.Sheets("PDF Search").Range("C12").Value

        'Specify the path of the sample PDF form.
        'Full path example:
        'PDFPath = "C:\Users\Christos\Desktop\How Software Companies Die.pdf"
        'Using workbook path:
        'PDFPath = ThisWorkbook.Path & "\" & "How Software Companies Die.pdf"
        'Using a range:
        PDFPath = Trim$(ThisWorkbook.Sheets("PDF Search").Range("C14").Value)

        'Check if the file exists.
        'An empty path is handled separately: Dir with an empty string does not reliably
        'return "" (it can return a file from the current directory), which would lead
        'to a misleading error message further down.
        'VBA does not short-circuit "Or", hence the nested test.
        FileMissing = (Len(PDFPath) = 0)
        If Not FileMissing Then
            FileMissing = (Dir(PDFPath) = "")
        End If

        If FileMissing Then
            MsgBox "Cannot find the PDF file!" & vbCrLf & "Check the PDF path and retry.", _
                    vbCritical, "File Path Error"
            Exit Sub
        End If

        'Check if the input file is a PDF file.
        'The dot is included in the comparison so that names such as "report.xpdf"
        'or "notapdf" are not accepted by mistake.
        If LCase$(Right$(PDFPath, 4)) <> ".pdf" Then
            MsgBox "The input file is not a PDF file!", vbCritical, "File Type Error"
            Exit Sub
        End If

        'Errors are checked manually while the Acrobat objects are being created.
        On Error Resume Next

        'Initialize Acrobat by creating the App object.
        Set app = CreateObject("AcroExch.App")

        'Check if the object was created. In case of error release the objects and exit.
        If Err.Number <> 0 Then
            MsgBox "Could not create the Adobe Application object!", vbCritical, "Object Error"
            Set app = Nothing
            Exit Sub
        End If

        'Create the AVDoc object.
        Set AVDoc = CreateObject("AcroExch.AVDoc")

        'Check if the object was created. In case of error release the objects and exit.
        If Err.Number <> 0 Then
            MsgBox "Could not create the AVDoc object!", vbCritical, "Object Error"
            Set AVDoc = Nothing
            Set app = Nothing
            Exit Sub
        End If

        'Restore the default error handling.
        On Error GoTo 0

        'Open the PDF file.
        If AVDoc.Open(PDFPath, "") = True Then

            'Open successful, bring the PDF document to the front.
            AVDoc.BringToFront

            'Set the PDDoc object.
            Set pdDoc = AVDoc.GetPDDoc

            'Set the JS Object - Java Script Object.
            Set jso = pdDoc.GetJSObject

            'Search for the word.
            If Not jso Is Nothing Then

                'Loop through all the pages of the PDF.
                For i = 0 To jso.numPages - 1

                    'Loop through all the words of each page.
                    For j = 0 To jso.getPageNumWords(i) - 1

                        'Get a single word.
                        Word = jso.getPageNthWord(i, j)

                        'NOTE(review): getPageNthWord only READS a word. The Acrobat JavaScript
                        'Doc object does not appear to offer a "replacePageNthWord" method, so
                        'the page text cannot be replaced through this object - confirm against
                        'the Acrobat JavaScript API reference before attempting it.

                        'Only string results can be compared with the search text.
                        If VarType(Word) = vbString Then

                            'Case-insensitive comparison; 0 means both strings are the same.
                            If StrComp(Word, WordToFind, vbTextCompare) = 0 Then
                                'Select the word and exit. The PDF is intentionally left
                                'open so that the highlighted word stays visible.
                                Call jso.selectPageNthWord(i, j)
                                Exit Sub
                            End If

                        End If

                    Next j

                Next i

                'Word was not found, close the PDF file without saving the changes.
                AVDoc.Close True

                'Close the Acrobat application.
                app.Exit

                'Release the objects.
                Set jso = Nothing
                Set pdDoc = Nothing
                Set AVDoc = Nothing
                Set app = Nothing

                'Inform the user.
                MsgBox "The word '" & WordToFind & "' could not be found in the PDF file!", vbInformation, "Search Error"

            Else

                'The JavaScript object could not be obtained, so no search is possible.
                'Close the PDF without saving and shut Acrobat down instead of silently
                'leaving the document open.
                AVDoc.Close True
                app.Exit

                'Release the objects.
                Set pdDoc = Nothing
                Set AVDoc = Nothing
                Set app = Nothing

                'Inform the user.
                MsgBox "Could not access the JavaScript object of the PDF file!", vbCritical, "Object Error"

            End If

        Else

            'Unable to open the PDF file, close the Acrobat application.
            app.Exit

            'Release the objects.
            Set AVDoc = Nothing
            Set app = Nothing

            'Inform the user.
            MsgBox "Could not open the PDF file!", vbCritical, "File error"

        End If

    End Sub

    Kimwoo4


    • Edited by KazimJee Monday, March 4, 2019 11:10 AM
    Monday, March 4, 2019 11:05 AM