none
Convert Word to HTML RSS feed

  • Question

  • html,body{padding:0;margin:0;font-family:Verdana,Geneva,sans-serif;background:#fff;}html{font-size:100%}body{font-size:.75em;line-height:1.5;padding-top:1px;margin-top:-1px;}h1{font-size:2em;margin:.67em 0}h2{font-size:1.5em}h3{font-size:1.16em}h4{font-size:1em}h5{font-size:.83em}h6{font-size:.7em}p{margin:0 0 1em;padding:0 .2em}.t-marker{display:none;}.t-paste-container{;left:-10000px;width:1px;height:1px;overflow:hidden}ul,ol{padding-left:2.5em}a{color:#00a}code, pre{font-size:1.23em}
    Hi all

       I am developing an e-learning application that provides a facility to convert Word documents into HTML. I am able to convert Word to HTML, but the alignment is not correct after the conversion and the colors of the images change. Here is my code:

     


    // Word automation object used by btnUpload_Click to open and convert the uploaded document.
    // NOTE(review): as a field initializer this starts Word whenever the containing class is
    // instantiated, not only when a file is converted — consider creating it lazily.
    protected Application objWord = new Application();

    // Path of the folder that receives uploaded files (assigned in btnUpload_Click).
    protected string strPathToUpload;

    // Path of the folder where the converted HTML files are saved (assigned in btnUpload_Click).
    protected string strPathToConvert;

    // File format handed to Document.SaveAs; 10 requests filtered HTML output.
    object fltDocFormat = 10;

    // Placeholder passed by reference for the optional interop parameters that are not supplied.
    protected object missing = System.Reflection.Missing.Value;

    // Flags handed by reference to Documents.Open in btnUpload_Click.
    protected object readOnly = false;

    protected object isVisible = false;


      protected void btnUpload_Click(object sender, EventArgs e)
            {
                //Code to check if user has selected any file on the form

                if (!(fUpload.HasFile))
                {
                    lblMessage.Text = "Please choose file to upload";
                }

                else
                {

                    try
                    {

                        //To check the file extension if it is word document or something else

                        string strFileName = fUpload.FileName;

                        string[] strSep = fUpload.FileName.Split('.');

                        int arrLength = strSep.Length - 1;

                        string strExt = strSep[arrLength].ToString().ToUpper();

                        //Save the uploaded file to the folder

                        strPathToUpload = Server.MapPath("Files");

                        //Map-path to the folder where html to be saved

                        strPathToConvert = Server.MapPath("WordToHtml");

                        object FileName = strPathToUpload + "\\" + fUpload.FileName;

                        object FileToSave = strPathToConvert + "\\" + strSep[0] + ".htm";

                        if (strExt.ToUpper().Equals("DOC") || strExt.ToUpper().Equals("DOCX"))
                        {

                            fUpload.SaveAs(strPathToUpload + "\\" + fUpload.FileName);

                            lblMessage.Text = "File uploaded successfully";

                            //open the file internally in word. In the method all the parameters should be passed by object reference

                            objWord.Documents.Open(ref FileName, ref readOnly, ref missing, ref missing, ref missing, ref missing,
                            ref missing, ref  missing, ref missing, ref missing, ref isVisible, ref missing, ref missing, ref missing,
                            ref missing, ref missing);

                            //Do the background activity

                            objWord.Visible = false;

                            Microsoft.Office.Interop.Word.Document oDoc = objWord.ActiveDocument;
                            objWord.ActiveDocument.OMathLeftMargin = 70;
                            oDoc.PageSetup.Application.ActiveDocument.OMathLeftMargin = 70;

                            oDoc.SaveAs(ref FileToSave, ref fltDocFormat, ref missing, ref missing, ref missing, ref missing,

                            ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing, ref missing,

                            ref missing, ref missing);

                            lblMessage.Text = fUpload.FileName + " converted to HTML successfully";

                        }

                        else
                        {

                            lblMessage.Text = "Invalid file selected!";

                        }

                        //Close/quit word

                        objWord.Quit(ref missing, ref missing, ref missing);

                    }

                    catch (Exception ex)
                    {

                        Response.Write(ex.Message);

                    }

                }
            }

    //Aspx code goes here..............................

    <div>

    <%-- File picker and the button that triggers btnUpload_Click in the code-behind. --%>
    <asp:FileUpload ID="fUpload" runat="server" />


    <asp:Button ID="btnUpload" runat="server" OnClick="btnUpload_Click" Text="Upload" />

    <asp:Label

     

     

     

     

     

    Wednesday, October 30, 2013 4:30 AM

Answers

  • Saving a Word file to HTML file format was mainly introduced in order to be able to 1) post a Word document as a web page and later 2) in order to "round-trip" Word documents for editing between IE and the Word application. Word's HTML file format is undocumented and unsupported.

    The best you can hope to do is to test with the various "Web" options available in the UI (all have a corresponding Options property in Word's API) to see if you can find one that gives the result you require.

    If you would like to discuss these options, I recommend you ask in an end-user forum, where you're more likely to find people who use this kind of conversion and have a feel for how the conversion works and can be tweaked. When you post, you do need to describe exactly what kind of problem you're encountering and the version of Word you're working with.

    http://answers.microsoft.com/en-us/office/forum/word

    It's possible that Word's conversion filter for HTML simply doesn't do what you'd like it to do. Since I see OMath in your code, you must be targeting a relatively recent version of Word. It's possible that something has changed in that version's layout engine that hasn't been included in the HTML converter. If that is the case, you'll need to analyze the settings in the Word APIs for the object, then make changes in the resulting HTML in order to bring it in line with what you expect.


    Cindy Meister, VSTO/Word MVP, my blog

    Wednesday, October 30, 2013 3:02 PM
    Moderator

All replies

  • Saving a Word file to HTML file format was mainly introduced in order to be able to 1) post a Word document as a web page and later 2) in order to "round-trip" Word documents for editing between IE and the Word application. Word's HTML file format is undocumented and unsupported.

    The best you can hope to do is to test with the various "Web" options available in the UI (all have a corresponding Options property in Word's API) to see if you can find one that gives the result you require.

    If you would like to discuss these options, I recommend you ask in an end-user forum, where you're more likely to find people who use this kind of conversion and have a feel for how the conversion works and can be tweaked. When you post, you do need to describe exactly what kind of problem you're encountering and the version of Word you're working with.

    http://answers.microsoft.com/en-us/office/forum/word

    It's possible that Word's conversion filter for HTML simply doesn't do what you'd like it to do. Since I see OMath in your code, you must be targeting a relatively recent version of Word. It's possible that something has changed in that version's layout engine that hasn't been included in the HTML converter. If that is the case, you'll need to analyze the settings in the Word APIs for the object, then make changes in the resulting HTML in order to bring it in line with what you expect.


    Cindy Meister, VSTO/Word MVP, my blog

    Wednesday, October 30, 2013 3:02 PM
    Moderator
  • If you do not mind having a plug-in in your app, Aceoffix can be an alternative. With Aceoffix, you can save documents to the server in HTML format, so they can be viewed by users who do not have Microsoft Office installed on their local computers.
    Friday, September 18, 2015 2:46 AM
  • If you need to convert multiple Word files to other formats, like TXT, RTF, HTML or PDF, run the script below.

    Option Explicit
    
    Sub ChangeDocsToTxtOrRTFOrHTML()
    ' Converts every file in a user-chosen folder to TXT, RTF, HTML or
    ' (Word 2007 and later) PDF. Output goes to a folder whose path is the
    ' entered path with "Converted" appended (e.g. C:\myDocs -> C:\myDocsConverted).
    ' Files that cannot be opened or saved are skipped and counted.
        Dim fs As Object
        Dim oFolder As Object
        Dim tFolder As Object
        Dim oFile As Object
        Dim d As Document
        Dim strDocName As String
        Dim strTarget As String
        Dim intPos As Long
        Dim locFolder As String
        Dim outFolder As String
        Dim fileType As String
        Dim strPrompt As String
        Dim blnAllowPdf As Boolean
        Dim lngFailed As Long

        locFolder = InputBox("Enter the folder path to DOCs", "File Conversion", "C:\myDocs")
        If Len(locFolder) = 0 Then Exit Sub          ' prompt cancelled or left empty

        Set fs = CreateObject("Scripting.FileSystemObject")
        If Not fs.FolderExists(locFolder) Then
            MsgBox "Folder not found: " & locFolder, vbExclamation, "File Conversion"
            Exit Sub
        End If

        ' Application.Version is a string such as "12.0"; Val gives a numeric
        ' comparison that does not depend on implicit string-to-number coercion.
        blnAllowPdf = (Val(Application.Version) >= 12)
        If blnAllowPdf Then
            strPrompt = "Change DOC to TXT, RTF, HTML or PDF(2007+ only)"
        Else
            strPrompt = "Change DOC to TXT, RTF, HTML"
        End If

        Do
            fileType = UCase(InputBox(strPrompt, "File Conversion", "TXT"))
            If Len(fileType) = 0 Then Exit Sub       ' allow the user to cancel
        Loop Until (fileType = "TXT" Or fileType = "RTF" Or fileType = "HTML" _
                    Or (blnAllowPdf And fileType = "PDF"))

        ' Strings are joined with "&"; "And" is the logical/bitwise operator.
        outFolder = locFolder & "Converted"
        Set oFolder = fs.GetFolder(locFolder)
        If Not fs.FolderExists(outFolder) Then fs.CreateFolder outFolder
        Set tFolder = fs.GetFolder(outFolder)

        Application.ScreenUpdating = False

        ' Best effort per file: a failure skips that file instead of stopping the batch.
        On Error Resume Next
        For Each oFile In oFolder.Files
            Err.Clear
            Set d = Nothing
            Set d = Application.Documents.Open(oFile.Path)
            If d Is Nothing Then
                lngFailed = lngFailed + 1
            Else
                ' Drop the extension, if the name has one.
                strDocName = d.Name
                intPos = InStrRev(strDocName, ".")
                If intPos > 1 Then strDocName = Left(strDocName, intPos - 1)
                strTarget = fs.BuildPath(tFolder.Path, strDocName)

                Err.Clear
                Select Case fileType
                    Case "TXT"
                        d.SaveAs FileName:=strTarget & ".txt", FileFormat:=wdFormatText
                    Case "RTF"
                        d.SaveAs FileName:=strTarget & ".rtf", FileFormat:=wdFormatRTF
                    Case "HTML"
                        d.SaveAs FileName:=strTarget & ".html", FileFormat:=wdFormatFilteredHTML
                    Case "PDF"
                        d.ExportAsFixedFormat OutputFileName:=strTarget & ".pdf", _
                            ExportFormat:=wdExportFormatPDF
                End Select
                If Err.Number <> 0 Then lngFailed = lngFailed + 1

                ' Close the document this iteration opened, without a save prompt.
                d.Close SaveChanges:=wdDoNotSaveChanges
            End If
        Next oFile
        On Error GoTo 0

        Application.ScreenUpdating = True

        If lngFailed > 0 Then
            MsgBox lngFailed & " file(s) could not be converted.", vbExclamation, "File Conversion"
        End If
    End Sub


    Knowledge is the only thing that I can give you, and still retain, and we are both better off for it.


    • Edited by ryguy72 Thursday, January 7, 2016 3:42 AM
    Thursday, January 7, 2016 3:32 AM