locked
XPS Document extract text and location (xy) of text RRS feed

  • Question

  • Hello,

     

    I am using the following code to extract text from an XPS document. The text extraction works great, but that is not enough; I also need the x,y location of each piece of text, preferably in pixels. I know there are OriginX and OriginY attributes, as shown below, but I don't know whether I can use them as the XY location. If I can, what units are they in? If they are in points, how do I convert them to pixels? Can RenderTransformOrigin together with OriginX give me the X location, and similarly with OriginY the Y location?

    pageContentReader.GetAttribute("OriginX");
        "898.4"
        pageContentReader.GetAttribute("OriginY");
        "2128.16"

    /// <summary>
    /// Reads every fixed page of <paramref name="xpsDocument"/>, gathers the
    /// <c>UnicodeString</c> attribute of each <c>Glyphs</c> node, and appends one
    /// comma-joined string per page to <c>pageTextList</c>.
    /// </summary>
    /// <param name="xpsDocument">
    /// The XPS document to read. It is closed before this method returns, on every
    /// exit path (no sequence reader, normal completion, or an exception while reading).
    /// </param>
    private void ExtractTextFromXps(XpsDocument xpsDocument)
    {
        const string UnicodeString = "UnicodeString";
        const string GlyphsString = "Glyphs";

        try
        {
            var fixedDocSeqReader = xpsDocument.FixedDocumentSequenceReader;
            if (fixedDocSeqReader == null)
            {
                // Nothing to read; the finally block still closes the document.
                return;
            }

            foreach (IXpsFixedDocumentReader fixedDocumentReader in fixedDocSeqReader.FixedDocuments)
            {
                foreach (IXpsFixedPageReader pageReader in fixedDocumentReader.FixedPages)
                {
                    var pageContentReader = pageReader.XmlReader;
                    if (pageContentReader == null)
                    {
                        continue;
                    }

                    var texts = new List<string>();

                    while (pageContentReader.Read())
                    {
                        // Only Glyphs nodes that carry attributes can hold text.
                        if (pageContentReader.Name != GlyphsString || !pageContentReader.HasAttributes)
                        {
                            continue;
                        }

                        // Look the attribute up once and reuse the value.
                        string unicodeText = pageContentReader.GetAttribute(UnicodeString);
                        if (unicodeText != null)
                        {
                            texts.Add(unicodeText);
                        }
                    }

                    // One entry per page, with that page's glyph runs separated by commas.
                    pageTextList.Add(String.Join(",", texts));
                }
            }
        }
        finally
        {
            // Previously Close() was skipped on the early return and on exceptions,
            // which left the underlying package open.
            xpsDocument.Close();
        }
    }


    Thursday, May 21, 2020 11:59 PM