Hello,
I am using the following code to extract text from an XPS document. The text extraction works great, but that is not enough; I also need the x,y location of the text, preferably in pixels. I know there are OriginX and OriginY attributes, as shown below, but I don't know whether I can use them as
the XY location. If I can use them as the XY location, what units are they in? If they are in points, how do I convert them to pixels? Can RenderTransformOrigin together with OriginX give me the X location, and similarly with OriginY the Y location?
pageContentReader.GetAttribute("OriginX");
"898.4"
pageContentReader.GetAttribute("OriginY");
"2128.16"
/// <summary>
/// Collects the text of every fixed page in an XPS document and then closes the document.
/// </summary>
/// <remarks>
/// For each page, the <c>UnicodeString</c> attribute of every <c>Glyphs</c> node is gathered,
/// the values are joined with commas, and the resulting string is appended to <c>pageTextList</c>.
/// The document is closed on every exit path: normal completion, the early return taken when
/// the document has no fixed document sequence reader, and when an exception propagates.
/// </remarks>
/// <param name="xpsDocument">The XPS document to read. It is closed before this method returns.</param>
private void ExtractTextFromXps(XpsDocument xpsDocument)
{
    const string UnicodeString = "UnicodeString";
    const string GlyphsString = "Glyphs";

    try
    {
        var fixedDocSeqReader = xpsDocument.FixedDocumentSequenceReader;
        if (fixedDocSeqReader == null)
        {
            return;
        }

        foreach (IXpsFixedDocumentReader fixedDocumentReader in fixedDocSeqReader.FixedDocuments)
        {
            foreach (IXpsFixedPageReader pageReader in fixedDocumentReader.FixedPages)
            {
                var pageContentReader = pageReader.XmlReader;
                if (pageContentReader == null)
                {
                    continue;
                }

                var texts = new List<string>();
                while (pageContentReader.Read())
                {
                    // Only Glyphs nodes that carry attributes can hold text.
                    if (pageContentReader.Name != GlyphsString || !pageContentReader.HasAttributes)
                    {
                        continue;
                    }

                    // Read the attribute once; it is absent (null) on Glyphs that only use Indices.
                    string unicodeText = pageContentReader.GetAttribute(UnicodeString);
                    if (unicodeText != null)
                    {
                        texts.Add(unicodeText);
                    }
                }

                // One comma-separated entry per page, in page order.
                string pageText = String.Join(",", texts);
                pageTextList.Add(pageText);
            }
        }
    }
    finally
    {
        // Release the package even on the early return above or when reading throws.
        xpsDocument.Close();
    }
}