none
Delete content between code symbol in word RRS feed

  • Question

  • I use C# to process some Word documents that contain code markers, for example [start]code here[end]. I want to delete only the content between the markers. How can I keep just the markers themselves? The markers are not only [] but can also be things like <>, //, and so on.

    Any solution? Thx.

     
    • Edited by JWX123 Friday, April 24, 2015 2:10 AM keep->delete
    Friday, April 24, 2015 1:51 AM

Answers

  • Just change the code as

     /// <summary>
     /// Sample program: loads a Word document, deletes everything located between
     /// the "[start]" and "[end]" markers returned by FindString (the markers
     /// themselves are kept), saves the result and opens it.
     /// </summary>
     class Program
        {
            static void Main(string[] args)
            {
                Document doc = new Document();
                doc.LoadFromFile(@"..\..\test.docx");

                // NOTE(review): FindString is expected to return null when the text is
                // not present in the document - confirm against the library docs.
                // Dereferencing the result unconditionally would then crash, so bail
                // out with a message instead.
                var startSelection = doc.FindString("[start]", true, true);
                var endSelection = doc.FindString("[end]", true, true);
                if (startSelection == null || endSelection == null)
                {
                    System.Console.Error.WriteLine("Marker \"[start]\" or \"[end]\" was not found; nothing was removed.");
                    return;
                }

                DocumentObject start = startSelection.GetAsOneRange();
                DocumentObject end = endSelection.GetAsOneRange();
                RemoveRange(start, end);
                doc.SaveToFile(@"..\..\result.docx", FileFormat.Docx);
                System.Diagnostics.Process.Start(@"..\..\result.docx");
            }

            /// <summary>
            /// Removes every document object located between <paramref name="start"/>
            /// and <paramref name="end"/>; the two boundary objects are left in place.
            /// </summary>
            /// <param name="start">Object after which removal begins.</param>
            /// <param name="end">
            /// Object before which removal stops. The walk below only terminates
            /// early when it meets <paramref name="end"/> or one of its ancestors, so
            /// <paramref name="end"/> is assumed to come after
            /// <paramref name="start"/> in the document - TODO confirm at call sites.
            /// </param>
            /// <exception cref="System.ArgumentNullException">
            /// <paramref name="start"/> or <paramref name="end"/> is null.
            /// </exception>
            public static void RemoveRange(DocumentObject start, DocumentObject end)
            {
                if (start == null)
                {
                    throw new System.ArgumentNullException("start");
                }
                if (end == null)
                {
                    throw new System.ArgumentNullException("end");
                }
                if (object.ReferenceEquals(start, end))
                {
                    // Nothing lies between an object and itself. Without this guard the
                    // forward pass never meets a stop element and strips every
                    // following sibling on the way up to the body.
                    return;
                }

                // "end" together with all of its ancestors: meeting any of them while
                // moving forward means the end of the range has been reached.
                HashSet<DocumentObject> endElements = new HashSet<DocumentObject>();
                DocumentObject parent = end;
                while (parent != null)
                {
                    endElements.Add(parent);
                    parent = parent.Owner;
                }

                // Forward pass: climb from "start", deleting at each level the siblings
                // that follow the current node.
                parent = start.Owner;
                DocumentObject current = start;
                DocumentObject lastStart = start;
                while (parent != null)
                {
                    // Direct cast instead of "as": a non-composite owner now fails with
                    // a descriptive InvalidCastException rather than a null dereference.
                    ICompositeObject container = (ICompositeObject)parent;
                    DocumentObjectCollection objs = container.ChildObjects;
                    int index = objs.IndexOf(current) + 1;
                    while (objs.Count > index)
                    {
                        DocumentObject element = objs[index];
                        if (endElements.Contains(element))
                        {
                            // Reached the branch that holds "end": remember where the
                            // forward pass stopped and finish it.
                            parent = null;
                            lastStart = current;
                            break;
                        }
                        // Index is not advanced: the next sibling shifts into this slot.
                        objs.RemoveAt(index);
                    }
    
                    if (parent != null)
                    {
                        if (parent.DocumentObjectType == DocumentObjectType.Body)
                        {
                            // The body was emptied after the start branch without
                            // meeting "end"; the backward pass will stop at the body's
                            // owner instead.
                            lastStart = parent.Owner;
                            break;
                        }
                        current = parent;
                        parent = parent.Owner;
                    }
                }
    
                // Backward pass: climb from "end", deleting at each level the siblings
                // that precede the current node until the forward pass's stop point.
                parent = end.Owner;
                current = end;
                while (parent != null)
                {
                    ICompositeObject container = (ICompositeObject)parent;
                    DocumentObjectCollection objs = container.ChildObjects;
                    int index = objs.IndexOf(current) - 1;
                    while (index >= 0)
                    {
                        DocumentObject element = objs[index];
                        if (lastStart == element)
                        {
                            parent = null;
                            break;
                        }
                        objs.RemoveAt(index);
                        index--;
                    }
    
                    if (parent != null)
                    {
                        current = parent;
                        parent = parent.Owner;
                    }
                }
            }
        }

    • Marked as answer by L.HlModerator Wednesday, May 6, 2015 8:49 AM
    Friday, April 24, 2015 2:22 AM

All replies

  • I use C# to process some word documents, some code symbol in it , for example [start]code here[end], I want to just keep the content between. Then how to delete these  symbols?  Not only [] but can also be like <>,//....

    Any solution? Thx.

     
    What do the markers look like in the document? Are they on the same line or on separate lines, or are they scattered irregularly throughout the text? Please provide more details.

    Friday, April 24, 2015 1:55 AM
  •  They all look like the following (taking [] only as an example):

    [start]

    code here

    [end]

    -----------------

    I want them look like

    [start]

    [end]

    Then how? Thank u in advance.

    Friday, April 24, 2015 1:58 AM
  •  They all like following(only take  [] for example):

    [start]

    code here

    [end]

    -----------------

    I want them look like

    [start]

    [end]

    Then how? Thank u in advance.

    Friday, April 24, 2015 2:00 AM
  •  They all like following(only take  [] for example):

    [start]

    code here

    [end]

    -----------------

    I want them look like

    [start]

    [end]

    Then how? Thank u in advance.

    Try the following code:

    /// <summary>
    /// Loads "test.docx" and, in every section that contains both a "[start]" and
    /// an "[end]" paragraph, removes the paragraphs between them (the marker
    /// paragraphs are kept). The result is saved as "result.docx" and opened.
    /// </summary>
    static void Main(string[] args)
    {
        Document doc = new Document();
        doc.LoadFromFile("test.docx");
        foreach (Section sec in doc.Sections)
        {
            int startIndex = GetIndex(sec, "[start]");
            int endIndex = GetIndex(sec, "[end]");

            // A returned index alone does not prove the marker exists (an index of 0
            // may simply mean "no match"), so verify that the paragraph at each index
            // really is the marker. Otherwise a section lacking "[start]" would lose
            // every paragraph between index 0 and "[end]".
            if (!IsMarkerAt(sec, startIndex, "[start]") || !IsMarkerAt(sec, endIndex, "[end]"))
            {
                continue;
            }

            int i = startIndex + 1;
            while (i < endIndex)
            {
                // "i" stays put: after a removal the next paragraph moves into slot i
                // and the end marker moves one position closer.
                sec.Paragraphs.Remove(sec.Paragraphs[i]);
                endIndex -= 1;
            }
        }
        doc.SaveToFile("result.docx", FileFormat.Docx);
        System.Diagnostics.Process.Start("result.docx");
    }

    /// <summary>
    /// Tells whether <paramref name="index"/> is a valid paragraph position in
    /// <paramref name="sec"/> and that paragraph's text equals <paramref name="marker"/>.
    /// </summary>
    private static bool IsMarkerAt(Section sec, int index, string marker)
    {
        return index >= 0
            && index < sec.Paragraphs.Count
            && sec.Paragraphs[index].Text == marker;
    }
    /// <summary>
    /// Finds the position of the paragraph in <paramref name="sec"/> whose text is
    /// exactly <paramref name="SpecialString"/>.
    /// </summary>
    /// <param name="sec">Section whose paragraphs are scanned in order.</param>
    /// <param name="SpecialString">Text a paragraph must equal to count as a match.</param>
    /// <returns>
    /// Zero-based position of the last matching paragraph. Also 0 when no paragraph
    /// matches, so a result of 0 is ambiguous for callers.
    /// </returns>
    public static int GetIndex(Section sec, string SpecialString)
    {
        int lastMatch = 0;
        int position = -1;
        foreach (Paragraph para in sec.Paragraphs)
        {
            position++;
            if (para.Text != SpecialString)
            {
                continue;
            }

            // Later matches overwrite earlier ones, so the last match wins.
            lastMatch = position;
        }
        return lastMatch;
    }
    For other markers such as <>, simply replace the marker strings in the code. Note that you first need to add a reference to the DOC component's .dll. Give it a try.

    Friday, April 24, 2015 2:04 AM
  • Please let me know whether it worked.

    • Edited by jujubeee Friday, April 24, 2015 2:16 AM Ask in advance
    Friday, April 24, 2015 2:14 AM
  • Whether it worked fine ,just let me know.

    I just tried it. It is a little inconvenient (I had to add those references), but it worked fine. Now I have another question: what if I want the markers to end up on the same line?

    As 

    [start][end]. Any improved method?

    Friday, April 24, 2015 2:19 AM
  • Just change the code as

     /// <summary>
     /// Sample program: loads a Word document, deletes everything located between
     /// the "[start]" and "[end]" markers returned by FindString (the markers
     /// themselves are kept), saves the result and opens it.
     /// </summary>
     class Program
        {
            static void Main(string[] args)
            {
                Document doc = new Document();
                doc.LoadFromFile(@"..\..\test.docx");

                // NOTE(review): FindString is expected to return null when the text is
                // not present in the document - confirm against the library docs.
                // Dereferencing the result unconditionally would then crash, so bail
                // out with a message instead.
                var startSelection = doc.FindString("[start]", true, true);
                var endSelection = doc.FindString("[end]", true, true);
                if (startSelection == null || endSelection == null)
                {
                    System.Console.Error.WriteLine("Marker \"[start]\" or \"[end]\" was not found; nothing was removed.");
                    return;
                }

                DocumentObject start = startSelection.GetAsOneRange();
                DocumentObject end = endSelection.GetAsOneRange();
                RemoveRange(start, end);
                doc.SaveToFile(@"..\..\result.docx", FileFormat.Docx);
                System.Diagnostics.Process.Start(@"..\..\result.docx");
            }

            /// <summary>
            /// Removes every document object located between <paramref name="start"/>
            /// and <paramref name="end"/>; the two boundary objects are left in place.
            /// </summary>
            /// <param name="start">Object after which removal begins.</param>
            /// <param name="end">
            /// Object before which removal stops. The walk below only terminates
            /// early when it meets <paramref name="end"/> or one of its ancestors, so
            /// <paramref name="end"/> is assumed to come after
            /// <paramref name="start"/> in the document - TODO confirm at call sites.
            /// </param>
            /// <exception cref="System.ArgumentNullException">
            /// <paramref name="start"/> or <paramref name="end"/> is null.
            /// </exception>
            public static void RemoveRange(DocumentObject start, DocumentObject end)
            {
                if (start == null)
                {
                    throw new System.ArgumentNullException("start");
                }
                if (end == null)
                {
                    throw new System.ArgumentNullException("end");
                }
                if (object.ReferenceEquals(start, end))
                {
                    // Nothing lies between an object and itself. Without this guard the
                    // forward pass never meets a stop element and strips every
                    // following sibling on the way up to the body.
                    return;
                }

                // "end" together with all of its ancestors: meeting any of them while
                // moving forward means the end of the range has been reached.
                HashSet<DocumentObject> endElements = new HashSet<DocumentObject>();
                DocumentObject parent = end;
                while (parent != null)
                {
                    endElements.Add(parent);
                    parent = parent.Owner;
                }

                // Forward pass: climb from "start", deleting at each level the siblings
                // that follow the current node.
                parent = start.Owner;
                DocumentObject current = start;
                DocumentObject lastStart = start;
                while (parent != null)
                {
                    // Direct cast instead of "as": a non-composite owner now fails with
                    // a descriptive InvalidCastException rather than a null dereference.
                    ICompositeObject container = (ICompositeObject)parent;
                    DocumentObjectCollection objs = container.ChildObjects;
                    int index = objs.IndexOf(current) + 1;
                    while (objs.Count > index)
                    {
                        DocumentObject element = objs[index];
                        if (endElements.Contains(element))
                        {
                            // Reached the branch that holds "end": remember where the
                            // forward pass stopped and finish it.
                            parent = null;
                            lastStart = current;
                            break;
                        }
                        // Index is not advanced: the next sibling shifts into this slot.
                        objs.RemoveAt(index);
                    }
    
                    if (parent != null)
                    {
                        if (parent.DocumentObjectType == DocumentObjectType.Body)
                        {
                            // The body was emptied after the start branch without
                            // meeting "end"; the backward pass will stop at the body's
                            // owner instead.
                            lastStart = parent.Owner;
                            break;
                        }
                        current = parent;
                        parent = parent.Owner;
                    }
                }
    
                // Backward pass: climb from "end", deleting at each level the siblings
                // that precede the current node until the forward pass's stop point.
                parent = end.Owner;
                current = end;
                while (parent != null)
                {
                    ICompositeObject container = (ICompositeObject)parent;
                    DocumentObjectCollection objs = container.ChildObjects;
                    int index = objs.IndexOf(current) - 1;
                    while (index >= 0)
                    {
                        DocumentObject element = objs[index];
                        if (lastStart == element)
                        {
                            parent = null;
                            break;
                        }
                        objs.RemoveAt(index);
                        index--;
                    }
    
                    if (parent != null)
                    {
                        current = parent;
                        parent = parent.Owner;
                    }
                }
            }
        }

    • Marked as answer by L.HlModerator Wednesday, May 6, 2015 8:49 AM
    Friday, April 24, 2015 2:22 AM
  • ok thanks 

    Wednesday, April 29, 2015 2:46 AM