Friday, April 18, 2014

Reading XML/*.docx files in C#.NET without using Office interop



Reading XML/*.docx files in C#.NET without using Office interop


Option 1:
Change the extension of the *.docx file to *.xml and save it.
Option 2:
Change the extension of the *.docx file to *.zip and save it.
Then extract the archive and look inside it for document.xml (it is in the "word" folder).

Once the *.xml file is ready, we can read it with either of the following:

1. XDocument  class 

2. XmlTextReader

// Using XDocument


using System;
using System.Collections.Generic;
using System.Xml.Linq;

namespace WinwordRead
{
    /// <summary>
    /// Console sample that loads an XML file with <see cref="XDocument"/> and
    /// prints information about its top-level nodes and elements.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the XML file, writes the node type of each top-level node,
        /// then the text value and name of each top-level element, and finally
        /// waits for console input. Any exception is written to the console.
        /// </summary>
        /// <param name="args">Command-line arguments; not used.</param>
        static void Main(string[] args)
        {
            try
            {
                // For an extracted *.docx, point this at the main document part instead:
                //   @"..\..\RAC\word\document.xml"
                string path = @"..\..\Test.xml";

                // Load the whole XML document into memory.
                XDocument xdoc = XDocument.Load(path);

                // Nodes() yields only the document's direct children, so this
                // prints the type of each top-level node, not the node content.
                foreach (XNode node in xdoc.Nodes())
                {
                    Console.WriteLine(node.NodeType.ToString());
                }

                // Elements() yields the document's direct child elements.
                foreach (XElement element in xdoc.Elements())
                {
                    // Value is the concatenated text of the element and its descendants.
                    Console.WriteLine(element.Value);
                    Console.WriteLine(element.Name.ToString());
                }

                // Block until input is read (presumably to keep the console window open).
                Console.Read();
            }
            catch (Exception ex)
            {
                // Sample-level handling: report the failure instead of crashing.
                Console.WriteLine(ex.ToString());
            }
        }
    }
}


// Source code using XmlTextReader (needs "using System.Xml;"; "path" is the XML file path, as in the XDocument sample above)
             

                XmlTextReader txtreader = new XmlTextReader(path);
                var stringlist = new List<string>();

                while (txtreader.Read())
                {

                    var d = txtreader.NodeType.ToString().ToLower().Contains("grid");

                    if (!String.IsNullOrEmpty(txtreader.Value.ToString()) && !String.IsNullOrWhiteSpace(txtreader.Value)
                        && txtreader.NodeType.ToString().ToLower().Equals("text"))
                    {
                        string content = txtreader.Value.ToString() + "\n";


                        stringlist.Add(content);
                    }

                }



No comments:

Post a Comment