Friday, April 18, 2014

Reading xml/*.docx files in C#.net without using office interop



Reading xml/*.docx files in C#.net without using office interop


Option 1:
Change the extension of the *.docx file to *.xml and save it.
Option 2:
Change the extension of the *.docx file to *.zip and save it.
Now extract the archive and look among the extracted files for document.xml (under the word folder).

Once the *.xml file is ready, we can read it using either of the following:

1. XDocument  class 

2. XmlTextReader

// Using  XDocument -


using System;
using System.Collections.Generic;
using System.Xml.Linq;

namespace WinwordRead
{
    /// <summary>
    /// Console sample that loads an XML file with <see cref="XDocument"/> and prints
    /// the node type of each top-level node, followed by the text value and name of
    /// each top-level element.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Loads the XML file, writes the results to the console and waits for a key press.
        /// Any exception is caught and written to the console instead of being propagated.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            try
            {
                // For an extracted *.docx package, point this at the main document part
                // instead, e.g. @"..\..\RAC\word\document.xml".
                string path = @"..\..\Test.xml";

                // Load the XML.
                XDocument xdoc = XDocument.Load(path);

                // Print the node type of every top-level node of the document.
                foreach (XNode node in xdoc.Nodes())
                {
                    Console.WriteLine(node.NodeType.ToString());
                }

                // Print the concatenated text value and the name of every top-level element.
                foreach (XElement element in xdoc.Elements())
                {
                    Console.WriteLine(element.Value);
                    Console.WriteLine(element.Name.ToString());
                }

                // Keep the console window open until a key is pressed.
                Console.Read();
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
        }
    }
}


//  Source code using XmlTextReader
             

                XmlTextReader txtreader = new XmlTextReader(path);
                var stringlist = new List<string>();

                while (txtreader.Read())
                {

                    var d = txtreader.NodeType.ToString().ToLower().Contains("grid");

                    if (!String.IsNullOrEmpty(txtreader.Value.ToString()) && !String.IsNullOrWhiteSpace(txtreader.Value)
                        && txtreader.NodeType.ToString().ToLower().Equals("text"))
                    {
                        string content = txtreader.Value.ToString() + "\n";


                        stringlist.Add(content);
                    }

                }



How to read winword (*.docx) files using C#.net using Office Interop

How to read winword (*.docx) files using C#.net using Office Interop



//Reading *.docx files using C#

//Source Code:

using System;
using Microsoft.Office.Interop.Word;

namespace WinwordRead
{
    /// <summary>
    /// Console sample that opens a Word document through Office Interop and prints the
    /// text of every paragraph. Requires a reference to Microsoft.Office.Interop.Word.
    /// </summary>
    class Program
    {
        /// <summary>
        /// Opens the document read-only, writes the text of all paragraphs to the console,
        /// then closes the document and quits Word. Any exception is caught and written
        /// to the console instead of being propagated.
        /// </summary>
        /// <param name="args">Command-line arguments (not used).</param>
        static void Main(string[] args)
        {
            try
            {
                // Start Word.
                Application word = new Application();
                try
                {
                    object miss = System.Reflection.Missing.Value;
                    object path = @"..\RACode.docx";
                    object readOnly = true;

                    // Open the Word document read-only; all other optional arguments are left missing.
                    Document docs = word.Documents.Open(ref path, ref miss, ref readOnly,
                        ref miss, ref miss, ref miss, ref miss, ref miss, ref miss, ref miss,
                        ref miss, ref miss, ref miss, ref miss, ref miss, ref miss);
                    try
                    {
                        // A StringBuilder avoids re-copying the accumulated text for every paragraph.
                        var totaltext = new System.Text.StringBuilder();

                        // Enumerate the paragraphs in document order.
                        foreach (Paragraph paragraph in docs.Paragraphs)
                        {
                            totaltext.Append(" \r\n ").Append(paragraph.Range.Text);
                        }

                        Console.WriteLine(totaltext.ToString());
                    }
                    finally
                    {
                        // Close the document even if reading a paragraph failed.
                        docs.Close();
                    }
                }
                finally
                {
                    // Always quit Word; otherwise a failure leaves the Word process running.
                    word.Quit();
                }
            }
            catch (Exception ex)
            {
                Console.WriteLine(ex.ToString());
            }
        }
    }
}