C#使用Aspose.Words读取Word文档里的文本域

来源:互联网 发布:网络诈骗报案流程 编辑:程序博客网 时间:2024/06/10 03:42

这是我开博客后的第一篇分享,用来记录代码。


C#使用Aspose.Words读取Word文档里的文本域


using Aspose.Words;
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Text;
using System.Threading.Tasks;
using System.Text.RegularExpressions;

namespace ContractDocumentMaster {
    /// <summary>
    /// Console utility that opens a Word document with Aspose.Words, prints a
    /// short preview of the top-level nodes of the first section's body, and
    /// reports how often each node type and each merge-field placeholder
    /// (text of the form «FieldName») occurs.
    /// Originally written against Aspose.Words 6.5.0.0.
    /// </summary>
    public class Program {
        /// <summary>Directory that contains the document to analyse.</summary>
        public static string DocPath = @"E:\work_api\";

        /// <summary>File name of the document to analyse.</summary>
        public static string DocName = "contractTemplate.doc";

        /// <summary>
        /// Number of leading body nodes that are echoed to the console.
        /// Only the preview is capped; statistics cover every node.
        /// </summary>
        private const int MaxPreviewNodes = 31;

        /// <summary>
        /// Matches a placeholder such as «工程名称» and captures the name between
        /// the guillemets. The name may not contain whitespace or '»'.
        /// Built once and shared instead of being re-created on every call.
        /// </summary>
        private static readonly Regex MergeFieldPattern =
            new Regex(@"«([^\s»]*)»", RegexOptions.Multiline);

        /// <summary>
        /// Entry point: runs the document analysis and waits for a key press
        /// so the console window stays open.
        /// </summary>
        /// <param name="args">Command-line arguments (unused).</param>
        public static void Main(string[] args) {
            Console.WriteLine("Program running...wait...\n\n");
            // For diagnostics, PrintAllAsposeWordNodeType() or TestRegExp()
            // can be called here instead of Run().
            Run();
            Console.WriteLine("\n\nProgram Finished. Press any key to exit....");
            Console.ReadKey();
        }

        /// <summary>
        /// Manual check of the placeholder regular expression: prints every
        /// captured field name found in a fixed sample string.
        /// </summary>
        public static void TestRegExp() {
            var sample = "«工程名称»---------«工程__名称»";
            for (var match = MergeFieldPattern.Match(sample); match.Success; match = match.NextMatch()) {
                Console.WriteLine("Match : " + match.Groups[1].Value);
            }
        }

        /// <summary>
        /// Loads the document at <see cref="DocPath"/> + <see cref="DocName"/>,
        /// walks the direct children of the first section's body and prints:
        /// a preview of the first <see cref="MaxPreviewNodes"/> nodes, the number
        /// of paragraphs without runs, a histogram of node types, and a
        /// histogram of merge-field placeholder names.
        /// </summary>
        public static void Run() {
            // Path.Combine yields the same path as plain concatenation for the
            // default values and still works if DocPath lacks a trailing separator.
            var doc = new Aspose.Words.Document(Path.Combine(DocPath, DocName));

            var nodeTypeCounts = new Dictionary<string, int>();
            var mergeFieldCounts = new Dictionary<string, int>();
            var emptyLine = 0;
            var inspected = 0;

            foreach (Node node in doc.FirstSection.Body.ChildNodes) {
                var typeName = node.ToString().Trim();
                Increment(nodeTypeCounts, typeName);

                // Collected for every node so the reported totals are real
                // totals rather than totals of the previewed prefix.
                CollectMergeField(node, mergeFieldCounts);

                var value = string.Empty;
                var showNodeType = true;
                var paragraph = node as Paragraph;
                if (paragraph != null) {
                    if (paragraph.Runs != null && paragraph.Runs.Count > 0) {
                        value = paragraph.Runs[0].Text.Trim();
                    } else {
                        // A paragraph without runs is reported as an empty line
                        // and left out of the preview.
                        showNodeType = false;
                        emptyLine++;
                    }
                }

                if (showNodeType && inspected < MaxPreviewNodes) {
                    Console.WriteLine(typeName.Replace("Aspose.Words.", ""));
                    Console.WriteLine("\t\t" + value);
                }
                inspected++;
            }

            Console.WriteLine("\n\n");
            Console.WriteLine("Total Empty Line :" + emptyLine);
            PrintCounts("Total Node Type :", nodeTypeCounts);
            Console.WriteLine("\n\n");
            PrintCounts("Total Field Node Type :", mergeFieldCounts);
        }

        /// <summary>
        /// Adds every merge-field placeholder found in the text of
        /// <paramref name="node"/> (including the text of its descendants, so
        /// placeholders inside tables are found too) to <paramref name="dic"/>,
        /// keyed by field name with the occurrence count as value.
        /// </summary>
        /// <param name="node">Node to scan; <c>null</c> is ignored.</param>
        /// <param name="dic">Accumulator mapping field name to occurrence count.</param>
        /// <remarks>
        /// Callers should pass disjoint subtrees; passing both a node and one
        /// of its descendants counts the shared text twice.
        /// </remarks>
        public static void CollectMergeField(Node node, Dictionary<string, int> dic) {
            if (node == null) {
                return;
            }

            var text = node.GetText();
            if (string.IsNullOrEmpty(text)) {
                return;
            }

            for (var match = MergeFieldPattern.Match(text); match.Success; match = match.NextMatch()) {
                Increment(dic, match.Groups[1].Value);
            }
        }

        /// <summary>
        /// Writes <paramref name="o"/> to the console behind a highlighted prefix.
        /// </summary>
        /// <param name="o">Value to print; <c>null</c> prints only the prefix.</param>
        public static void HightLinePrint(object o) {
            // String concatenation formats null as an empty string, so a null
            // argument no longer throws.
            Console.WriteLine("########\t\t" + o);
        }

        /// <summary>
        /// Prints every member of the Aspose.Words <c>NodeType</c> enumeration.
        /// </summary>
        public static void PrintAllAsposeWordNodeType() {
            foreach (var nodeType in Enum.GetValues(typeof(NodeType))) {
                Console.WriteLine(nodeType);
            }
        }

        /// <summary>Increments the counter stored under <paramref name="key"/>, starting at 1.</summary>
        private static void Increment(Dictionary<string, int> counts, string key) {
            int current;
            counts.TryGetValue(key, out current); // leaves current at 0 when the key is new
            counts[key] = current + 1;
        }

        /// <summary>Prints a title with the number of entries, then one numbered line per entry.</summary>
        private static void PrintCounts(string title, Dictionary<string, int> counts) {
            Console.WriteLine(title + counts.Count);
            var index = 1;
            foreach (var entry in counts) {
                Console.WriteLine(index++ + " : " + entry.Key + "  ==>  " + entry.Value);
            }
        }
    }
}


0 0