POI之Word转化为Markdown-yellowcong
来源:互联网 发布:如何选购家具 知乎 编辑:程序博客网 时间:2024/06/09 15:35
Markdown最近特别火,同时我也想把自己以前做的笔记(用doc写的)分享出来,所以我想解析DOC文件并生成Markdown文件,然后通过代码直接将图片上传到七牛云,将文本数据传到野狗云和CSDN。为啥我喜欢用七牛和野狗呢?因为它们都有免费的份额。PS:本文还没有写完。
环境搭建
<!-- excel -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi</artifactId>
    <version>3.17</version>
</dependency>
<!-- word -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-scratchpad</artifactId>
    <version>3.17</version>
</dependency>
<!-- xlsx -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml</artifactId>
    <version>3.17</version>
</dependency>
<!-- the xlsx support depends on this package -->
<dependency>
    <groupId>org.apache.poi</groupId>
    <artifactId>poi-ooxml-schemas</artifactId>
    <version>3.17</version>
</dependency>
获取标题等级
Doc中的标题有等级,对应地,Markdown中的标题也有等级(例如 #、##、###),所以需要先判断每个段落的标题等级。
/**一级标题 begin */private static final String STYLE_TITLE_FONT_FONTNAME_1 ="宋体";private static final Integer STYLE_TITLE_FONT_SIZE_1 =48;private static final Integer TITLE_LV_1 = 1;/**一级标题 end *//**二级标题 begin */private static final String STYLE_TITLE_FONT_FONTNAME_2 ="Arial";private static final Integer STYLE_TITLE_FONT_SIZE_2 =32;private static final Integer TITLE_LV_2 = 2;/**二级标题 begin *//**三级标题 begin */private static final String STYLE_TITLE_FONT_FONTNAME_3 ="Times New Roman";private static final Integer STYLE_TITLE_FONT_SIZE_3 =32;private static final Integer TITLE_LV_3 = 3;/**三级标题 begin *//**普通的文本,不是标题*/private static final Integer TITLE_LV_0 = 0;/** * 获取标题的等级 * @param paragraph * @return 1、2、3三个等级,如果没有数据返回,那么等级为0 ,是普通文本 */private static Integer getTitleLvl(Paragraph paragraph) { //获取这一段文字的数目 int charCnts = paragraph.numCharacterRuns(); int titleLv = TITLE_LV_0; if(charCnts == 0){ return TITLE_LV_0; } CharacterRun characterRun = paragraph.getCharacterRun(0); if(characterRun != null){ //字体的名称 String fontName =characterRun.getFontName(); //字体大小 int fontSize = characterRun.getFontSize(); //一级标题的情况 if(fontName.equals(STYLE_TITLE_FONT_FONTNAME_1) && fontSize == STYLE_TITLE_FONT_SIZE_1){ //System.out.println("一级标题\t"+paragraph.text().trim()); titleLv= TITLE_LV_1; }else if(fontName.equals(STYLE_TITLE_FONT_FONTNAME_2) && fontSize == STYLE_TITLE_FONT_SIZE_2){ titleLv= TITLE_LV_2;// System.out.println("\t二级标题\t"+paragraph.text().trim()); }else if(fontName.equals(STYLE_TITLE_FONT_FONTNAME_3) && fontSize == STYLE_TITLE_FONT_SIZE_3){ titleLv= TITLE_LV_3; //System.out.println("\t\t三级标题\t"+paragraph.text().trim()); } } return titleLv;}
获取每行文字的每段落字符的样式
Markdown中的文字是带样式的,每一行都可能有不同的样式。通过 Range 可以获取 CharacterRun(这个类中包含了字体信息)。
// 获取段落数,一个回车符号就是一个段落了int paraNum = range.numParagraphs();System.out.println(paraNum);for (int i = 0; i < paraNum; i++) { //获取段落 Paragraph paragraph =range.getParagraph(i); //获取这一段文字的数目 int charCnts = paragraph.numCharacterRuns(); if(charCnts == 0){ return; } //获取标题的级别,是一级 二级,三级的情况 , 0是普通文本 int titleLev = getTitleLvl(paragraph); //源码中,有类似的写法,具体为啥判断是否》=-1不是特别清楚 int skipUntil = -1; for (int c = 0; c < range.numCharacterRuns(); c++) { //CharacterRun 会根据字体和样式,自动的字符分割开 CharacterRun characterRun = range.getCharacterRun(c); if (characterRun != null && characterRun.getStartOffset() >= skipUntil) { String text = characterRun.text(); String fontName = characterRun.getFontName(); int fontSize = characterRun.getFontSize(); int fontColor = characterRun.getColor(); boolean bold = characterRun.isBold(); boolean italic= characterRun.isItalic(); System.out.printf("当前文字的字体名称:%s,字体大小%d,字体颜色%d,加粗%b,斜体%b\r\n",text,fontName,fontSize,fontColor,bold,italic); } } //图片处理 //表格处理}
完整代码
这部分代码还没有完全写完。
package com.yellowcong.test;

import java.io.File;
import java.io.FileInputStream;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.OutputStream;
import java.util.UUID;

import org.apache.poi.hwpf.HWPFDocument;
import org.apache.poi.hwpf.usermodel.Bookmark;
import org.apache.poi.hwpf.usermodel.Bookmarks;
import org.apache.poi.hwpf.usermodel.CharacterRun;
import org.apache.poi.hwpf.usermodel.Paragraph;
import org.apache.poi.hwpf.usermodel.Range;
import org.apache.poi.hwpf.usermodel.Section;
import org.apache.poi.hwpf.usermodel.Table;
import org.apache.poi.hwpf.usermodel.TableCell;
import org.apache.poi.hwpf.usermodel.TableIterator;
import org.apache.poi.hwpf.usermodel.TableRow;

// NOTE(review): JDK-internal class, no longer referenced by this file and not
// available on recent JDKs - this import is a candidate for removal.
import com.sun.xml.internal.messaging.saaj.util.ByteInputStream;
import com.yellowcong.utils.DocUtils;

/**
 * Experiments with reading a binary Word (.doc) file through Apache POI HWPF as a
 * first step towards converting it to Markdown: prints paragraph and character-run
 * styles, bookmarks, tables and list paragraphs.
 */
public class DocTest {

    /** Directory that holds the input document and receives extracted images. */
    private static final String BASE_PATH = "D:\\笔记\\服务器学习\\bae\\";

    /** Level-1 heading: font name and size expected on the paragraph's first character run. */
    private static final String STYLE_TITLE_FONT_FONTNAME_1 = "宋体";
    private static final int STYLE_TITLE_FONT_SIZE_1 = 48;
    private static final int TITLE_LV_1 = 1;

    /** Level-2 heading: font name and size expected on the paragraph's first character run. */
    private static final String STYLE_TITLE_FONT_FONTNAME_2 = "Arial";
    private static final int STYLE_TITLE_FONT_SIZE_2 = 32;
    private static final int TITLE_LV_2 = 2;

    /** Level-3 heading: font name and size expected on the paragraph's first character run. */
    private static final String STYLE_TITLE_FONT_FONTNAME_3 = "Times New Roman";
    private static final int STYLE_TITLE_FONT_SIZE_3 = 32;
    private static final int TITLE_LV_3 = 3;

    /** Plain body text, i.e. not a heading. */
    private static final int TITLE_LV_0 = 0;

    /**
     * Loads the sample document, converts it to HTML through {@code DocUtils} and
     * prints the style of every character run.
     *
     * @param args unused
     * @throws Exception if the document cannot be read or converted
     */
    public static void main(String[] args) throws Exception {
        File file = new File(BASE_PATH + "BAE 服务器文件目录问题.doc");
        // Close both the stream and the document even when processing fails.
        try (InputStream in = new FileInputStream(file);
             HWPFDocument doc = new HWPFDocument(in)) {
            DocUtils.copyDocToHtml(file);
            Range range = doc.getRange();
            printInfo(range);
        }
    }

    /**
     * Inserts text after the given range. The change only exists in memory; nothing
     * is written back to a file here.
     *
     * @param range the range to append to
     */
    private static void insertInfo(Range range) {
        range.insertAfter("Hello");
    }

    /**
     * Prints the number of paragraphs in the range and then, for each paragraph,
     * the text and font attributes of each of its character runs. In Word every
     * carriage return ends one paragraph.
     *
     * @param range the range to dump
     */
    private static void printInfo(Range range) {
        int paraNum = range.numParagraphs();
        System.out.println(paraNum);

        for (int i = 0; i < paraNum; i++) {
            Paragraph paragraph = range.getParagraph(i);

            int runCount = paragraph.numCharacterRuns();
            if (runCount == 0) {
                // Skip only this paragraph; returning here would drop the rest of the document.
                continue;
            }

            // Heading level: 1, 2 or 3 for headings, 0 for plain text.
            // TODO: not used yet - intended for emitting the Markdown heading prefix.
            int titleLev = getTitleLvl(paragraph);

            // Walk the runs of THIS paragraph (not of the whole range, which would
            // re-print every run of the document once per paragraph).
            for (int c = 0; c < runCount; c++) {
                // A CharacterRun is a stretch of text sharing one font and style.
                CharacterRun characterRun = paragraph.getCharacterRun(c);
                if (characterRun == null) {
                    continue;
                }
                String text = characterRun.text();
                String fontName = characterRun.getFontName();
                int fontSize = characterRun.getFontSize();
                int fontColor = characterRun.getColor();
                boolean bold = characterRun.isBold();
                boolean italic = characterRun.isItalic();
                // One specifier per argument, with matching types; the previous format
                // paired %d with the font name and threw IllegalFormatConversionException.
                System.out.printf("当前文字:%s,字体名称:%s,字体大小%d,字体颜色%d,加粗%b,斜体%b\r\n",
                        text, fontName, fontSize, fontColor, bold, italic);
            }

            // TODO: image handling
            // TODO: table handling
        }
    }

    /**
     * Holder for the style attributes of one piece of text.
     *
     * <p>Declared static so it can be instantiated from the static helpers of this
     * class without an enclosing {@code DocTest} instance.
     *
     * @author yellowcong
     */
    static class FontStyle {
        private String fontColor;
        private String fontName;
        private Integer fontSize;
        /** Bold flag. */
        private boolean bold;
        /** Italic flag. */
        private boolean italic;

        public String getFontColor() {
            return fontColor;
        }

        public void setFontColor(String fontColor) {
            this.fontColor = fontColor;
        }

        public String getFontName() {
            return fontName;
        }

        public void setFontName(String fontName) {
            this.fontName = fontName;
        }

        public Integer getFontSize() {
            return fontSize;
        }

        public void setFontSize(Integer fontSize) {
            this.fontSize = fontSize;
        }

        public boolean isBold() {
            return bold;
        }

        public void setBold(boolean bold) {
            this.bold = bold;
        }

        public boolean isItalic() {
            return italic;
        }

        public void setItalic(boolean italic) {
            this.italic = italic;
        }
    }

    /**
     * Determines the heading level of a paragraph from the font name and font size
     * of its first character run.
     *
     * @param paragraph the paragraph to classify
     * @return 1, 2 or 3 for a heading of that level; 0 when the paragraph has no
     *         character runs or its first run matches none of the heading styles
     */
    private static Integer getTitleLvl(Paragraph paragraph) {
        if (paragraph.numCharacterRuns() == 0) {
            return TITLE_LV_0;
        }
        CharacterRun firstRun = paragraph.getCharacterRun(0);
        if (firstRun == null) {
            return TITLE_LV_0;
        }

        // NOTE(review): getFontName() can apparently return null (e.g. no font table);
        // the constant-first equals() below keeps that case from throwing.
        String fontName = firstRun.getFontName();
        // NOTE(review): HWPF appears to report the size in half-points - confirm.
        int fontSize = firstRun.getFontSize();

        if (STYLE_TITLE_FONT_FONTNAME_1.equals(fontName) && fontSize == STYLE_TITLE_FONT_SIZE_1) {
            return TITLE_LV_1;
        }
        if (STYLE_TITLE_FONT_FONTNAME_2.equals(fontName) && fontSize == STYLE_TITLE_FONT_SIZE_2) {
            return TITLE_LV_2;
        }
        if (STYLE_TITLE_FONT_FONTNAME_3.equals(fontName) && fontSize == STYLE_TITLE_FONT_SIZE_3) {
            return TITLE_LV_3;
        }
        return TITLE_LV_0;
    }

    /** Placeholder; not implemented yet. */
    private static void getFontLive() {
    }

    /**
     * Writes the given bytes to a new {@code .jpg} file under {@code BASE_PATH}.
     * The file name is the first six characters of a random UUID, so an existing
     * file with the same short name is overwritten.
     *
     * @param imgByte the raw image bytes; must not be null
     * @throws NullPointerException if {@code imgByte} is null
     * @throws Exception if the file cannot be created or written
     */
    public static void copyByteToFile(byte[] imgByte) throws Exception {
        if (imgByte == null) {
            // Fail before any file is created.
            throw new NullPointerException("imgByte");
        }
        String fileName = UUID.randomUUID().toString().substring(0, 6);
        File target = new File(BASE_PATH + fileName + ".jpg");
        // try-with-resources closes the file even if the write fails.
        try (OutputStream out = new FileOutputStream(target)) {
            out.write(imgByte);
        }
    }

    /**
     * Prints the number of bookmarks and, for each one, its name and its start and
     * end positions.
     *
     * @param bookmarks the bookmarks to dump
     */
    private static void printInfo(Bookmarks bookmarks) {
        int count = bookmarks.getBookmarksCount();
        System.out.println("书签数量:" + count);
        for (int i = 0; i < count; i++) {
            Bookmark bookmark = bookmarks.getBookmark(i);
            System.out.println("书签" + (i + 1) + "的名称是:" + bookmark.getName());
            System.out.println("开始位置:" + bookmark.getStart());
            System.out.println("结束位置:" + bookmark.getEnd());
        }
    }

    /**
     * Prints the trimmed text of every cell of every table inside the range. Each
     * carriage return is a paragraph, so every cell contains at least one paragraph
     * and every row ends with one.
     *
     * @param range the range whose tables are read
     */
    private static void readTable(Range range) {
        TableIterator tableIter = new TableIterator(range);
        while (tableIter.hasNext()) {
            Table table = tableIter.next();
            int rowNum = table.numRows();
            for (int j = 0; j < rowNum; j++) {
                TableRow row = table.getRow(j);
                int cellNum = row.numCells();
                for (int k = 0; k < cellNum; k++) {
                    TableCell cell = row.getCell(k);
                    System.out.println(cell.text().trim());
                }
            }
        }
    }

    /**
     * Prints the text of every paragraph in the range that is part of a list.
     *
     * @param range the range to scan
     */
    private static void readList(Range range) {
        int num = range.numParagraphs();
        for (int i = 0; i < num; i++) {
            Paragraph para = range.getParagraph(i);
            if (para.isInList()) {
                System.out.println("list: " + para.text());
            }
        }
    }
}
阅读全文
0 0
- POI之Word转化为Markdown-yellowcong
- POI之Word转化为Html-yellowcong
- POI之Word文档读取-yellowcong
- POI之根据模板导出word-yellowcong
- simplejson之JSON转化为对象-yellowcong
- poi将word docx转化为html
- Jgrid之将所有数据转化为JSON-yellowcong
- POI之读写Excel-yellowcong
- 使用poi将sql脚本转化为word文档
- java引用POI将Word转化为HTML
- HTML表单Form转化为JSON-yellowcong
- POI之自定义注解生成文档-yellowcong
- word转化为PDF
- word转化为html
- word转化为图片
- Word转化为PDF
- LPTSTR 转化为 WORD
- word转化为swf
- 恋爱与学习
- __stdcall、__cdcel和__fastcall三者的区别
- python 的__future__ 使用
- LeetCode 86. Partition List
- Simple Library Management System
- POI之Word转化为Markdown-yellowcong
- PAT程序设计考题——甲级1012( The best rank ) C++实现
- MFC编程--MFC中UpdateData()函数的使用
- MarkDown语法初识
- 李洋疯狂C语言之n个人报数,报到3的退出,最后留在场上的是原来的第几位(约瑟夫环)
- c# 调用 C++ dll
- 自定义ViewGroup实现左滑效果
- Dreamweaver cc 2017安装破解教程
- Java之文件压缩工具类-yellowcong