Lucene05---Highlighter
来源:互联网 发布:面向对象编程的语言 编辑:程序博客网 时间:2024/09/21 08:44
前面讲了分词器。在搜索结果页面中,我们通常还会看到另一个效果:关键字被高亮显示,并且只展示包含关键字的一段文本(摘要)。这里我们就来介绍一下实现这一效果的Highlighter。
Highlighter:
可以截取一段文本作为摘要,并且让关键字高亮显示(通过指定前缀和后缀实现;因为结果是在网页中显示,指定前缀<font color='red'>和后缀</font>之后,关键字就会在网页中显示为红色)。
FirstLucene03ByHighlighter.java:
Java代码
- package com.iflytek.lucene;
- import java.io.File;
- import org.apache.lucene.analysis.Analyzer;
- import org.apache.lucene.analysis.standard.StandardAnalyzer;
- import org.apache.lucene.document.Document;
- import org.apache.lucene.index.IndexReader;
- import org.apache.lucene.index.IndexWriter;
- import org.apache.lucene.index.IndexWriterConfig;
- import org.apache.lucene.queryParser.MultiFieldQueryParser;
- import org.apache.lucene.queryParser.QueryParser;
- import org.apache.lucene.search.Filter;
- import org.apache.lucene.search.IndexSearcher;
- import org.apache.lucene.search.Query;
- import org.apache.lucene.search.ScoreDoc;
- import org.apache.lucene.search.TopDocs;
- import org.apache.lucene.search.highlight.Formatter;
- import org.apache.lucene.search.highlight.Fragmenter;
- import org.apache.lucene.search.highlight.Highlighter;
- import org.apache.lucene.search.highlight.QueryScorer;
- import org.apache.lucene.search.highlight.Scorer;
- import org.apache.lucene.search.highlight.SimpleFragmenter;
- import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
- import org.apache.lucene.store.Directory;
- import org.apache.lucene.store.FSDirectory;
- import org.apache.lucene.store.RAMDirectory;
- import org.apache.lucene.util.Version;
- /**
- * @author xudongwang 2012-2-10
- *
- * Email:xdwangiflytek@gmail.com
- */
- public class FirstLucene03ByHighlighter {
- /**
- * 源文件路径
- */
- private String filePath01 = "F:\\Workspaces\\workspaceSE\\BlogDemo\\luceneDatasource\\HelloLucene01.txt";
- /**
- * 索引路径
- */
- private String indexPath = "F:\\Workspaces\\workspaceSE\\BlogDemo\\luceneIndex";
- /**
- * 分词器,这里我们使用默认的分词器,标准分析器(好几个,但对中文的支持都不好)
- */
- private Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);
- private Directory ramDir = null;
- /**
- * 搜索
- *
- * @param queryStr
- * 搜索的关键词
- * @throws Exception
- */
- public void search(String queryStr) throws Exception {
- // 1、把要搜索的文本解析为Query对象
- String[] fields = { "name", "content" };
- QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
- Query query = queryParser.parse(queryStr);
- // 2、进行查询
- IndexReader indexReader = IndexReader.open(ramDir);
- IndexSearcher indexSearcher = new IndexSearcher(indexReader);
- Filter filter = null;
- TopDocs topDocs = indexSearcher.search(query, filter, 10000);
- System.out.println("总共有【" + topDocs.totalHits + "】条匹配的结果");// 注意这里的匹配结果是指文档的个数,而不是文档中包含搜索结果的个数
- // 准备高亮器
- Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
- Scorer scorer = new QueryScorer(query);
- Highlighter highlighter = new Highlighter(formatter, scorer);
- Fragmenter fragmenter = new SimpleFragmenter(10);// 指定10个字符
- highlighter.setTextFragmenter(fragmenter);// 决定是否生成摘要,以及摘要有多长
- // 3、取出数据,并打印结果
- for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
- int docSn = scoreDoc.doc;// 文档内部编号
- Document document = indexSearcher.doc(docSn);// 根据文档编号取出相应的文档
- // 进行高亮处理
- // 返回高亮后的结果,如果当前属性值中没有出现关键字,会返回null
- String highlighterStr = highlighter.getBestFragment(analyzer, "content", document.get("content"));
- if (highlighterStr == null) {
- String content = document.get("content");
- int endIndex = Math.min(20, content.length());
- highlighterStr=content.substring(0, endIndex);//最多前20个字符
- }
- document.getField("content").setValue(highlighterStr);
- File2Document.printDocumentInfo(document);// 打印出文档信息
- }
- }
- /**
- * 优化创建索引,将索引存在在内存和磁盘配合使用
- *
- * @throws Exception
- */
- public void createIndexByYouHua() throws Exception {
- File indexFile = new File(indexPath);
- Directory fsDir = FSDirectory.open(indexFile);
- // 1、启动时,将磁盘中的索引读取到内存中
- ramDir = new RAMDirectory(fsDir);
- IndexWriterConfig ramConf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
- // 运行程序时操作内存中的索引
- IndexWriter ramIndexWriter = new IndexWriter(ramDir, ramConf);
- Document document = File2Document.file2Document(filePath01);
- ramIndexWriter.addDocument(document);
- ramIndexWriter.close();
- // 2、退出时将内存中的索引保存到磁盘中
- IndexWriterConfig fsConf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
- IndexWriter fsIndexWriter = new IndexWriter(fsDir, fsConf);
- fsIndexWriter.addIndexes(ramDir);// 把另外几个索引库中的所有索引数据合并到当前的索引库中
- fsIndexWriter.commit();
- // fsIndexWriter.optimize();//对索引文件进行优化,从而减少IO操作
- fsIndexWriter.forceMerge(1);
- fsIndexWriter.close();
- }
- public static void main(String[] args) throws Exception {
- FirstLucene03ByHighlighter lucene = new FirstLucene03ByHighlighter();
- lucene.createIndexByYouHua();
- lucene.search("iteye");
- }
- }
package com.iflytek.lucene;

import java.io.File;

import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.highlight.Formatter;
import org.apache.lucene.search.highlight.Fragmenter;
import org.apache.lucene.search.highlight.Highlighter;
import org.apache.lucene.search.highlight.QueryScorer;
import org.apache.lucene.search.highlight.Scorer;
import org.apache.lucene.search.highlight.SimpleFragmenter;
import org.apache.lucene.search.highlight.SimpleHTMLFormatter;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;

/**
 * Demo of the Lucene {@link Highlighter}: indexes one text file, searches it,
 * and prints each hit with the matched keyword wrapped in
 * {@code <font color='red'>...</font>} inside a short excerpt of the content.
 *
 * @author xudongwang 2012-2-10
 *
 *         Email:xdwangiflytek@gmail.com
 */
public class FirstLucene03ByHighlighter {

    /** Maximum number of hits requested from the searcher. */
    private static final int MAX_HITS = 10000;

    /** Fragment size passed to {@link SimpleFragmenter} for the highlighted excerpt. */
    private static final int FRAGMENT_SIZE = 10;

    /** Number of leading characters shown when the content holds no keyword. */
    private static final int FALLBACK_SUMMARY_LENGTH = 20;

    /**
     * Path of the source file to index.
     */
    private final String filePath01 = "F:\\Workspaces\\workspaceSE\\BlogDemo\\luceneDatasource\\HelloLucene01.txt";

    /**
     * Path of the on-disk index directory.
     */
    private final String indexPath = "F:\\Workspaces\\workspaceSE\\BlogDemo\\luceneIndex";

    /**
     * Analyzer used for both indexing and querying. This is the standard
     * analyzer; the original author notes that it handles Chinese poorly.
     */
    private final Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_35);

    /** In-memory index; populated by {@link #createIndexByYouHua()}. */
    private Directory ramDir = null;

    /**
     * Searches the in-memory index and prints every hit with its
     * {@code content} field replaced by a highlighted excerpt.
     *
     * @param queryStr
     *            the keywords to search for
     * @throws IllegalStateException
     *             if the index has not been built yet
     * @throws Exception
     *             if the query cannot be parsed or the index cannot be read
     */
    public void search(String queryStr) throws Exception {
        if (ramDir == null) {
            throw new IllegalStateException(
                    "Index not loaded: call createIndexByYouHua() before search()");
        }

        // 1. Parse the search text into a Query over the "name" and "content" fields.
        String[] fields = { "name", "content" };
        QueryParser queryParser = new MultiFieldQueryParser(Version.LUCENE_35, fields, analyzer);
        Query query = queryParser.parse(queryStr);

        // 2. Run the query. Reader and searcher are always released, even on failure.
        IndexReader indexReader = IndexReader.open(ramDir);
        try {
            IndexSearcher indexSearcher = new IndexSearcher(indexReader);
            try {
                TopDocs topDocs = indexSearcher.search(query, MAX_HITS);
                // totalHits counts matching documents, not keyword occurrences.
                System.out.println("总共有【" + topDocs.totalHits + "】条匹配的结果");

                Highlighter highlighter = newHighlighter(query);

                // 3. Load each hit, highlight its content and print it.
                for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                    Document document = indexSearcher.doc(scoreDoc.doc);
                    highlightContent(highlighter, document);
                    File2Document.printDocumentInfo(document);
                }
            } finally {
                indexSearcher.close();
            }
        } finally {
            indexReader.close();
        }
    }

    /**
     * Builds a highlighter that wraps matched terms in a red font tag and
     * uses fragments of {@link #FRAGMENT_SIZE}.
     */
    private Highlighter newHighlighter(Query query) {
        Formatter formatter = new SimpleHTMLFormatter("<font color='red'>", "</font>");
        Scorer scorer = new QueryScorer(query);
        Highlighter highlighter = new Highlighter(formatter, scorer);
        Fragmenter fragmenter = new SimpleFragmenter(FRAGMENT_SIZE);
        highlighter.setTextFragmenter(fragmenter);
        return highlighter;
    }

    /**
     * Replaces the document's {@code content} value with the best highlighted
     * fragment, or with its first {@link #FALLBACK_SUMMARY_LENGTH} characters
     * when the highlighter finds no keyword in it. Documents without a stored
     * {@code content} value are left untouched.
     */
    private void highlightContent(Highlighter highlighter, Document document) throws Exception {
        String content = document.get("content");
        if (content == null) {
            // A hit may match on "name" only; there is nothing to highlight then.
            return;
        }
        // getBestFragment returns null when the text does not contain the keyword.
        String summary = highlighter.getBestFragment(analyzer, "content", content);
        if (summary == null) {
            summary = content.substring(0, Math.min(FALLBACK_SUMMARY_LENGTH, content.length()));
        }
        document.getField("content").setValue(summary);
    }

    /**
     * Builds the index using memory and disk together: loads the existing disk
     * index into memory, adds the source file to the in-memory index, then
     * writes the in-memory index back to disk.
     *
     * @throws Exception
     *             if the source file or either index cannot be read or written
     */
    public void createIndexByYouHua() throws Exception {
        Directory fsDir = FSDirectory.open(new File(indexPath));
        try {
            // 1. On start-up, load the disk index into memory. On a first run
            //    there is nothing to copy, so start from an empty RAM index.
            Directory memoryDir = IndexReader.indexExists(fsDir)
                    ? new RAMDirectory(fsDir)
                    : new RAMDirectory();

            // While running, work on the in-memory index.
            IndexWriter ramIndexWriter = new IndexWriter(memoryDir,
                    new IndexWriterConfig(Version.LUCENE_35, analyzer));
            try {
                ramIndexWriter.addDocument(File2Document.file2Document(filePath01));
            } finally {
                ramIndexWriter.close();
            }
            ramDir = memoryDir;

            // 2. On exit, save the in-memory index to disk. The RAM index already
            //    contains everything that was on disk, so the disk index must be
            //    recreated; appending would duplicate every existing document.
            IndexWriterConfig fsConf = new IndexWriterConfig(Version.LUCENE_35, analyzer);
            fsConf.setOpenMode(IndexWriterConfig.OpenMode.CREATE);
            IndexWriter fsIndexWriter = new IndexWriter(fsDir, fsConf);
            boolean saved = false;
            try {
                fsIndexWriter.addIndexes(ramDir);
                // Merge down to a single segment to reduce I/O when searching.
                fsIndexWriter.forceMerge(1);
                saved = true;
            } finally {
                if (saved) {
                    fsIndexWriter.close();
                } else {
                    // Discard the half-written index and keep the previous one.
                    fsIndexWriter.rollback();
                }
            }
        } finally {
            fsDir.close();
        }
    }

    /**
     * Builds the index and runs a sample search for "iteye".
     */
    public static void main(String[] args) throws Exception {
        FirstLucene03ByHighlighter lucene = new FirstLucene03ByHighlighter();
        lucene.createIndexByYouHua();
        lucene.search("iteye");
    }
}
运行结果:
总共有【1】条匹配的结果
name -->HelloLucene01.txt
content --> in <font color='red'>iteye</font> blog
path -->F:\Workspaces\workspaceSE\BlogDemo\luceneDatasource\HelloLucene01.txt
size -->84
- Lucene05---Highlighter
- Highlighter高亮器
- clucene的highlighter
- Lucene的高亮器Highlighter
- Lucene中的highlighter
- Sublime Text: PeopleCode Syntax Highlighter
- Syntax Highlighter for HTML and VBulletin
- Lucene+HighLighter高亮显示实例
- Code Syntax Highlighter Plugin for TiddlyWiki
- Google Syntax Highlighter for WordPress的使用
- solr Highlighter (高亮)显示分析
- Lucene+HighLighter 搜索关键字高亮显示
- solr Highlighter (高亮)显示分析
- sublime 的Bracket Highlighter插件配置文件
- 修改 highlighter.net-1.4.0 一处Bug
- 修改 lucene Highlighter.net 2.0 版本一处Bug
- lucene-使用Highlighter高亮显示查询项
- jQuery 关键字高亮插件 - jQuery Highlighter v1.0.0 发布
- Http 请求
- 全文检索与Lucene学习
- usleep函数
- 发送arp包获取mac
- ubuntu 32 bit系统下编译android 2.3
- Lucene05---Highlighter
- unix下面kill oracle里面已经处于killed状态的session
- Android多媒体分析(四)AudioManager
- 双链表的实现
- Lucene06---查询
- Mysql复制表结构、表数据
- Commons Pool处理对象池化分析(1)
- onInterceptTouchEvent和onTouchEvent
- linux程序设计笔记12:POSIX线程