lucene6.2.1测试用例

来源:互联网 发布:万网二手域名交易平台 编辑:程序博客网 时间:2024/06/02 11:57

          最近项目中要用到lucene,于是选用了最新的6.2.1版本。尽管之前用过4.4版本,但在使用6.2.1时还是遇到了问题:做中文分词时,paoding无法使用了。原因是lucene在接口方面做了变动,导致原版paoding不能直接配合新版本使用。

         下面是一个用例,对paoding做了修改,使之与lucene6.2.1能配合调用。


      java测试代码

import java.io.StringReader;import net.paoding.analysis.analyzer.PaodingAnalyzer;import org.apache.lucene.analysis.Analyzer;import org.apache.lucene.analysis.TokenStream;import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;import org.apache.lucene.document.Document;import org.apache.lucene.document.Field.Store;import org.apache.lucene.document.TextField;import org.apache.lucene.index.DirectoryReader;import org.apache.lucene.index.IndexWriter;import org.apache.lucene.index.IndexWriterConfig;import org.apache.lucene.queryparser.classic.QueryParser;import org.apache.lucene.search.IndexSearcher;import org.apache.lucene.search.ScoreDoc;import org.apache.lucene.search.TopDocs;import org.apache.lucene.store.Directory;import org.apache.lucene.store.RAMDirectory;  public class Test {        public static void main(String[] args) throws Exception {          String text="生成analyzer实例  将项目中的dic复制到工程的classpath下,默认配置";          testSplitChinese(text);          System.out.println("==============");          testDemo(text);      }      /**      * 分词测试      */      public static void testSplitChinese(String text) throws Exception{          // 生成analyzer实例 将项目中的dic复制到工程的根下,若修改paoding.dic.home,更换位置          Analyzer analyzer = new PaodingAnalyzer();          // 取得Token流          TokenStream tokenizer = analyzer.tokenStream("text", new StringReader(text));          tokenizer.reset();          // 添加工具类 注意:以下这些与之前lucene2.x版本不同的地方          CharTermAttribute offAtt = (CharTermAttribute) tokenizer.addAttribute(CharTermAttribute.class);          // 循环打印出分词的结果,及分词出现的位置          while (tokenizer.incrementToken()) {              System.out.print(offAtt.toString() + "\t");          }          tokenizer.close();      }            private static Document createDocument(String title, String content) {          Document doc = new Document();          doc.add(new TextField("title", title, Store.YES));          doc.add(new TextField("content", content, Store.YES));          return doc; 
     }            /**      * lucene简单实例      */      public static void testDemo(String text) throws Exception{          Analyzer analyzer = new PaodingAnalyzer();          Directory idx = new RAMDirectory();          IndexWriterConfig iwc = new IndexWriterConfig(analyzer);          IndexWriter writer = new IndexWriter(idx, iwc);          writer.addDocument(createDocument("维基百科:关于中文维基百科99999999999999999999999999999999999999", "维基百科:关于中文维基百科9999999999999999999999999999999"));          writer.addDocument(createDocument("维基百科:关于中文维基百科99999999999999999999999999999999999999", "维基百科:关于中文维基百科88888888888"));          writer.commit();          writer.close();          IndexSearcher searcher = new IndexSearcher(DirectoryReader.open(idx));          System.out.println("命中个数:"+searcher.search(new QueryParser("title", analyzer).parse("title:'维基'"), 10).totalHits);                  TopDocs topdoc =  searcher.search(new QueryParser("title", analyzer).parse("title:'维基'"), 10);        ScoreDoc[] hits=  topdoc.scoreDocs;                  if(hits!=null && hits.length>0){             for(int i = 0; i < hits.length; i++){               Document hitDoc = searcher.doc(hits[i].doc);              System.out.println(hitDoc.get("content"));                                }                     }                  }  }

pom文件为:

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
  <modelVersion>4.0.0</modelVersion>

  <groupId>com.cn.lucene.search</groupId>
  <artifactId>lucene-search</artifactId>
  <version>0.0.1-SNAPSHOT</version>
  <packaging>jar</packaging>

  <name>lucene-search</name>
  <url>http://maven.apache.org</url>

  <properties>
    <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
    <!-- Single place to change the version of all Lucene modules below. -->
    <lucene.version>6.2.1</lucene.version>
  </properties>

  <dependencies>
    <dependency>
      <groupId>commons-logging</groupId>
      <artifactId>commons-logging</artifactId>
      <version>1.1.1</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-core</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-analyzers-common</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-queryparser</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <dependency>
      <groupId>org.apache.lucene</groupId>
      <artifactId>lucene-highlighter</artifactId>
      <version>${lucene.version}</version>
    </dependency>
    <!-- Resolved from a local file rather than a repository: the jar must
         exist at lib/paoding-analysis.jar under the project base directory. -->
    <dependency>
      <groupId>net.paoding</groupId>
      <artifactId>paoding-analysis</artifactId>
      <version>4.6.0</version>
      <scope>system</scope>
      <systemPath>${project.basedir}/lib/paoding-analysis.jar</systemPath>
    </dependency>
  </dependencies>
</project>

改良之后的paoding下载地址 http://download.csdn.net/detail/riapgypm/9648301


0 0
原创粉丝点击