lucene4.4 索引的增删改查
来源:互联网 发布:福建省软件评测中心 编辑:程序博客网 时间:2024/06/10 02:48
package com.lucene.test;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Date;
import org.apache.log4j.Logger;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.document.IntField;
import org.apache.lucene.document.StringField;
import org.apache.lucene.document.TextField;
import org.apache.lucene.index.DirectoryReader;
import org.apache.lucene.index.DocsAndPositionsEnum;
import org.apache.lucene.index.Fields;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.MultiFields;
import org.apache.lucene.index.Term;
import org.apache.lucene.index.Terms;
import org.apache.lucene.index.TermsEnum;
import org.apache.lucene.queryparser.classic.ParseException;
import org.apache.lucene.queryparser.classic.QueryParser;
import org.apache.lucene.search.DocIdSetIterator;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.BytesRef;
import org.apache.lucene.util.NumericUtils;
import org.apache.lucene.util.Version;
publicclass IndexUtil {
privatestaticfinal Logger LOGGER = Logger.getLogger(IndexUtil.class);
private Directorydirectory = null;
private DirectoryReaderreader = null;
private IndexWriterConfigconfig = null;
private IndexWriterwriter = null;
publicstaticfinal IndexUtilInstance =new IndexUtil();
private IndexUtil() {
try {
directory = FSDirectory.open(new File("D:/lucene/index"));
config =new IndexWriterConfig(Version.LUCENE_44,
new StandardAnalyzer(Version.LUCENE_44));
} catch (IOException e) {
e.printStackTrace();
}
}
/**
*添加索引
*@throwsIOException
*/
publicvoid index()throws IOException {
writer =new IndexWriter(directory,config);
File file = new File("D:\\lucene\\example");
Document document = null;
int id = 0;
long start =new Date().getTime();
LOGGER.info("添加索引…………………………");
for (File f : file.listFiles()) {
document = new Document();
document.add(new StringField("name",f.getName(), Store.YES));
document.add(new IntField("id", id++,Store.YES));
document.add(new StringField("path",f.getAbsolutePath(), Store.YES));
document.add(new TextField("context",new FileReader(f)));
writer.addDocument(document);
}
long end =new Date().getTime();
LOGGER.info("添加索引完成,用时:" + (end - start) / 1000.0 +"s…………………………");
writer.close();
}
/**
*查询索引
*@throwsIOException
*@throwsParseException
*/
publicvoid search()throws IOException, ParseException {
reader = DirectoryReader.open(directory);
QueryParser parser = newQueryParser(Version.LUCENE_44,"context",
new StandardAnalyzer(Version.LUCENE_44));
Query query = parser.parse("lucene");
IndexSearcher searcher =new IndexSearcher(reader);
TopDocs docs = searcher.search(query,100);
/**
*reader.maxDoc()包含索引文档的总数包含可用的和已经删除的数量
*reader.numDocs()当前可用的索引文档的数量不包含已经删除的
*reader.numDeletedDocs()删除的索引文档的数量
*/
LOGGER.info("总记录:" + docs.totalHits + " 命中文档数:" + docs.scoreDocs.length
+ " 最大的文档数maxDoc:" +reader.maxDoc() +" 删除文件数numDeletedDocs:"
+ reader.numDeletedDocs() + " numDocs" +reader.numDocs());
for (ScoreDoc doc : docs.scoreDocs) {
Document document = reader.document(doc.doc);
LOGGER.info("id:" +document.get("id") +" name:"
+ document.get("name") +" path:" + document.get("path"));
}
reader.close();
}
/**
*更新索引
*@throwsIOException
*/
publicvoid update()throws IOException {
writer =new IndexWriter(directory,config);
Document document = new Document();
document.add(new StringField("name","新文件", Store.YES));
document.add(new IntField("id", 12, Store.YES));
document.add(new StringField("path","D:\\lucene\\example\\新文件.txt", Store.YES));
writer.updateDocument(new Term("id","2"),document);
writer.commit();
writer.close();
}
/**
*删除索引删除的索引会保存到一个新的文件中(以del为结尾的文件相当于删除到回收站)
*@throwsIOException
*/
publicvoid delete()throws IOException {
writer =new IndexWriter(directory,config);
writer.deleteDocuments(new Term("name","11.txt"));
writer.close();
}
/**
*删除所有的索引删除的索引会保存到一个新的文件中(以del为结尾的文件相当于删除到回收站)
*@throwsIOException
*/
publicvoid deleteAll()throws IOException {
writer =new IndexWriter(directory,config);
writer.deleteAll();
writer.close();
}
/**
*删除已经删除的索引对应上一个删除方法删除回收站的文件
*@throwsIOException
*/
publicvoid forceMergeDeletes()throws IOException {
writer =new IndexWriter(directory,config);
writer.forceMergeDeletes();//清空回收站
writer.close();
}
/**
*显示所有的索引
*@throwsIOException
*/
publicvoid showIndex()throws IOException {
reader = DirectoryReader.open(directory);
Fields fields = MultiFields.getFields(reader);//获取directory中所有的field
for (String field : fields) {
LOGGER.info(field);
}
//显示 field中 context的所有的分词
Terms terms = fields.terms("context");
TermsEnum termsEnum = terms.iterator(null);
BytesRef term = null;
while ((term=termsEnum.next()) !=null) {
System.out.print(term.utf8ToString()+"\t");//分词的内容
System.out.print(termsEnum.docFreq()+"\t");//出现该分词的有文档的数量
System.out.print(termsEnum.totalTermFreq()+"\t");//分词的总数
DocsAndPositionsEnumdocsAndPositionsEnum = termsEnum.docsAndPositions(null,null);
//如果要查询的字段没有被分词,docsAndPositionsEnum就会为空继续循环
if(docsAndPositionsEnum==null){
continue;
}
int docId ;
while ((docId = docsAndPositionsEnum.nextDoc())!= DocIdSetIterator.NO_MORE_DOCS) {
Document document =reader.document(docId);//获取document对象
System.out.print(docId+"\t");//分词的总数
System.out.print(document.get("name")+"\t");//可以获取document中field的值
int freq = docsAndPositionsEnum.freq();//该document中该分词出现的次数
for (int i = 0; i < freq; i++) {
System.out.print(docsAndPositionsEnum.nextPosition()+":");//分词的位置
System.out.print("["+docsAndPositionsEnum.startOffset()+"");//分词起始偏移量的位置
System.out.print(docsAndPositionsEnum.endOffset()+"],");//分词结束偏移量的位置
System.out.print(docsAndPositionsEnum.getPayload()+"\t");
}
}
System.out.println();
}
reader.close();
}
}
- lucene4.4 索引的增删改查
- 全文检索(二)-基于lucene4.10的增删改查
- Lucene4.10使用教程(三):lucene的增删改查
- Lucene4.10使用教程(三):lucene的增删改查
- lucene4.10.2实例(增删改查)
- lucene索引的增删改查
- 02-lucene索引的增删改查
- mysql索引的增删改查
- Lucene4.10使用教程(三):lucene的增删改查(转载)
- Solr索引增删改查
- 增删改查索引库
- lucene索引的增删改查/lucene索引维护
- Lucene4.3开发之增删改查例子
- mysql数据库表字段增删改查及索引的增删改查
- Solr的index索引的增删改查
- lucene索引库的增删改查操作
- 分布式搜索elasticsearch 索引文档的增删改查 入门
- 分布式搜索elasticsearch 索引文档的增删改查 入门
- Javascript定义”类“
- hdu2119 Matrix (最小顶点覆盖)
- 四种主要浏览器内核
- 安装DirectXSDK时提示Error Code s1023
- [poj 1436]Horizontally Visible Segments[线段树]
- lucene4.4 索引的增删改查
- 提高班第二年总结
- oracle 设置
- 百度地图之UI控制
- Lazy Theta*: Faster Any-Angle Path Planning
- poj 3348 包凸求面积
- Sumdiv
- linux核心(kernel)版本与发行(distribution)版本
- java 文件下载