Lucene七(搜索)

来源:互联网 发布:js 特殊字符校验 编辑:程序博客网 时间:2024/06/10 20:22

本文通过代码详细讲解 Lucene 3.5 的各种搜索功能,包括:TermQuery 精确匹配、TermRangeQuery 范围匹配、NumericRangeQuery 数字类型的范围匹配、PrefixQuery 按前缀匹配、WildcardQuery 通配符匹配、BooleanQuery 连接多个条件匹配、PhraseQuery 短语匹配、FuzzyQuery 模糊匹配;最后讲解如何通过查询解析器 QueryParser 来查询,这种方式几乎能实现前面的所有查询(数字范围查询除外,需要自定义 parser),是最强大的查询方式,也是项目中最常用的。所有代码和测试如下:

package cn.liuys.lucene.index;


import java.io.IOException;
import java.text.ParseException;
import java.text.SimpleDateFormat;
import java.util.Date;
import java.util.HashMap;
import java.util.Map;


import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.NumericField;
import org.apache.lucene.index.CorruptIndexException;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause.Occur;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.FuzzyQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.NumericRangeQuery;
import org.apache.lucene.search.PhraseQuery;
import org.apache.lucene.search.PrefixQuery;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TermRangeQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.LockObtainFailedException;
import org.apache.lucene.store.RAMDirectory;
import org.apache.lucene.util.Version;


public class SearcherUtil {

private String[] ids = {"1","2","3","4","5","6"};
private String[] emails = {"aa@itat.org","bb@itat.org","cc@cc.org","dd@sina.org","ee@zttc.edu","ff@itat.org"};
private String[] contents = {
"welcome to visited the space,I like book",
"hello boy, I like pingpeng ball",
"my name is cc I like game",
"I like football",
"I like football and I like basketball too",
"I like movie and swim"
};
private Date[] dates = null;
private int[] attachs = {2,3,1,4,5,5};
private String[] names = {"zhangsan","lisi","john","jetty","mike","jake"};
//用于存放加权信息
private Map<String,Float> scores = new HashMap<String,Float>();


private Directory directory;
private IndexReader reader;

public SearcherUtil() {
directory = new RAMDirectory();
setDates();
index();
}

private void setDates() {
SimpleDateFormat sdf = new SimpleDateFormat("yyyy-MM-dd");
try {
dates = new Date[ids.length];
dates[0] = sdf.parse("2010-02-19");
dates[1] = sdf.parse("2012-01-11");
dates[2] = sdf.parse("2011-09-19");
dates[3] = sdf.parse("2010-12-22");
dates[4] = sdf.parse("2012-01-01");
dates[5] = sdf.parse("2011-05-19");
} catch (ParseException e) {
e.printStackTrace();
}
}

/**
* 建立索引
*/
public void index(){
IndexWriter writer = null;
try {
writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_35, new StandardAnalyzer(Version.LUCENE_35)));
//创建前先删除索引
writer.deleteAll();
Document doc = null;
for(int i = 0; i < ids.length; i++){
doc = new Document();
doc.add(new Field("id", ids[i], Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
doc.add(new Field("email",emails[i],Field.Store.YES,Field.Index.NOT_ANALYZED));
doc.add(new Field("content",contents[i],Field.Store.NO,Field.Index.ANALYZED));
doc.add(new Field("name",names[i],Field.Store.YES,Field.Index.NOT_ANALYZED_NO_NORMS));
//存储数字   第三个参数表示是否索引
doc.add(new NumericField("attach",Field.Store.YES,true).setIntValue(attachs[i]));
//存储日期
doc.add(new NumericField("date",Field.Store.YES,true).setLongValue(dates[i].getTime()));
String et = emails[i].substring(emails[i].lastIndexOf("@")+1);
//加权操作。默认为1.0f
if(scores.containsKey(et)) {
doc.setBoost(scores.get(et));
} else {
doc.setBoost(0.5f);
}
writer.addDocument(doc);
}
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (LockObtainFailedException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally{
try {
if(writer != null) writer.close();
} catch (CorruptIndexException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
}
}
}

public IndexSearcher getSearcher(){
try {
if(reader == null){
reader = IndexReader.open(directory);
}else{
IndexReader tr = IndexReader.openIfChanged(reader);
if(tr != null){
reader.close();
reader = tr;
}
}
return new IndexSearcher(reader);
} catch (IOException e) {
e.printStackTrace();
}
return null;
}

/**
* @param field
* @param name
* @param num
* 精确匹配
*/
public void searchByTerm(String field,String name,int num){
try {
IndexSearcher searcher = getSearcher();
Query query = new TermQuery(new Term(field, name));
TopDocs tds = searcher.search(query, num);
System.out.println("总共查询到:"+tds.totalHits);
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date"));
}
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}

}

/**
* @param field
* @param start
* @param end
* @param num
* 范围匹配
*/
public void searchByTermRange(String field,String start,String end,int num){
try {
IndexSearcher searcher = getSearcher();
//最后两个参数分别表示是否包含开始和结尾
Query query = new TermRangeQuery(field, start, end, true, true);
TopDocs tds = searcher.search(query, num);
System.out.println("总共查询到:"+tds.totalHits);
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date"));
}
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}

}

/**
* @param field
* @param start
* @param end
* @param num
* 数字类型Field的范围匹配
*/
public void searchByNumericRange(String field,int start,int end,int num){
try {
IndexSearcher searcher = getSearcher();
Query query = NumericRangeQuery.newIntRange(field, start, end, true, true);
TopDocs tds = searcher.search(query, num);
System.out.println("总共查询到:"+tds.totalHits);
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date"));
}
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}

}

/**
* @param field
* @param value
* @param num
* 按前缀搜索
*/
public void searchByPrefix(String field,String value,int num){
try {
IndexSearcher searcher = getSearcher();
Query query = new PrefixQuery(new Term(field, value));
TopDocs tds = searcher.search(query, num);
System.out.println("总共查询到:"+tds.totalHits);
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date"));
}
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}

}

/**
* @param field
* @param value
* @param num
* 通配符搜索
*/
public void searchByWildcard(String field,String value,int num){
try {
IndexSearcher searcher = getSearcher();
Query query = new WildcardQuery(new Term(field, value));
TopDocs tds = searcher.search(query, num);
System.out.println("总共查询到:"+tds.totalHits);
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date"));
}
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}

}

/**
* @param num
* 连接多个条件查询
*/
public void searchByBoolean(int num){
try {
IndexSearcher searcher = getSearcher();
BooleanQuery query = new BooleanQuery();
//通过add方法连接条件
query.add(new TermQuery(new Term("name", "mike")), Occur.MUST_NOT);
query.add(NumericRangeQuery.newIntRange("attach", 4, 5, false, true), Occur.MUST);
TopDocs tds = searcher.search(query, num);
System.out.println("总共查询到:"+tds.totalHits);
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date"));
}
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}

}

/**
* @param num
* 短语查询,不适用于中文
*/
public void searchByPhrase(int num){
try {
IndexSearcher searcher = getSearcher();
PhraseQuery query = new PhraseQuery();
//设置跳数
query.setSlop(1);
//连接短语
query.add(new Term("content", "i"));
//产生距离之后的第二个term
query.add(new Term("content", "football"));
TopDocs tds = searcher.search(query, num);
System.out.println("总共查询到:"+tds.totalHits);
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date"));
}
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}

}


/**
* @param num
* 模糊查询
*/
public void searchByFuzzy(int num){
try {
IndexSearcher searcher = getSearcher();
//可以看到mike和jack都被查询出来了
Query query = new FuzzyQuery(new Term("name", "make"));
TopDocs tds = searcher.search(query, num);
System.out.println("总共查询到:"+tds.totalHits);
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date"));
}
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}

}


/**
* @param query
* @param num
* 通过查询解析器查询,功能强大使用最多
*/
public void searchByQueryParser(Query query, int num){
try {
IndexSearcher searcher = getSearcher();
TopDocs tds = searcher.search(query, num);
System.out.println("总共查询到:"+tds.totalHits);
for(ScoreDoc sd : tds.scoreDocs){
Document doc = searcher.doc(sd.doc);
System.out.println(doc.get("id")+"---->"+
doc.get("name")+"["+doc.get("email")+"]-->"+doc.get("id")+","+
doc.get("attach")+","+doc.get("date"));
}
searcher.close();
} catch (IOException e) {
e.printStackTrace();
}

}
}




package cn.liuys.test;


import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.queryParser.ParseException;
import org.apache.lucene.queryParser.QueryParser;
import org.apache.lucene.queryParser.QueryParser.Operator;
import org.apache.lucene.search.Query;
import org.apache.lucene.util.Version;
import org.junit.Before;
import org.junit.Test;


import cn.liuys.lucene.index.SearcherUtil;


/**
 * JUnit driver that runs every search method of {@link SearcherUtil} once.
 * The tests only print results; they contain no assertions.
 */
public class TestSearch {
    /** Line printed after each parsed query in {@link #testSearchByQueryParser()}. */
    private static final String SEPARATOR = "=====================================";

    private SearcherUtil su;

    /** Builds a fresh index before every test. */
    @Before
    public void init() {
        su = new SearcherUtil();
    }

    @Test
    public void testSearchByTerm() {
        su.searchByTerm("name", "zhangsan", 10);
    }

    @Test
    public void testSearchByTermRange() {
        su.searchByTermRange("id", "1", "3", 10);
        // A numeric field cannot be found with a TermRangeQuery.
        su.searchByTermRange("attach", "1", "10", 5);
    }

    @Test
    public void testSearchByNumericRange() {
        su.searchByNumericRange("attach", 2, 10, 10);
    }

    @Test
    public void testSearchByPrefix() {
        su.searchByPrefix("name", "j", 10);
        su.searchByPrefix("content", "s", 10);
    }

    @Test
    public void testSearchByWildcard() {
        su.searchByWildcard("email", "*it??.org", 10);
    }

    @Test
    public void testSearchByBoolean() {
        su.searchByBoolean(10);
    }

    @Test
    public void testSearchByPhrase() {
        su.searchByPhrase(10);
    }

    @Test
    public void testSearchByFuzzy() {
        su.searchByFuzzy(10);
    }

    /**
     * Walks through the query-parser syntax, using "content" as the default field.
     *
     * @throws ParseException if one of the query strings is rejected by the parser
     */
    @Test
    public void testSearchByQueryParser() throws ParseException {
        QueryParser parser = new QueryParser(
                Version.LUCENE_35, "content", new StandardAnalyzer(Version.LUCENE_35));

        // Documents whose content contains "like".
        runParsed(parser, "like");

        // Content contains "I" or "football": a space means OR by default.
        // That default can be changed with parser.setDefaultOperator(Operator.AND).
        runParsed(parser, "I football");

        // Content contains both "I" and "football".
        runParsed(parser, "I AND football");

        // Override the default field for a single clause.
        runParsed(parser, "name:j*");

        // A leading wildcard is rejected by default because it is expensive; allow it explicitly.
        parser.setAllowLeadingWildcard(true);
        runParsed(parser, "email:*@it??.org");

        // name must not be mike, content must contain football, email must start with dd.
        runParsed(parser, "-name:mike +football +email:dd*");

        // id from 1 to 3, closed interval (both bounds included).
        runParsed(parser, "id:[1 TO 3]");

        // id from 1 to 3, open interval (both bounds excluded).
        runParsed(parser, "id:{1 TO 3}");

        // Exact phrase.
        runParsed(parser, "\"I like football\"");

        // "I" and "football" with a slop of 1 between them.
        runParsed(parser, "\"I football\"~1");

        // Fuzzy query.
        runParsed(parser, "name:make~");

        // Numeric ranges cannot be matched this way; a custom parser is needed.
        runParsed(parser, "attach:[2 TO 6]");
    }

    /** Parses {@code expression}, prints up to ten hits and then the separator line. */
    private void runParsed(QueryParser parser, String expression) throws ParseException {
        Query parsed = parser.parse(expression);
        su.searchByQueryParser(parsed, 10);
        System.out.println(SEPARATOR);
    }
}

0 0
原创粉丝点击