Lucene创建索引入门案例
来源:互联网 发布:win7 64 php环境搭建 编辑:程序博客网 时间:2024/06/10 15:52
原:http://blog.csdn.net/zwx19921215/article/details/32936395
最近在学习 Lucene,参考网上的资料写了一个简单的搜索 demo。
项目jar包:
//索引关键类
[java] view plain copy
1. package com.lucene.index;
2.
3. import java.io.File;
4. import java.io.IOException;
5. import java.io.StringReader;
6. import java.util.ArrayList;
7. import java.util.List;
8.
9. import org.apache.lucene.analysis.Analyzer;
10.import org.apache.lucene.analysis.TokenStream;
11. import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;
12.import org.apache.lucene.document.Document;
13. import org.apache.lucene.document.Field;
14.import org.apache.lucene.index.CorruptIndexException;
15. import org.apache.lucene.index.IndexReader;
16.import org.apache.lucene.index.IndexWriter;
17. import org.apache.lucene.index.IndexWriterConfig;
18.import org.apache.lucene.queryParser.ParseException;
19. import org.apache.lucene.queryParser.QueryParser;
20.import org.apache.lucene.search.IndexSearcher;
21. import org.apache.lucene.search.Query;
22.import org.apache.lucene.search.TopDocs;
23. import org.apache.lucene.store.Directory;
24.import org.apache.lucene.store.FSDirectory;
25. import org.apache.lucene.store.LockObtainFailedException;
26.import org.apache.lucene.util.Version;
27. import org.wltea.analyzer.lucene.IKAnalyzer;
28.
29. import com.lucene.vo.User;
30.
31. /**
32. * * lucene 检索内存索引 非常简单的例子 * * @author Administrator *
33. */
34.public class searchIndex {
35. private String[] ids = { "1", "2", "3", "4", "5", "6" };
36. private String[] emails = { "aa@itat.org", "bb@itat.org", "cc@cc.org", "dd@sina.org", "ee@zttc.edu", "ff@itat.org" };
37. // private String[] contents = { "welcome to visited the space,I like book", "hello boy, I like pingpeng ball", "my name is cc I like game", "I like football",
38.// "I like football and I like basketball too", "I like movie and swim" };
39. private String[] contents = { "创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中", "创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中", "分词器,分词器就是将检索的关键字分割成一组组词组, 它是lucene检索查询的一大特色之一", "这个是分词器拆分最大长度,因为各种不同类型的分词器拆分的字符颗粒细化程度不一样,所以需要设置一个最长的拆分长度",
40. "文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询", "I like movie and swim" };
41. private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };
42. // 创建一个内存目录对象,所以这里生成的索引会放在磁盘中,而不是在内存中。
43. private Directory directory = null;
44. //IK分词器
45. IKAnalyzer analyzer = null;
46. public searchIndex() {
47. try {
48. directory = FSDirectory.open(new File("H:/lucene/index"));
49. analyzer = new IKAnalyzer(true);
50. } catch (IOException e) {
51. // TODO Auto-generated catch block
52. e.printStackTrace();
53. }
54. }
55.
56. public void index() {
57. /*
58. * 创建索引写入对象,该对象既可以把索引写入到磁盘中也可以写入到内存中。
59. */
60. IndexWriter writer;
61. try {
62. writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));
63. //创建之前先删除
64. writer.deleteAll();
65. // 创建Document
66. // 文档对象,在lucene中创建的索引可以看成数据库中的一张表,表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询
67.
68. Document doc =null;
69.
70. for(int i=0;i<ids.length;i++){
71. doc = new Document();
72. doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
73. doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));
74. doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));
75. doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));
76. writer.addDocument(doc);
77. }
78. writer.close();
79. } catch (CorruptIndexException e) {
80. // TODO Auto-generated catch block
81. e.printStackTrace();
82. } catch (LockObtainFailedException e) {
83. // TODO Auto-generated catch block
84. e.printStackTrace();
85. } catch (IOException e) {
86. // TODO Auto-generated catch block
87. e.printStackTrace();
88. }
89. }
90.
91. public List<User> search(String keyword) {
92. long startTime = System.currentTimeMillis();
93. System.out.println("*****************检索开始**********************");
94. List<User> userList = new ArrayList<User>();
95. IndexReader reader;
96. try {
97. reader = IndexReader.open(directory);
98.
99. // 创建IndexSearcher 检索索引的对象,里面要传递上面写入的内存目录对象directory
100. IndexSearcher searcher = new IndexSearcher(reader);
101. // 根据搜索关键字 封装一个term组合对象,然后封装成Query查询对象
102.
103. QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", analyzer);
104. Query query = queryParser.parse(keyword);
105.
106.
107. // 去索引目录中查询,返回的是TopDocs对象,里面存放的就是上面放的document文档对象
108. TopDocs rs = searcher.search(query, null, 10);
109. long endTime = System.currentTimeMillis();
110. System.out.println("总共花费" + (endTime - startTime) + "毫秒,检索到" + rs.totalHits + "条记录。");
111. User user = null;
112. for (int i = 0; i < rs.scoreDocs.length; i++) {
113. // rs.scoreDocs[i].doc 是获取索引中的标志位id, 从0开始记录
114. Document firstHit = searcher.doc(rs.scoreDocs[i].doc);
115. user = new User();
116. user.setId(Long.parseLong(firstHit.get("id")));
117. user.setName(firstHit.get("name"));
118. user.setSex(firstHit.get("sex"));
119. user.setDosomething(firstHit.get("dosometing"));
120. user.setEmail(firstHit.get("email"));
121. user.setContent(firstHit.get("content"));
122. userList.add(user);
123.
124.// System.out.println("name:" + firstHit.get("name"));
125. // System.out.println("sex:" + firstHit.get("sex"));
126.// System.out.println("dosomething:" + firstHit.get("dosometing"));
127. }
128. reader.close();
129. } catch (CorruptIndexException e1) {
130. // TODO Auto-generated catch block
131. e1.printStackTrace();
132. } catch (IOException e1) {
133. // TODO Auto-generated catch block
134. e1.printStackTrace();
135. } catch (ParseException e) {
136. // TODO Auto-generated catch block
137. e.printStackTrace();
138. }
139.
140. System.out.println("*****************检索结束**********************");
141. return userList;
142. }
143.
144.}
[java] view plain copy
1. package com.lucene;
2.
3. import java.io.IOException;
4. import java.util.List;
5.
6. import javax.servlet.ServletException;
7. import javax.servlet.http.HttpServlet;
8. import javax.servlet.http.HttpServletRequest;
9. import javax.servlet.http.HttpServletResponse;
10.
11. import com.lucene.index.searchIndex;
12.import com.lucene.vo.User;
13.
14./**
15. * Servlet implementation class searchServlet
16. */
17. public class searchServlet extends HttpServlet {
18. private static final long serialVersionUID = 1L;
19.
20. /**
21. * Default constructor.
22. */
23. public searchServlet() {
24. // TODO Auto-generated constructor stub
25. }
26.
27. /**
28. * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)
29. */
30. protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
31. // TODO Auto-generated method stub
32. }
33.
34. /**
35. * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)
36. */
37. protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {
38. request.setCharacterEncoding("UTF-8");
39. String keyword = request.getParameter("keyword");
40. if("".equals(keyword)){
41. keyword="0";
42. }
43. searchIndex si = new searchIndex();
44. si.index();
45. List<User> userList = si.search(keyword);
46. request.setAttribute("userList", userList);
47. request.getRequestDispatcher("search.jsp").forward(request, response);
48. }
49.
50.}
[java] view plain copy
1. package com.lucene.vo;
2.
/**
 * Plain value object for one search hit. Every property is {@code null} until
 * its setter has been called.
 */
public class User {
    private Long id;
    private String name;
    private String sex;
    private String dosomething;
    private String email;
    private String content;

    /** @return the identifier, or {@code null} if unset */
    public Long getId() {
        return id;
    }

    /** @param id the identifier to store */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the name, or {@code null} if unset */
    public String getName() {
        return name;
    }

    /** @param name the name to store */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the sex, or {@code null} if unset */
    public String getSex() {
        return sex;
    }

    /** @param sex the sex to store */
    public void setSex(String sex) {
        this.sex = sex;
    }

    /** @return the "dosomething" value, or {@code null} if unset */
    public String getDosomething() {
        return dosomething;
    }

    /** @param dosomething the "dosomething" value to store */
    public void setDosomething(String dosomething) {
        this.dosomething = dosomething;
    }

    /** @return the e-mail address, or {@code null} if unset */
    public String getEmail() {
        return email;
    }

    /** @param email the e-mail address to store */
    public void setEmail(String email) {
        this.email = email;
    }

    /** @return the content text, or {@code null} if unset */
    public String getContent() {
        return content;
    }

    /** @param content the content text to store */
    public void setContent(String content) {
        this.content = content;
    }

}
[html] view plain copy
<%-- Search page: posts a keyword to the search servlet and renders the "userList" request attribute as a table. --%>
<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<%-- Must agree with the page directive above; the page contains non-Latin text. --%>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>lucene 全文检索</title>
</head>
<body style="text-align: center;">
    <form action="searchServlet.do" method="post">
        <input type="text" name="keyword" /> <input type="submit" value="搜索" />
    </form>
    <div style="height: 10px">
    </div>
    <%-- The result table is only rendered when the servlet supplied a non-empty list. --%>
    <c:if test="${not empty userList}">
        <div>相关信息:</div>
        <table border="1" align="center">
            <tr>
                <td>ID</td>
                <td>姓名</td>
                <td>性别</td>
                <td>邮箱</td>
                <td>爱好</td>
                <td>正文</td>
            </tr>
            <c:forEach items="${ userList}" var="user">
                <tr>
                    <%-- c:out escapes markup characters so indexed text cannot inject HTML. --%>
                    <td><c:out value="${user.id }" /></td>
                    <td><c:out value="${user.name }" /></td>
                    <td><c:out value="${user.sex }" /></td>
                    <td><c:out value="${user.email }" /></td>
                    <td><c:out value="${user.dosomething }" /></td>
                    <td><c:out value="${user.content }" /></td>
                </tr>
            </c:forEach>
        </table>
    </c:if>
</body>
</html>
代码测试:
- Lucene创建索引入门案例
- Lucene创建索引入门案例
- Lucene创建索引入门
- Lucene入门之创建索引
- 基于lucene的案例开发:创建索引
- lucene索引创建与查询入门例子
- Lucene 入门之创建索引和搜索
- Lucene的入门例子 - 创建索引,利用索引查询
- 全文检索Lucene入门之创建索引及简单搜索
- lucene学习----创建索引
- lucene创建索引
- Java_ABC_3.Lucene创建索引
- Lucene 索引创建
- LUCENE创建索引【鸡蛋】
- Lucene 索引创建
- lucene--创建索引,搜索
- lucene创建索引
- Lucene 4.4 创建索引
- cache源码分析二 读写逻辑分析
- IllegalArgumentException: Unable to locate adbAndroid
- 为什么算法渐进复杂度中对数的底数总为2
- SSH登陆慢的问题
- css3表达式得到焦点即失去焦点
- Lucene创建索引入门案例
- eventsystem源码分析 多线程框架
- hiho第二十四周
- cache源码分析三 evacuate机制的实现
- Android利用tcpdump和wireshark抓取网络数据包
- cache源码分析四 初始化与元数据同步
- 好记性不如烂笔头
- Web前端开发面试题赋答案
- a different object with the same identifier value was already associated with the session