Lucene创建索引入门案例

来源：互联网 | 发布：win7 64 php环境搭建 | 编辑：程序博客网 | 时间：2024/06/10 15:52

原：http://blog.csdn.net/zwx19921215/article/details/32936395

最近在学习lucene，参考网上的资料写了一个简单搜索demo；

项目jar包：

//索引关键类

[java] view plain copy

1. package com.lucene.index;

3. import java.io.File;

4. import java.io.IOException;

5. import java.io.StringReader;

6. import java.util.ArrayList;

7. import java.util.List;

9. import org.apache.lucene.analysis.Analyzer;

10.import org.apache.lucene.analysis.TokenStream;

11. import org.apache.lucene.analysis.tokenattributes.CharTermAttribute;

12.import org.apache.lucene.document.Document;

13. import org.apache.lucene.document.Field;

14.import org.apache.lucene.index.CorruptIndexException;

15. import org.apache.lucene.index.IndexReader;

16.import org.apache.lucene.index.IndexWriter;

17. import org.apache.lucene.index.IndexWriterConfig;

18.import org.apache.lucene.queryParser.ParseException;

19. import org.apache.lucene.queryParser.QueryParser;

20.import org.apache.lucene.search.IndexSearcher;

21. import org.apache.lucene.search.Query;

22.import org.apache.lucene.search.TopDocs;

23. import org.apache.lucene.store.Directory;

24.import org.apache.lucene.store.FSDirectory;

25. import org.apache.lucene.store.LockObtainFailedException;

26.import org.apache.lucene.util.Version;

27. import org.wltea.analyzer.lucene.IKAnalyzer;

28.

29. import com.lucene.vo.User;

30.

31. /**

32. * * lucene 检索内存索引非常简单的例子 * * @author Administrator *

33. */

34.public class searchIndex {

35. private String[] ids = { "1", "2", "3", "4", "5", "6" };

36. private String[] emails = { "aa@itat.org", "bb@itat.org", "cc@cc.org", "dd@sina.org", "ee@zttc.edu", "ff@itat.org" };

37. // private String[] contents = { "welcome to visited the space,I like book", "hello boy, I like pingpeng ball", "my name is cc I like game", "I like football",

38.// "I like football and I like basketball too", "I like movie and swim" };

39. private String[] contents = { "创建一个内存目录对象，所以这里生成的索引会放在磁盘中，而不是在内存中", "创建索引写入对象，该对象既可以把索引写入到磁盘中也可以写入到内存中", "分词器，分词器就是将检索的关键字分割成一组组词组，它是lucene检索查询的一大特色之一", "这个是分词器拆分最大长度，因为各种不同类型的分词器拆分的字符颗粒细化程度不一样，所以需要设置一个最长的拆分长度",

40. "文档对象，在lucene中创建的索引可以看成数据库中的一张表，表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询", "I like movie and swim" };

41. private String[] names = { "zhangsan", "lisi", "john", "jetty", "mike", "jake" };

42. // 创建一个内存目录对象，所以这里生成的索引会放在磁盘中，而不是在内存中。

43. private Directory directory = null;

44. //IK分词器

45. IKAnalyzer analyzer = null;

46. public searchIndex() {

47. try {

48. directory = FSDirectory.open(new File("H:/lucene/index"));

49. analyzer = new IKAnalyzer(true);

50. } catch (IOException e) {

51. // TODO Auto-generated catch block

52. e.printStackTrace();

53. }

54. }

55.

56. public void index() {

57. /*

58. * 创建索引写入对象，该对象既可以把索引写入到磁盘中也可以写入到内存中。

59. */

60. IndexWriter writer;

61. try {

62. writer = new IndexWriter(directory, new IndexWriterConfig(Version.LUCENE_36, analyzer));

63. //创建之前先删除

64. writer.deleteAll();

65. // 创建Document

66. // 文档对象，在lucene中创建的索引可以看成数据库中的一张表，表中也可以有字段,往里面添加内容之后可以根据字段去匹配查询

67.

68. Document doc =null;

69.

70. for(int i=0;i<ids.length;i++){

71. doc = new Document();

72. doc.add(new Field("id", ids[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

73. doc.add(new Field("email", emails[i], Field.Store.YES, Field.Index.NOT_ANALYZED));

74. doc.add(new Field("content", contents[i], Field.Store.NO, Field.Index.ANALYZED));

75. doc.add(new Field("name", names[i], Field.Store.YES, Field.Index.NOT_ANALYZED_NO_NORMS));

76. writer.addDocument(doc);

77. }

78. writer.close();

79. } catch (CorruptIndexException e) {

80. // TODO Auto-generated catch block

81. e.printStackTrace();

82. } catch (LockObtainFailedException e) {

83. // TODO Auto-generated catch block

84. e.printStackTrace();

85. } catch (IOException e) {

86. // TODO Auto-generated catch block

87. e.printStackTrace();

88. }

89. }

90.

91. public List<User> search(String keyword) {

92. long startTime = System.currentTimeMillis();

93. System.out.println("*****************检索开始**********************");

94. List<User> userList = new ArrayList<User>();

95. IndexReader reader;

96. try {

97. reader = IndexReader.open(directory);

98.

99. // 创建IndexSearcher 检索索引的对象，里面要传递上面写入的内存目录对象directory

100. IndexSearcher searcher = new IndexSearcher(reader);

101. // 根据搜索关键字封装一个term组合对象，然后封装成Query查询对象

102.

103. QueryParser queryParser = new QueryParser(Version.LUCENE_36, "content", analyzer);

104. Query query = queryParser.parse(keyword);

105.

106.

107. // 去索引目录中查询，返回的是TopDocs对象，里面存放的就是上面放的document文档对象

108. TopDocs rs = searcher.search(query, null, 10);

109. long endTime = System.currentTimeMillis();

110. System.out.println("总共花费" + (endTime - startTime) + "毫秒，检索到" + rs.totalHits + "条记录。");

111. User user = null;

112. for (int i = 0; i < rs.scoreDocs.length; i++) {

113. // rs.scoreDocs[i].doc 是获取索引中的标志位id, 从0开始记录

114. Document firstHit = searcher.doc(rs.scoreDocs[i].doc);

115. user = new User();

116. user.setId(Long.parseLong(firstHit.get("id")));

117. user.setName(firstHit.get("name"));

118. user.setSex(firstHit.get("sex"));

119. user.setDosomething(firstHit.get("dosometing"));

120. user.setEmail(firstHit.get("email"));

121. user.setContent(firstHit.get("content"));

122. userList.add(user);

123.

124.// System.out.println("name:" + firstHit.get("name"));

125. // System.out.println("sex:" + firstHit.get("sex"));

126.// System.out.println("dosomething:" + firstHit.get("dosometing"));

127. }

128. reader.close();

129. } catch (CorruptIndexException e1) {

130. // TODO Auto-generated catch block

131. e1.printStackTrace();

132. } catch (IOException e1) {

133. // TODO Auto-generated catch block

134. e1.printStackTrace();

135. } catch (ParseException e) {

136. // TODO Auto-generated catch block

137. e.printStackTrace();

138. }

139.

140. System.out.println("*****************检索结束**********************");

141. return userList;

142. }

143.

144.}

[java] view plain copy

1. package com.lucene;

3. import java.io.IOException;

4. import java.util.List;

6. import javax.servlet.ServletException;

7. import javax.servlet.http.HttpServlet;

8. import javax.servlet.http.HttpServletRequest;

9. import javax.servlet.http.HttpServletResponse;

10.

11. import com.lucene.index.searchIndex;

12.import com.lucene.vo.User;

13.

14./**

15. * Servlet implementation class searchServlet

16. */

17. public class searchServlet extends HttpServlet {

18. private static final long serialVersionUID = 1L;

19.

20. /**

21. * Default constructor.

22. */

23. public searchServlet() {

24. // TODO Auto-generated constructor stub

25. }

26.

27. /**

28. * @see HttpServlet#doGet(HttpServletRequest request, HttpServletResponse response)

29. */

30. protected void doGet(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

31. // TODO Auto-generated method stub

32. }

33.

34. /**

35. * @see HttpServlet#doPost(HttpServletRequest request, HttpServletResponse response)

36. */

37. protected void doPost(HttpServletRequest request, HttpServletResponse response) throws ServletException, IOException {

38. request.setCharacterEncoding("UTF-8");

39. String keyword = request.getParameter("keyword");

40. if("".equals(keyword)){

41. keyword="0";

42. }

43. searchIndex si = new searchIndex();

44. si.index();

45. List<User> userList = si.search(keyword);

46. request.setAttribute("userList", userList);

47. request.getRequestDispatcher("search.jsp").forward(request, response);

48. }

49.

50.}

[java] view plain copy

1. package com.lucene.vo;

/**
 * Plain value object holding one user record of the search demo.
 * Every property is nullable and starts out as null.
 */
public class User {
    private Long id;
    private String name;
    private String sex;
    private String dosomething;
    private String email;
    private String content;

    /** @return the user id, or null if not set */
    public Long getId() {
        return id;
    }

    /** @param id the user id */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the user name, or null if not set */
    public String getName() {
        return name;
    }

    /** @param name the user name */
    public void setName(String name) {
        this.name = name;
    }

    /** @return the sex value, or null if not set */
    public String getSex() {
        return sex;
    }

    /** @param sex the sex value */
    public void setSex(String sex) {
        this.sex = sex;
    }

    /** @return the "dosomething" value, or null if not set */
    public String getDosomething() {
        return dosomething;
    }

    /** @param dosomething the "dosomething" value */
    public void setDosomething(String dosomething) {
        this.dosomething = dosomething;
    }

    /** @return the e-mail address, or null if not set */
    public String getEmail() {
        return email;
    }

    /** @param email the e-mail address */
    public void setEmail(String email) {
        this.email = email;
    }

    /** @return the content text, or null if not set */
    public String getContent() {
        return content;
    }

    /** @param content the content text */
    public void setContent(String content) {
        this.content = content;
    }

}

[html] view plain copy

<%@ page language="java" contentType="text/html; charset=UTF-8" pageEncoding="UTF-8"%>
<%@taglib uri="http://java.sun.com/jsp/jstl/core" prefix="c"%>
<%-- Search page: posts the keyword to searchServlet.do and renders the
     "userList" request attribute as a table when it is not empty. --%>
<!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3.org/TR/html4/loose.dtd">
<html>
<head>
<%-- Charset matches the page directive above; the page contains Chinese text. --%>
<meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
<title>lucene 全文检索</title>
</head>
<body style="text-align: center;">
    <form action="searchServlet.do" method="post">
        <input type="text" name="keyword" /> <input type="submit" value="搜索" />
    </form>
    <div style="height: 10px">
    </div>
    <%-- The result table is rendered only when there is at least one hit. --%>
    <c:if test="${not empty userList}">
        <div>相关信息：</div>
        <table border="1" align="center">
            <tr>
                <td>ID</td>
                <td>姓名</td>
                <td>性别</td>
                <td>邮箱</td>
                <td>爱好</td>
                <td>正文</td>
            </tr>
            <c:forEach items="${ userList}" var="user">
                <tr>
                    <td>${user.id }</td>
                    <td>${user.name }</td>
                    <td>${user.sex }</td>
                    <td>${user.email }</td>
                    <td>${user.dosomething }</td>
                    <td>${user.content }</td>
                </tr>
            </c:forEach>
        </table>
    </c:if>
</body>
</html>

代码测试：

0 0