以前做的都是一些应用系统,这次是第一次接触搜索引擎开发。这个搜索引擎我用的是比较通用的 Lucene 来实现,自己也不是很懂,反正是马马虎虎弄出来了,欢迎高手拍砖。上代码:

1. 添加 Lucene 核心 jar 包(我这里用 Maven 管理项目,所以直接截图;Lucene 用的是 2.4.0 版本)

 

ecshop 项目中用到的 Lucene 搜索引擎例子(多字段、全模糊、关键字高亮)

 

2. 编写 Lucene 工具类:

import java.io.File;
import java.io.IOException;
import java.lang.reflect.InvocationHandler;
import java.lang.reflect.Method;
import java.lang.reflect.Proxy;
import java.util.ArrayList;
import java.util.List;
import java.util.Locale;

import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.document.Field.Index;
import org.apache.lucene.document.Field.Store;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.Term;
import org.apache.lucene.queryParser.MultiFieldQueryParser;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.Filter;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.Query;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.Searcher;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.search.WildcardQuery;

public class Digest {
  private static String indexPath = null;


       /**
  * 构造lucence的document集合,将数据以document对象写入lucence 索引库目录
  *
  * @param rs
  * @return
  * @throws SQLException
  */
 public static void goodsListToLucence(List<Goods> goodslist,//这个goodslist是从数据库里查询出来的Model的列表,也就是要放入索引库的数据(可以随便弄)
   ServletContext context) {
  indexPath = context.getRealPath("") + "/lucence/";
  /* lucence 索引库目录 */
  File fileDir = new File(indexPath);
  if (!fileDir.exists()) {
   fileDir.mkdirs();
  }

  List<Document> list = new ArrayList<Document>();
  Document doc = null;
              //将数据便利放入Document中,并创建List<Document>
  for (Goods goods : goodslist) {
   doc = new Document();
   doc.add(new Field("id", goods.getId() + "", Store.COMPRESS,
     Index.ANALYZED));  
   doc.add(new Field("code", goods.getCode(), Store.COMPRESS,
     Index.ANALYZED));
   doc.add(new Field("goodstitle", goods.getGoodstitle(),
     Store.COMPRESS, Index.ANALYZED));   
   list.add(doc);
  }

           /**
            *上面的都是在构造Document的list数据,因为Lucence好像只会去解析Document
            *下面的才是真正的重头戏把数据写入lucence 索引库目录
            *
           */
  IndexWriter indexWriter;
  try {
   indexWriter = new IndexWriter(indexPath, new StandardAnalyzer(), true,
     IndexWriter.MaxFieldLength.UNLIMITED);
   for (Document lucendoc : list) {
    indexWriter.addDocument(lucendoc);
   }
   // optimize()方法是对索引进行优化
   indexWriter.optimize();
   indexWriter.close();
  } catch (Exception e) {
   e.printStackTrace();
  }
 }


 /**
  * 测试 写入lucence和从lucence查询含有关键词的数据,并将关键词高亮显示
  * (这里搜索的是goods表中的goodstitle和code,也就是拿着关键词到这两个字段里的数据去找,其中goodstitle要求对其关键字全模糊并且关键字高亮(这里高亮我没有用那个什么highar插件,自己随便写的字符串截取的方式实现的))
  * 这里List<goods>就胡乱写些数据
  *
  */
         public static void main(String args[]) {
            /*
             *测试把数据写入lucence索引库目录
             *
            */
          List<Goods> goodsList = new ArrayList<Goods>();
          Goods goods = new Goods();
          goods.setId(1);goods.setGoodstitle("sdsdsdsddsdsds");goods.setCode("1233code");
          goodsList.add(goods);
          goodsListToLucence(goodsList,ServletContext context);//第二个数据是lucence索引的目录路径(写入完成)
          /*
             *测试把数据从lucence索引库目录里根据关键字拿出来
             *
            */
            String[] fields={"code", "goodstitle"};//要参与关键字查询的字段
            String querie="关键字";
            BooleanClause.Occur[] clauses = { BooleanClause.Occur.SHOULD,BooleanClause.Occur.SHOULD};
            Query query;
       try {
         Query query = MultiFieldQueryParser.parse(querie, fields, clauses, new StandardAnalyzer());//多字段查询
   Query mquery = new WildcardQuery(new Term("goodstitle", "*"+querie+"*"));//需要进行模糊查询的字段
   BooleanQuery bquery = new BooleanQuery();//构造多重符合查询方式的query
   bquery.add(query, BooleanClause.Occur.SHOULD);//SHOULD或的意思
   bquery.add(mquery, BooleanClause.Occur.SHOULD);
   Searcher searcher = new IndexSearcher(indexPath);
   Filter filter = null;
   TopDocs topDocs = searcher.search(query, filter, 10);   

Goods goods2 = null;
   // 打印结果
   for (ScoreDoc scoreDoc : topDocs.scoreDocs) {
                          Document document = indexSearcher.doc(scoreDoc.doc); // 根据编号取出相应的文档
    goods2 = new Goods();
    goods2.setId(Long.parseLong(document.get("id")));
    if(document.get("goodsId") !=null && !document.get("goodsId").equals("")) {
     goods2.setGoodsId(Long.parseLong(document.get("goodsId")));
    }
    if(document.get("code") !=null && !document.get("code").equals("")) {
     goods2.setCode(document.get("code"));
    }
    if(document.get("goodstitle") !=null && !document.get("goodstitle").equals("")) {
     goods2.setGoodstitle(document.get("goodstitle").replaceAll(querie,"<font color='red'>"+ querie+ "</font>"));//高亮显示,我不想用那网上的,太复杂
    }
       System.out.println(goods2);//查看从lucence索引目录中获得的数据
   }
                } catch (Exception e) {
   e.printStackTrace();
  }
          }