spring boot+全文搜索框架lucene
1.全文搜索框架lucene和mysql like%对比:全文索引是先对数据分词并建立索引,搜索时再对搜索词进行拆分,然后根据索引查找内容,系统需要维护索引;mysql like%用法是全表遍历一遍,效率相对比较慢。2.github(项目)https://github.com/dajitui/spring-boot-lucene-ik,后续再详细研究其中的方法。
·
1.全文搜索框架lucene和mysql like%对比
全文索引是先对数据分词并建立索引,搜索时再对搜索词进行拆分,然后根据索引查找内容。系统需要维护索引。
mysql like%用法是全表遍历一遍,效率相对比较慢。
2.github(项目)
https://github.com/dajitui/spring-boot-lucene-ik
3.详细的过程
我的初衷也是想和数据库结合的,所以:
利用jpa查询得到数据,由于一般数据量都是众多的,所以不能用数据库的like进行查询!
得到数据后,需要通过关联lucene版本和分值器,再创建一个文本保存索引,然后写入。
Directory directory = null;
IndexWriterConfig config = null;
IndexWriter iwriter = null;
try {
    // Directory on disk that stores the index files.
    directory = FSDirectory.open(new File(dir));
    // Bind the Lucene version and the analyzer (tokenizer).
    config = new IndexWriterConfig(Version.LUCENE_47, analyzer);
    // Writer over the directory using the configured analyzer.
    iwriter = new IndexWriter(directory, config);
    // Write the document into the index
    // (the original snippet also committed before adding — redundant).
    iwriter.addDocument(doc);
    // Make the change durable.
    iwriter.commit();
} catch (IOException e) {
    e.printStackTrace();
} finally {
    // Always release the writer and the directory, even when
    // opening/indexing fails (the original leaked both on exception
    // and never closed the directory).
    try {
        if (iwriter != null) {
            iwriter.close();
        }
        if (directory != null) {
            directory.close();
        }
    } catch (IOException e) {
        e.printStackTrace();
    }
}
把数据写进document,再写入刚刚创建的文件里面
// Fetch the i-th row produced by the JPA query.
Map<String, Object> row = queryFood.get(i);
// One Lucene Document per database row.
Document doc = new Document();
// Copy each column into a stored, tokenized field
// (TYPE_STORED keeps the raw value retrievable at search time).
doc.add(new Field("foodid", row.get("foodid").toString(), TextField.TYPE_STORED));
doc.add(new Field("foodname", row.get("foodname").toString(), TextField.TYPE_STORED));
doc.add(new Field("price", row.get("price").toString(), TextField.TYPE_STORED));
doc.add(new Field("imagepath", row.get("imagepath").toString(), TextField.TYPE_STORED));
// Hand the document to the index writer, which builds the index.
indexDemo.write(doc);
当搜索的时候,读取索引文件,通过索引进行查询:
// Directory holding the index files.
Directory directory = FSDirectory.open(new File(dir));
// Reader over the stored index.
DirectoryReader ireader = DirectoryReader.open(directory);
// Searcher built on top of the reader.
IndexSearcher isearcher = new IndexSearcher(ireader);
// Query parser: binds the target field name and the analyzer.
QueryParser parser = new QueryParser(Version.LUCENE_47, field, analyzer);
// Parse the user's search text into a Lucene query.
Query query = parser.parse(value);
// Wraps each matched term with this prefix/suffix
// (the formatter's default, if none given, would be <B></B>).
SimpleHTMLFormatter htmlFormatter = new SimpleHTMLFormatter("<font color=red>", "</font>");
// Highlighter that scores text fragments against the query terms.
Highlighter highlighter = new Highlighter(htmlFormatter, new QueryScorer(query));
// Run the search; keep at most the top 5 documents.
ScoreDoc[] hits = isearcher.search(query, null, 5).scoreDocs;
List<Map> list = new ArrayList<Map>();
// Collect each hit into a map of column name -> value.
for (int i = 0; i < hits.length; i++) {
    int id = hits[i].doc;
    Document hitDoc = isearcher.doc(hits[i].doc);
    Map map = new HashMap();
    map.put("foodid", hitDoc.get("foodid"));
    // Stored value of the foodname column.
    String foodname = hitDoc.get("foodname");
    // Re-tokenize the stored field so the highlighter can locate matches.
    TokenStream tokenStream = TokenSources.getAnyTokenStream(isearcher.getIndexReader(), id, "foodname", analyzer);
    // Second argument is the raw text the fragments are cut from.
    TextFragment[] frag = highlighter.getBestTextFragments(tokenStream, foodname, false, 10);
    // Fall back to the raw stored value when the query did not hit this
    // field; the original snippet left an empty string in that case.
    String foodValue = foodname;
    for (int j = 0; j < frag.length; j++) {
        if ((frag[j] != null) && (frag[j].getScore() > 0)) {
            // Use the highlighted fragment (match wrapped in the font tags).
            foodValue = frag[j].toString();
        }
    }
    map.put("foodname", foodValue);
    map.put("price", hitDoc.get("price"));
    map.put("imagepath", hitDoc.get("imagepath"));
    list.add(map);
}
ireader.close();
directory.close();
return list;
Lucene索引维护
上面是创建索引
增量添加索引
/**
 * Adds one new document to the existing index (incremental indexing).
 *
 * @throws Exception if the index directory cannot be opened or written
 */
public static void insert() throws Exception {
    String text5 = "hello,goodbye,man,woman";
    long start = System.currentTimeMillis();
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    directory = FSDirectory.open(new File(INDEX_DIR));
    IndexWriterConfig config = new IndexWriterConfig(
            Version.LUCENE_CURRENT, analyzer);
    indexWriter = new IndexWriter(directory, config);
    try {
        Document doc1 = new Document();
        doc1.add(new TextField("filename", "text5", Store.YES));
        doc1.add(new TextField("content", text5, Store.YES));
        indexWriter.addDocument(doc1);
        indexWriter.commit();
    } finally {
        // Release the writer and the directory even if indexing fails
        // (the original leaked both on exception and never closed directory).
        indexWriter.close();
        directory.close();
    }
    System.out.println("增加索引耗时:" + (System.currentTimeMillis() - start) + "ms\n");
}
删除索引
/**
 * Deletes every indexed document whose "filename" field matches the keyword.
 *
 * @param str exact term matched against the "filename" field
 * @throws Exception if the index directory cannot be opened or written
 */
public static void delete(String str) throws Exception {
    long start = System.currentTimeMillis();
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    directory = FSDirectory.open(new File(INDEX_DIR));
    IndexWriterConfig config = new IndexWriterConfig(
            Version.LUCENE_CURRENT, analyzer);
    indexWriter = new IndexWriter(directory, config);
    try {
        indexWriter.deleteDocuments(new Term("filename", str));
        // Make the deletion durable explicitly, consistent with insert().
        indexWriter.commit();
    } finally {
        // Release the writer and the directory even on failure
        // (the original leaked both on exception and never closed directory).
        indexWriter.close();
        directory.close();
    }
    System.out.println("删除索引耗时:" + (System.currentTimeMillis() - start) + "ms\n");
}
更新索引
/**
 * Replaces the document whose "filename" term equals "text1" with new content
 * (Lucene updates are implemented as delete-then-add).
 *
 * @throws Exception if the index directory cannot be opened or written
 */
public static void update() throws Exception {
    String text1 = "update,hello,man!";
    long start = System.currentTimeMillis();
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    directory = FSDirectory.open(new File(INDEX_DIR));
    IndexWriterConfig config = new IndexWriterConfig(
            Version.LUCENE_CURRENT, analyzer);
    indexWriter = new IndexWriter(directory, config);
    try {
        Document doc1 = new Document();
        doc1.add(new TextField("filename", "text1", Store.YES));
        doc1.add(new TextField("content", text1, Store.YES));
        indexWriter.updateDocument(new Term("filename", "text1"), doc1);
        // Make the replacement durable explicitly, consistent with insert().
        indexWriter.commit();
    } finally {
        // Release the writer and the directory even on failure
        // (the original leaked both on exception and never closed directory).
        indexWriter.close();
        directory.close();
    }
    System.out.println("更新索引耗时:" + (System.currentTimeMillis() - start) + "ms\n");
}
根据索引查询
/**
 * Searches the "content" field for the given keyword and prints the
 * "filename" and "content" of every hit to stdout.
 *
 * @param str query text, parsed against the "content" field
 * @throws Exception if the index cannot be opened or the query cannot be parsed
 */
public static void search(String str) throws Exception {
    directory = FSDirectory.open(new File(INDEX_DIR));
    analyzer = new StandardAnalyzer(Version.LUCENE_CURRENT);
    DirectoryReader ireader = DirectoryReader.open(directory);
    try {
        IndexSearcher isearcher = new IndexSearcher(ireader);
        QueryParser parser = new QueryParser(Version.LUCENE_CURRENT, "content", analyzer);
        Query query = parser.parse(str);
        // Keep at most the top 1000 matching documents.
        ScoreDoc[] hits = isearcher.search(query, null, 1000).scoreDocs;
        for (int i = 0; i < hits.length; i++) {
            Document hitDoc = isearcher.doc(hits[i].doc);
            System.out.println(hitDoc.get("filename"));
            System.out.println(hitDoc.get("content"));
        }
    } finally {
        // Close the reader and the directory even when parse/search throws
        // (the original leaked both on exception).
        ireader.close();
        directory.close();
    }
}
更多推荐
已为社区贡献2条内容
所有评论(0)