Lucene的简单使用
什么是Lucene?Lucene是一个免费开源的高性能、可伸缩的全文检索引擎。Lucene能做些什么?为文章建立全文索引,提供高效的查询服务。使用Lucene建立索引,并查询索引import java.io.File;import java.io.IOException;import java.util.ArrayList;import java.util.HashMap;import java.
·
什么是Lucene?
Lucene是一个免费开源的高性能、可伸缩的全文检索引擎。
Lucene能做些什么?
为文章建立全文索引,提供高效的查询服务。
使用Lucene建立索引,并查询索引
import java.io.File;
import java.io.IOException;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import org.apache.lucene.analysis.Analyzer;
import org.apache.lucene.analysis.standard.StandardAnalyzer;
import org.apache.lucene.document.Document;
import org.apache.lucene.document.Field;
import org.apache.lucene.index.IndexReader;
import org.apache.lucene.index.IndexWriter;
import org.apache.lucene.index.IndexWriterConfig;
import org.apache.lucene.index.LogByteSizeMergePolicy;
import org.apache.lucene.index.LogMergePolicy;
import org.apache.lucene.index.Term;
import org.apache.lucene.search.BooleanClause;
import org.apache.lucene.search.BooleanQuery;
import org.apache.lucene.search.IndexSearcher;
import org.apache.lucene.search.ScoreDoc;
import org.apache.lucene.search.TermQuery;
import org.apache.lucene.search.TopDocs;
import org.apache.lucene.store.Directory;
import org.apache.lucene.store.FSDirectory;
import org.apache.lucene.util.Version;
/**
 * Minimal Lucene 3.6 demo: builds an on-disk index of synthetic key/value
 * documents and then looks one of them up by exact key.
 *
 * <p>Each document has two stored, non-analyzed fields: {@code key} and
 * {@code functionid}. Because the fields are indexed with
 * {@code Field.Index.NOT_ANALYZED}, lookups use {@link TermQuery} with the
 * exact value that was written.
 *
 * <p>Instances hold an open writer or searcher in fields and make no attempt
 * at synchronization.
 */
public class luceneTest {
    /** Filesystem location of the index, shared by the writer and the searcher. */
    private static final String INDEX_PATH = "f://test/";
    /** Number of synthetic documents written by {@link #createIndex()}. */
    private static final int TOTAL_DOCS = 1000000;
    /** Documents are handed to the writer every time the counter hits a multiple of this. */
    private static final int BATCH_SIZE = 50000;
    /** Upper bound on the number of hits requested from a search. */
    private static final int MAX_HITS = 100;

    private File indexDir;
    private IndexWriter indexWriter = null;
    private IndexSearcher indexSearcher = null;
    private IndexReader reader = null;

    /**
     * Builds the index and then runs a sample lookup against it.
     *
     * @param args unused
     * @throws Exception if indexing or searching fails
     */
    public static void main(String[] args) throws Exception {
        // 1. Build the index.
        createIndex();
        // 2. Query the index.
        findIndex();
    }

    /**
     * Creates a fresh index (OpenMode.CREATE) and fills it with
     * {@link #TOTAL_DOCS} synthetic documents, printing the running counter
     * after each batch.
     *
     * @throws Exception if the writer cannot be opened, a document cannot be
     *         added, or the writer cannot be closed
     */
    public static void createIndex() throws Exception {
        luceneTest test = new luceneTest();
        test.initIndexWriter(IndexWriterConfig.OpenMode.CREATE);
        try {
            List<String[]> lists = new ArrayList<String[]>();
            for (int i = 0; i < TOTAL_DOCS; i++) {
                lists.add(new String[] { "flsdfjlks" + i, "flksjfkl" + i });
                if (i % BATCH_SIZE == 0 && i > 0) {
                    test.writeIndex(lists);
                    lists.clear();
                    System.out.println(i);
                }
            }
            // Write whatever is left after the last full batch.
            test.writeIndex(lists);
        } finally {
            // Always close the writer, even when indexing failed part-way.
            test.indexWriter.close();
        }
    }

    /**
     * Looks up the document whose {@code key} field is {@code "flsdfjlks0"}
     * and prints its {@code functionid}.
     *
     * @throws IOException if the index cannot be opened or searched
     */
    public static void findIndex() throws IOException {
        luceneTest test = new luceneTest();
        Map<String, String> maps = new HashMap<String, String>();
        maps.put("key", "flsdfjlks0");
        test.find(maps);
    }

    /**
     * Searches the index for documents matching every field/value pair in
     * {@code maps}, prints each hit's {@code functionid} to standard output,
     * and returns the same values.
     *
     * @param maps field name to exact field value; every entry must match
     * @return the {@code functionid} values of the hits, one per line, or an
     *         empty string when nothing matched
     * @throws IOException if the index cannot be opened, searched, or closed
     */
    public String find(Map<String, String> maps) throws IOException {
        StringBuilder buffer = new StringBuilder();
        try {
            openSearcher();
            ScoreDoc[] scoreDocs = indexSearcher(indexSearcher, maps);
            for (ScoreDoc scoreDoc : scoreDocs) {
                Document doc = indexSearcher.doc(scoreDoc.doc);
                String funcid = doc.get("functionid");
                System.out.println(funcid);
                if (funcid != null) {
                    if (buffer.length() > 0) {
                        buffer.append('\n');
                    }
                    buffer.append(funcid);
                }
            }
        } finally {
            // Release the searcher and reader whether or not the search succeeded.
            closeSearcher();
        }
        return buffer.toString();
    }

    /**
     * Opens a reader and searcher over the index at {@link #INDEX_PATH} and
     * stores them in this instance's fields.
     *
     * @throws IllegalStateException if the index cannot be opened; the
     *         underlying {@link IOException} is attached as the cause
     */
    public void openIndexSearcher() {
        try {
            openSearcher();
        } catch (IOException e) {
            throw new IllegalStateException("Cannot open Lucene index at " + INDEX_PATH, e);
        }
    }

    /** Opens the reader and searcher, letting I/O failures propagate to the caller. */
    private void openSearcher() throws IOException {
        Directory directory = FSDirectory.open(new File(INDEX_PATH));
        reader = IndexReader.open(directory);
        indexSearcher = new IndexSearcher(reader);
    }

    /** Closes the searcher and the reader (if open) and clears both fields. */
    private void closeSearcher() throws IOException {
        try {
            if (indexSearcher != null) {
                indexSearcher.close();
            }
        } finally {
            indexSearcher = null;
            if (reader != null) {
                try {
                    reader.close();
                } finally {
                    reader = null;
                }
            }
        }
    }

    /**
     * Runs a boolean query in which every entry of {@code maps} is a required
     * (MUST) term clause, and returns at most {@link #MAX_HITS} hits.
     */
    private ScoreDoc[] indexSearcher(IndexSearcher indexSearcher, Map<String, String> maps)
            throws IOException {
        BooleanQuery query = new BooleanQuery();
        for (Map.Entry<String, String> en : maps.entrySet()) {
            query.add(new TermQuery(new Term(en.getKey(), en.getValue())), BooleanClause.Occur.MUST);
        }
        TopDocs doc = indexSearcher.search(query, MAX_HITS);
        return doc.scoreDocs;
    }

    /**
     * Opens the index writer on {@link #INDEX_PATH} with the given open mode.
     * Does nothing when a writer is already open on this instance.
     */
    private void initIndexWriter(IndexWriterConfig.OpenMode openMode) throws IOException {
        if (indexWriter != null) {
            return;
        }
        Analyzer analyzer = new StandardAnalyzer(Version.LUCENE_36);
        indexDir = new File(INDEX_PATH);
        Directory directory = FSDirectory.open(indexDir);
        IndexWriterConfig indexWriterConfig = new IndexWriterConfig(Version.LUCENE_36, analyzer);
        indexWriterConfig.setOpenMode(openMode);
        LogMergePolicy mergePolicy = new LogByteSizeMergePolicy();
        mergePolicy.setMergeFactor(100);
        indexWriterConfig.setMergePolicy(mergePolicy);
        indexWriterConfig.setMaxBufferedDocs(100);
        indexWriter = new IndexWriter(directory, indexWriterConfig);
    }

    /**
     * Adds one document per array in {@code lists}: element 0 becomes the
     * {@code key} field and element 1 the {@code functionid} field. Failures
     * propagate to the caller instead of being reduced to a status code.
     */
    private void writeIndex(List<String[]> lists) throws IOException {
        for (String[] func : lists) {
            Document doc = new Document();
            doc.add(new Field("key", func[0], Field.Store.YES, Field.Index.NOT_ANALYZED));
            doc.add(new Field("functionid", func[1], Field.Store.YES, Field.Index.NOT_ANALYZED));
            indexWriter.addDocument(doc);
        }
    }
}
上述例子表明,为一百万(1000000)条量级的数据建立索引,并查找其中的某一条,Lucene的速度是非常高效的,所以它也成为了Elasticsearch的基础。
当然了,Lucene还可以用来做模糊查询和距离查询等。
扩展阅读
http://www.cnblogs.com/xing901022/p/3933675.html
http://lucene.apache.org/
更多推荐
已为社区贡献4条内容
所有评论(0)