代码先贴上~
01 public class Indexer {
02
03 public static String dir = "D:/vod_cache_data/TermTestIndex";
04
05 private static String file = "D:\\_workspace\\search\\itbu-boss-lucene\\in.txt";
06
07 private static final String[] Seed_A = { "中国", "美国", "印度", "法国", "荷兰" };
08
09 private static final String[] Seed_B = { "jot", "zork", "bill", "brain", "david", "baobao" };
10
11 private static void init(int n) throws Exception {
12 BufferedWriter bw = new BufferedWriter(new FileWriter(file));
13 for (int i = 0; i < n; i++) {
14 Random r = new Random(System.currentTimeMillis());
15 int seed = (int) Math.round(Math.random() * r.nextInt() * r.nextFloat() * r.nextFloat() % 10000000) + 12345678;
16 String line = Math.round(r.nextFloat() * Math.random() * n) + "\t" + Seed_A[seed % 5] + Seed_B[seed % 6] + "\t" + seed;
17 bw.write(line);
18 bw.newLine();
19 }
20 bw.flush();
21 bw.close();
22 }
23
24 public static void main(String[] args) throws Exception {
25 long s = System.currentTimeMillis();
26 Indexer.init(10000);
27 Analyzer analyzer = new StandardAnalyzer();
28 IndexWriter iwriter = new IndexWriter(new File(dir), analyzer, true, MaxFieldLength.UNLIMITED);
29 iwriter.setMaxFieldLength(25000);
30 Indexer.addDocument(iwriter);
31 System.out.println("iwriter.maxDoc() : " + iwriter.maxDoc() + " Time use : " + (System.currentTimeMillis() - s));
32 iwriter.close();
33 JotTest.optimizeDir(dir);
34 System.out.println("Down!");
35 }
36
37 public static void addDocument(IndexWriter iwriter) throws Exception {
38 InputStreamReader breader = new InputStreamReader(new FileInputStream((file)));
39 Scanner sca = new Scanner(breader);
40 String id = null, name = null, seed = null;
41 while (sca.hasNext()) {
42 id = sca.next();
43 name = sca.next();
44 seed = sca.next();
45 Document doc = new Document();
46 doc.add(new Field("id", id, Field.Store.YES, Field.Index.ANALYZED));
47 doc.add(new Field("name", name, Field.Store.YES, Field.Index.ANALYZED));
48 doc.add(new Field("seed", seed, Field.Store.YES, Field.Index.ANALYZED));
49 // 用于查询全部
50 doc.add(new Field("all", "all", Field.Store.YES, Field.Index.ANALYZED));
51 iwriter.addDocument(doc);
52 }
53 }
54 }
02
03 public static String dir = "D:/vod_cache_data/TermTestIndex";
04
05 private static String file = "D:\\_workspace\\search\\itbu-boss-lucene\\in.txt";
06
07 private static final String[] Seed_A = { "中国", "美国", "印度", "法国", "荷兰" };
08
09 private static final String[] Seed_B = { "jot", "zork", "bill", "brain", "david", "baobao" };
10
11 private static void init(int n) throws Exception {
12 BufferedWriter bw = new BufferedWriter(new FileWriter(file));
13 for (int i = 0; i < n; i++) {
14 Random r = new Random(System.currentTimeMillis());
15 int seed = (int) Math.round(Math.random() * r.nextInt() * r.nextFloat() * r.nextFloat() % 10000000) + 12345678;
16 String line = Math.round(r.nextFloat() * Math.random() * n) + "\t" + Seed_A[seed % 5] + Seed_B[seed % 6] + "\t" + seed;
17 bw.write(line);
18 bw.newLine();
19 }
20 bw.flush();
21 bw.close();
22 }
23
24 public static void main(String[] args) throws Exception {
25 long s = System.currentTimeMillis();
26 Indexer.init(10000);
27 Analyzer analyzer = new StandardAnalyzer();
28 IndexWriter iwriter = new IndexWriter(new File(dir), analyzer, true, MaxFieldLength.UNLIMITED);
29 iwriter.setMaxFieldLength(25000);
30 Indexer.addDocument(iwriter);
31 System.out.println("iwriter.maxDoc() : " + iwriter.maxDoc() + " Time use : " + (System.currentTimeMillis() - s));
32 iwriter.close();
33 JotTest.optimizeDir(dir);
34 System.out.println("Down!");
35 }
36
37 public static void addDocument(IndexWriter iwriter) throws Exception {
38 InputStreamReader breader = new InputStreamReader(new FileInputStream((file)));
39 Scanner sca = new Scanner(breader);
40 String id = null, name = null, seed = null;
41 while (sca.hasNext()) {
42 id = sca.next();
43 name = sca.next();
44 seed = sca.next();
45 Document doc = new Document();
46 doc.add(new Field("id", id, Field.Store.YES, Field.Index.ANALYZED));
47 doc.add(new Field("name", name, Field.Store.YES, Field.Index.ANALYZED));
48 doc.add(new Field("seed", seed, Field.Store.YES, Field.Index.ANALYZED));
49 // 用于查询全部
50 doc.add(new Field("all", "all", Field.Store.YES, Field.Index.ANALYZED));
51 iwriter.addDocument(doc);
52 }
53 }
54 }
01 @SuppressWarnings("deprecation")
02 public class MySearcher extends Searcher {
03
04 private static final String field = "id";
05
06 public static void main(String[] args) throws Exception {
07 MySearcher searcher = new MySearcher(Indexer.dir);
08 TermQuery query = new TermQuery(new Term("all", "all"));
09 Hits hits = searcher.search(query);
10 System.out.println("#Found " + hits.length() + " indexs in " + searcher.maxDoc() + " indexs.");
11 int showTop = 10;
12 for (int i = 0, l = hits.length() > showTop ? showTop : hits.length(); i < l; i++) {
13 Document document = hits.doc(i);
14 System.out.print(" id: " + document.get("id"));
15 System.out.print("\t name: " + document.get("name"));
16 System.out.println("\t seed: " + document.get("seed"));
17 }
18 }
19
20 public void search(Weight weight, Filter filter, final HitCollector results) throws IOException {
21 Scorer scorer = weight.scorer(reader);
22 if (scorer == null)
23 return;
24 // if (filter == null) {
25 // scorer.score(results);
26 // return;
27 // } else {
28 IndexReader reader = getIndexReader();
29 /** 1、正常查询,存入BitSet */
30 Long s1 = System.currentTimeMillis();
31 // Scorer scorer = weight.scorer(reader);
32 OpenBitSet bits = new OpenBitSet(reader.maxDoc());
33 while (scorer.next()) {
34 bits.set(scorer.doc());
35 }
36 /** 2、遍历Term,BitSet去重 */
37 Long s2 = System.currentTimeMillis();
38 // 从配置文件中读取配置,作为去重字段
39 Term startTerm = new Term(field);
40 TermEnum te = reader.terms(startTerm);
41 if (te != null) {
42 TermDocs td = null;
43 Term currTerm = te.term();
44 boolean flag = false;
45 do {
46 currTerm = te.term();
47 if ((currTerm != null) && (currTerm.field() != startTerm.field())) {
48 System.out.println("#Break at:" + currTerm);
49 break;
50 }
51 if (te.docFreq() > 1) {
52 td = reader.termDocs(currTerm);
53 flag = false;
54 while (td.next()) {
55 if (!flag && bits.get(td.doc())) {
56 flag = true;
57 } else {
58 bits.clear(td.doc());
59 }
60 }
61 }
62 } while (te.next());
63 }
64 /** 3、从BitSet收获结果 */
65 Long s3 = System.currentTimeMillis();
66 scorer = weight.scorer(reader);// 重置scorer
67 DocIdSetIterator bIterator = bits.iterator();
68 while (bIterator.next() && scorer.skipTo(bIterator.doc())) {
69 results.collect(scorer.doc(), scorer.score());
70 }
71 Long s4 = System.currentTimeMillis();
72 System.out.println("#Time 1):" + (s2 - s1) + "\t 2): " + (s3 - s2) + "\t 3): " + (s4 - s3));
73 // }
74 }
75 }
02 public class MySearcher extends Searcher {
03
04 private static final String field = "id";
05
06 public static void main(String[] args) throws Exception {
07 MySearcher searcher = new MySearcher(Indexer.dir);
08 TermQuery query = new TermQuery(new Term("all", "all"));
09 Hits hits = searcher.search(query);
10 System.out.println("#Found " + hits.length() + " indexs in " + searcher.maxDoc() + " indexs.");
11 int showTop = 10;
12 for (int i = 0, l = hits.length() > showTop ? showTop : hits.length(); i < l; i++) {
13 Document document = hits.doc(i);
14 System.out.print(" id: " + document.get("id"));
15 System.out.print("\t name: " + document.get("name"));
16 System.out.println("\t seed: " + document.get("seed"));
17 }
18 }
19
20 public void search(Weight weight, Filter filter, final HitCollector results) throws IOException {
21 Scorer scorer = weight.scorer(reader);
22 if (scorer == null)
23 return;
24 // if (filter == null) {
25 // scorer.score(results);
26 // return;
27 // } else {
28 IndexReader reader = getIndexReader();
29 /** 1、正常查询,存入BitSet */
30 Long s1 = System.currentTimeMillis();
31 // Scorer scorer = weight.scorer(reader);
32 OpenBitSet bits = new OpenBitSet(reader.maxDoc());
33 while (scorer.next()) {
34 bits.set(scorer.doc());
35 }
36 /** 2、遍历Term,BitSet去重 */
37 Long s2 = System.currentTimeMillis();
38 // 从配置文件中读取配置,作为去重字段
39 Term startTerm = new Term(field);
40 TermEnum te = reader.terms(startTerm);
41 if (te != null) {
42 TermDocs td = null;
43 Term currTerm = te.term();
44 boolean flag = false;
45 do {
46 currTerm = te.term();
47 if ((currTerm != null) && (currTerm.field() != startTerm.field())) {
48 System.out.println("#Break at:" + currTerm);
49 break;
50 }
51 if (te.docFreq() > 1) {
52 td = reader.termDocs(currTerm);
53 flag = false;
54 while (td.next()) {
55 if (!flag && bits.get(td.doc())) {
56 flag = true;
57 } else {
58 bits.clear(td.doc());
59 }
60 }
61 }
62 } while (te.next());
63 }
64 /** 3、从BitSet收获结果 */
65 Long s3 = System.currentTimeMillis();
66 scorer = weight.scorer(reader);// 重置scorer
67 DocIdSetIterator bIterator = bits.iterator();
68 while (bIterator.next() && scorer.skipTo(bIterator.doc())) {
69 results.collect(scorer.doc(), scorer.score());
70 }
71 Long s4 = System.currentTimeMillis();
72 System.out.println("#Time 1):" + (s2 - s1) + "\t 2): " + (s3 - s2) + "\t 3): " + (s4 - s3));
73 // }
74 }
75 }
01 public class Termer {
02
03 public static void main(String[] args) throws Exception {
04 IndexReader reader = IndexReader.open(Indexer.dir);
05 System.out.println("—— TermDocs skipTo ——-");
06 TermDocs td = reader.termDocs();
07 td.skipTo(3);
08 while (td.next()) {
09 System.out.print(" td:" + td);
10 System.out.print("\t doc:" + td.doc());
11 System.out.println("\t freq:" + td.freq());
12 }
13 System.out.println("—— TermDocs id ——-");
14 td = reader.termDocs(new Term("id", "009"));
15 while (td.next()) {
16 System.out.print(" td:" + td);
17 System.out.print("\t doc:" + td.doc());
18 System.out.println("\t freq:" + td.freq());
19 }
20 System.out.println("—— TermDocs name ——-");
21 td = reader.termDocs(new Term("name", "jot"));
22 while (td.next()) {
23 System.out.print(" td:" + td);
24 System.out.print("\t doc:" + td.doc());
25 System.out.println("\t freq:" + td.freq());
26 }
27 System.out.println("—— TermEnum ——-");
28 TermEnum tm = reader.terms();
29 tm.skipTo(new Term("id"));
30 while (tm.next()) {
31 System.out.print(" tm:" + tm);
32 System.out.print("\t docFreq:" + tm.docFreq());
33 Term ttm = tm.term();
34 System.out.println("\t tm:" + ttm);
35 td = reader.termDocs(ttm);
36 while (td.next()) {
37 System.out.print(" td:" + td);
38 System.out.print("\t doc:" + td.doc());
39 System.out.println("\t freq:" + td.freq());
40 }
41 }
42 System.out.println("reader.docFreq(new Term(\"id\")) : " + reader.docFreq(new Term("id", "009")));
43 }
44 }
02
03 public static void main(String[] args) throws Exception {
04 IndexReader reader = IndexReader.open(Indexer.dir);
05 System.out.println("—— TermDocs skipTo ——-");
06 TermDocs td = reader.termDocs();
07 td.skipTo(3);
08 while (td.next()) {
09 System.out.print(" td:" + td);
10 System.out.print("\t doc:" + td.doc());
11 System.out.println("\t freq:" + td.freq());
12 }
13 System.out.println("—— TermDocs id ——-");
14 td = reader.termDocs(new Term("id", "009"));
15 while (td.next()) {
16 System.out.print(" td:" + td);
17 System.out.print("\t doc:" + td.doc());
18 System.out.println("\t freq:" + td.freq());
19 }
20 System.out.println("—— TermDocs name ——-");
21 td = reader.termDocs(new Term("name", "jot"));
22 while (td.next()) {
23 System.out.print(" td:" + td);
24 System.out.print("\t doc:" + td.doc());
25 System.out.println("\t freq:" + td.freq());
26 }
27 System.out.println("—— TermEnum ——-");
28 TermEnum tm = reader.terms();
29 tm.skipTo(new Term("id"));
30 while (tm.next()) {
31 System.out.print(" tm:" + tm);
32 System.out.print("\t docFreq:" + tm.docFreq());
33 Term ttm = tm.term();
34 System.out.println("\t tm:" + ttm);
35 td = reader.termDocs(ttm);
36 while (td.next()) {
37 System.out.print(" td:" + td);
38 System.out.print("\t doc:" + td.doc());
39 System.out.println("\t freq:" + td.freq());
40 }
41 }
42 System.out.println("reader.docFreq(new Term(\"id\")) : " + reader.docFreq(new Term("id", "009")));
43 }
44 }
没有评论
