include/xapian/enquire.h

Go to the documentation of this file.
00001 
00004 /* Copyright 1999,2000,2001 BrightStation PLC
00005  * Copyright 2001,2002 Ananova Ltd
00006  * Copyright 2002,2003,2004,2005,2006,2007,2009 Olly Betts
00007  *
00008  * This program is free software; you can redistribute it and/or
00009  * modify it under the terms of the GNU General Public License as
00010  * published by the Free Software Foundation; either version 2 of the
00011  * License, or (at your option) any later version.
00012  *
00013  * This program is distributed in the hope that it will be useful,
00014  * but WITHOUT ANY WARRANTY; without even the implied warranty of
00015  * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
00016  * GNU General Public License for more details.
00017  *
00018  * You should have received a copy of the GNU General Public License
00019  * along with this program; if not, write to the Free Software
00020  * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA  02110-1301
00021  * USA
00022  */
00023 
00024 #ifndef XAPIAN_INCLUDED_ENQUIRE_H
00025 #define XAPIAN_INCLUDED_ENQUIRE_H
00026 
00027 #include <string>
00028 
00029 #include <xapian/base.h>
00030 #include <xapian/deprecated.h>
00031 #include <xapian/sorter.h>
00032 #include <xapian/types.h>
00033 #include <xapian/termiterator.h>
00034 #include <xapian/visibility.h>
00035 
00036 namespace Xapian {
00037 
00038 class Database;
00039 class Document;
00040 class ErrorHandler;
00041 class ExpandDecider;
00042 class MSetIterator;
00043 class Query;
00044 class Weight;
00045 
00049 class XAPIAN_VISIBILITY_DEFAULT MSet {
      // Handle class for a set of match results ("MSet").  Enquire::get_mset()
      // returns one and MSetIterator walks it.  Apart from max_size(), every
      // member is only declared here, so the notes below describe signatures
      // and explicitly hedge anything inferred from a name.
00050     public:
              // Opaque implementation type; defined elsewhere.
00051         class Internal;
              // Pointer to the implementation, held through Xapian's RefCntPtr
              // wrapper (presumably reference counted - confirm in xapian/base.h).
              // Note that this data member is public.
00053         Xapian::Internal::RefCntPtr<Internal> internal;
00054 
              // Build an MSet around an existing Internal object.  `explicit`, so a
              // raw Internal pointer never converts to an MSet implicitly.
00056         explicit MSet(MSet::Internal * internal_);
00057 
              // Default constructor (also used by MSetIterator's default constructor).
00059         MSet();
00060 
00062         ~MSet();
00063 
              // Copy constructor.
00065         MSet(const MSet & other);
00066 
              // Copy assignment.  Returns void, so assignments cannot be chained.
00068         void operator=(const MSet &other);
00069 
              // Three fetch() overloads: an iterator range, a single item, or no
              // argument.  All are const and return nothing.
              // NOTE(review): presumably these pre-load document data for the given
              // items (or for the whole set); the bodies are not in this header.
00085         void fetch(const MSetIterator &begin, const MSetIterator &end) const;
00086 
00089         void fetch(const MSetIterator &item) const;
00090 
00093         void fetch() const;
00094 
              // Convert a weight, or the weight of the item an iterator refers to,
              // into a Xapian::percent.  The scaling rule is defined out of line.
00099         Xapian::percent convert_to_percent(Xapian::weight wt) const;
00100 
00102         Xapian::percent convert_to_percent(const MSetIterator &it) const;
00103 
              // Per-term statistics looked up by term name.
              // NOTE(review): presumably only meaningful for terms of the query that
              // produced this MSet - confirm against the implementation.
00111         Xapian::doccount get_termfreq(const std::string &tname) const;
00112 
00120         Xapian::weight get_termweight(const std::string &tname) const;
00121 
              // Offset of this MSet's first item.  MSetIterator::get_rank() adds an
              // iterator's index to this value.
00129         Xapian::doccount get_firstitem() const;
00130 
              // Lower bound / estimate / upper bound accessors, each returning a
              // Xapian::doccount.
              // NOTE(review): by their names these describe the total number of
              // matching documents, not size(); confirm.
00140         Xapian::doccount get_matches_lower_bound() const;
00141 
00154         Xapian::doccount get_matches_estimated() const;
00155 
00165         Xapian::doccount get_matches_upper_bound() const;
00166 
              // Weight accessors.
              // NOTE(review): presumably the greatest weight any document could have
              // obtained versus the greatest weight actually obtained; confirm.
00173         Xapian::weight get_max_possible() const;
00174 
00188         Xapian::weight get_max_attained() const;
00189 
              // Number of items held in this MSet.
00191         Xapian::doccount size() const;
00192 
              // Container-style max_size(); simply returns size().
00194         Xapian::doccount max_size() const { return size(); }
00195 
              // Container-style emptiness test (presumably size() == 0).
00197         bool empty() const;
00198 
              // Exchange contents with another MSet.
00200         void swap(MSet & other);
00201 
              // Iterator accessors.  All return MSetIterator by value and are const.
00203         MSetIterator begin() const;
00204 
00206         MSetIterator end() const;
00207 
              // NOTE(review): presumably an iterator to the last item - confirm.
00209         MSetIterator back() const;
00210 
              // Returns an iterator (not a reference to an element) for index i.
              // NOTE(review): whether i is range-checked is not visible here.
00220         MSetIterator operator[](Xapian::doccount i) const;
00221 
00223 
              // Container-style typedefs.  Note that const_reference is declared
              // identically to reference (a non-const MSetIterator &).
00224         typedef MSetIterator value_type; // FIXME: not assignable...
00225         typedef MSetIterator iterator;
00226         typedef MSetIterator const_iterator;
00227         typedef MSetIterator & reference; // Hmm
00228         typedef MSetIterator & const_reference;
00229         typedef MSetIterator * pointer; // Hmm
00230         typedef Xapian::doccount_diff difference_type;
00231         typedef Xapian::doccount size_type;
00233 
              // Returns a std::string describing this object (presumably intended
              // for debugging output; the format is defined out of line).
00235         std::string get_description() const;
00236 };
00237 
00241 class XAPIAN_VISIBILITY_DEFAULT MSetIterator {
00242     private:
00243         friend class MSet;
00244         friend bool operator==(const MSetIterator &a, const MSetIterator &b);
00245         friend bool operator!=(const MSetIterator &a, const MSetIterator &b);
00246 
00247         MSetIterator(Xapian::doccount index_, const MSet & mset_)
00248             : index(index_), mset(mset_) { }
00249 
00250         Xapian::doccount index;
00251         MSet mset;
00252 
00253     public:
00257         MSetIterator() : index(0), mset() { }
00258 
00259         ~MSetIterator() { }
00260 
00262         MSetIterator(const MSetIterator &other) {
00263             index = other.index;
00264             mset = other.mset;
00265         }
00266 
00268         void operator=(const MSetIterator &other) {
00269             index = other.index;
00270             mset = other.mset;
00271         }
00272 
00274         MSetIterator & operator++() {
00275             ++index;
00276             return *this;
00277         }
00278 
00280         MSetIterator operator++(int) {
00281             MSetIterator tmp = *this;
00282             ++index;
00283             return tmp;
00284         }
00285 
00287         MSetIterator & operator--() {
00288             --index;
00289             return *this;
00290         }
00291 
00293         MSetIterator operator--(int) {
00294             MSetIterator tmp = *this;
00295             --index;
00296             return tmp;
00297         }
00298 
00300         Xapian::docid operator*() const;
00301 
00318         Xapian::Document get_document() const;
00319 
00326         Xapian::doccount get_rank() const {
00327             return mset.get_firstitem() + index;
00328         }
00329 
00331         Xapian::weight get_weight() const;
00332 
00335         std::string get_collapse_key() const;
00336 
00353         Xapian::doccount get_collapse_count() const;
00354 
00370         Xapian::percent get_percent() const;
00371 
00373         std::string get_description() const;
00374 
00376 
00377         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: could enhance to be a randomaccess_iterator
00378         typedef Xapian::docid value_type;
00379         typedef Xapian::doccount_diff difference_type;
00380         typedef Xapian::docid * pointer;
00381         typedef Xapian::docid & reference;
00383 };
00384 
00385 inline bool operator==(const MSetIterator &a, const MSetIterator &b)
00386 {
00387     return (a.index == b.index);
00388 }
00389 
00390 inline bool operator!=(const MSetIterator &a, const MSetIterator &b)
00391 {
00392     return (a.index != b.index);
00393 }
00394 
00395 class ESetIterator;
00396 
00401 class XAPIAN_VISIBILITY_DEFAULT ESet {
      // Handle class for an "ESet"; Enquire::get_eset() returns one and
      // ESetIterator walks it.  ESetIterator dereferences to a std::string and
      // exposes get_weight(), so each item pairs a string with a weight.
      // NOTE(review): presumably these are suggested expansion terms - confirm.
      // Apart from max_size(), every member is only declared here.
00402     public:
              // Opaque implementation type; defined elsewhere.
00403         class Internal;
              // Pointer to the implementation, held through Xapian's RefCntPtr
              // wrapper.  Note that this data member is public.
00405         Xapian::Internal::RefCntPtr<Internal> internal;
00406 
              // Default constructor (also used by ESetIterator's default constructor).
00408         ESet();
00409 
00411         ~ESet();
00412 
              // Copy constructor.
00414         ESet(const ESet & other);
00415 
              // Copy assignment.  Returns void, so assignments cannot be chained.
00417         void operator=(const ESet &other);
00418 
              // Returns a Xapian::termcount.
              // NOTE(review): the name suggests a bound on the number of terms that
              // could have been returned; the semantics are defined out of line.
00423         Xapian::termcount get_ebound() const;
00424 
              // Number of items held in this ESet.
00426         Xapian::termcount size() const;
00427 
              // Container-style max_size(); simply returns size().
00429         Xapian::termcount max_size() const { return size(); }
00430 
              // Container-style emptiness test (presumably size() == 0).
00432         bool empty() const;
00433 
              // Exchange contents with another ESet.
00435         void swap(ESet & other);
00436 
              // Iterator accessors.  All return ESetIterator by value and are const.
00438         ESetIterator begin() const;
00439 
00441         ESetIterator end() const;
00442 
              // NOTE(review): presumably an iterator to the last item - confirm.
00444         ESetIterator back() const;
00445 
              // Returns an iterator (not a reference to an element) for index i.
              // NOTE(review): whether i is range-checked is not visible here.
00447         ESetIterator operator[](Xapian::termcount i) const;
00448 
              // Returns a std::string describing this object (format defined out of line).
00450         std::string get_description() const;
00451 };
00452 
00454 class XAPIAN_VISIBILITY_DEFAULT ESetIterator {
00455     private:
00456         friend class ESet;
00457         friend bool operator==(const ESetIterator &a, const ESetIterator &b);
00458         friend bool operator!=(const ESetIterator &a, const ESetIterator &b);
00459 
00460         ESetIterator(Xapian::termcount index_, const ESet & eset_)
00461             : index(index_), eset(eset_) { }
00462 
00463         Xapian::termcount index;
00464         ESet eset;
00465 
00466     public:
00470         ESetIterator() : index(0), eset() { }
00471 
00472         ~ESetIterator() { }
00473 
00475         ESetIterator(const ESetIterator &other) {
00476             index = other.index;
00477             eset = other.eset;
00478         }
00479 
00481         void operator=(const ESetIterator &other) {
00482             index = other.index;
00483             eset = other.eset;
00484         }
00485 
00487         ESetIterator & operator++() {
00488             ++index;
00489             return *this;
00490         }
00491 
00493         ESetIterator operator++(int) {
00494             ESetIterator tmp = *this;
00495             ++index;
00496             return tmp;
00497         }
00498 
00500         ESetIterator & operator--() {
00501             --index;
00502             return *this;
00503         }
00504 
00506         ESetIterator operator--(int) {
00507             ESetIterator tmp = *this;
00508             --index;
00509             return tmp;
00510         }
00511 
00513         const std::string & operator *() const;
00514 
00516         Xapian::weight get_weight() const;
00517 
00519         std::string get_description() const;
00520 
00522 
00523         typedef std::bidirectional_iterator_tag iterator_category; // FIXME: go for randomaccess_iterator!
00524         typedef std::string value_type;
00525         typedef Xapian::termcount_diff difference_type;
00526         typedef std::string * pointer;
00527         typedef std::string & reference;
00529 };
00530 
00531 inline bool operator==(const ESetIterator &a, const ESetIterator &b)
00532 {
00533     return (a.index == b.index);
00534 }
00535 
00536 inline bool operator!=(const ESetIterator &a, const ESetIterator &b)
00537 {
00538     return (a.index != b.index);
00539 }
00540 
00545 class XAPIAN_VISIBILITY_DEFAULT RSet {
      // Handle class for a set of document ids ("RSet").  Documents are added,
      // removed and tested by Xapian::docid, and the set is passed to
      // Enquire::get_mset() / Enquire::get_eset().
      // NOTE(review): presumably this is the set of documents marked relevant -
      // confirm against the library documentation.
00546     public:
              // Opaque implementation type; defined elsewhere.
00548         class Internal;
00549 
              // Pointer to the implementation, held through Xapian's RefCntPtr
              // wrapper.  Note that this data member is public.
00551         Xapian::Internal::RefCntPtr<Internal> internal;
00552 
              // Copy constructor.
00554         RSet(const RSet &rset);
00555 
              // Copy assignment.  Returns void, so assignments cannot be chained.
00557         void operator=(const RSet &rset);
00558 
              // Default constructor.
00560         RSet();
00561 
00563         ~RSet();
00564 
              // Number of entries in the set.
00566         Xapian::doccount size() const;
00567 
              // Emptiness test (presumably size() == 0).
00569         bool empty() const;
00570 
              // Add a document by id.
00572         void add_document(Xapian::docid did);
00573 
              // Convenience overload: dereferences the iterator to a docid and
              // forwards to add_document(Xapian::docid).
00575         void add_document(const Xapian::MSetIterator & i) { add_document(*i); }
00576 
              // Remove a document by id.
00578         void remove_document(Xapian::docid did);
00579 
              // Convenience overload: dereferences the iterator and forwards to
              // remove_document(Xapian::docid).
00581         void remove_document(const Xapian::MSetIterator & i) { remove_document(*i); }
00582 
              // Membership test by id.
00584         bool contains(Xapian::docid did) const;
00585 
              // Convenience overload: dereferences the iterator and forwards to
              // contains(Xapian::docid).
00587         bool contains(const Xapian::MSetIterator & i) const { return contains(*i); }
00588 
              // Returns a std::string describing this object (format defined out of line).
00590         std::string get_description() const;
00591 };
00592 
00595 class XAPIAN_VISIBILITY_DEFAULT MatchDecider {
      // Abstract functor interface: subclasses implement operator() to return a
      // bool for a given Document.  Enquire::get_mset() accepts pointers to
      // MatchDecider objects.
      // NOTE(review): presumably `true` means "accept this document" - confirm.
00596     public:
              // Pure virtual decision function; const, so implementations cannot
              // modify non-mutable state.
00602         virtual bool operator()(const Xapian::Document &doc) const = 0;
00603 
              // Virtual destructor (defined out of line) so that subclasses can be
              // destroyed through a MatchDecider pointer.
00605         virtual ~MatchDecider();
00606 };
00607 
00618 class XAPIAN_VISIBILITY_DEFAULT Enquire {
      // Query-running front end: constructed from a Database, configured with a
      // Query, weighting scheme, collapse key, cutoffs and sort order, and then
      // asked for an MSet (get_mset), an ESet (get_eset) or matching terms.
      // Most members are only declared here; notes on behaviour that cannot be
      // read off a signature are hedged.
00619     public:
              // Copy constructor.
00621         Enquire(const Enquire & other);
00622 
              // Copy assignment.  Returns void, so assignments cannot be chained.
00624         void operator=(const Enquire & other);
00625 
              // Opaque implementation type; defined elsewhere.
00626         class Internal;
              // Pointer to the implementation, held through Xapian's RefCntPtr
              // wrapper.  Note that this data member is public.
00628         Xapian::Internal::RefCntPtr<Internal> internal;
00629 
              // Create an Enquire for `database`.  `explicit`, so a Database never
              // converts to an Enquire implicitly.  The error handler is optional
              // and defaults to a null pointer.
00654         explicit Enquire(const Database &database, ErrorHandler * errorhandler_ = 0);
00655 
00658         ~Enquire();
00659 
              // Set the query.  `qlen` defaults to 0.
              // NOTE(review): presumably 0 asks the library to work the query length
              // out itself - confirm.
00666         void set_query(const Xapian::Query & query, Xapian::termcount qlen = 0);
00667 
              // Returns a const reference to a Query (presumably the one last set).
00674         const Xapian::Query & get_query() const;
00675 
              // Choose the weighting scheme.  Weight declares Enquire a friend "so
              // Enquire can clone us", so the argument need not outlive the call
              // if it is indeed cloned (confirm in the implementation).
00682         void set_weighting_scheme(const Weight &weight_);
00683 
              // Select the value slot used as collapse key.
00710         void set_collapse_key(Xapian::valueno collapse_key);
00711 
              // Argument type for set_docid_order().  The numeric values are fixed:
              // DESCENDING = 0, ASCENDING = 1, DONT_CARE = 2.
00712         typedef enum {
00713             ASCENDING = 1,
00714             DESCENDING = 0,
00715             DONT_CARE = 2
00716         } docid_order;
00717 
00741         void set_docid_order(docid_order order);
00742 
              // Set a percentage cutoff and, optionally, a weight cutoff (default 0).
00761         void set_cutoff(Xapian::percent percent_cutoff, Xapian::weight weight_cutoff = 0);
00762 
              // Sort-order selectors.  Each variant taking a `reverse` flag defaults
              // it to true.  The *_by_value* forms take a value slot number; the
              // *_by_key* forms take a pointer to a Xapian::Sorter.
              // NOTE(review): ownership of the Sorter pointer is not visible here;
              // presumably the caller keeps it alive - confirm.
00767         void set_sort_by_relevance();
00768 
00781         void set_sort_by_value(Xapian::valueno sort_key, bool reverse = true);
00782 
00791         void set_sort_by_key(Xapian::Sorter * sorter, bool reverse = true);
00792 
00806         void set_sort_by_value_then_relevance(Xapian::valueno sort_key,
00807                                               bool reverse = true);
00808 
00818         void set_sort_by_key_then_relevance(Xapian::Sorter * sorter,
00819                                             bool reverse = true);
00820 
00840         void set_sort_by_relevance_then_value(Xapian::valueno sort_key,
00841                                               bool reverse = true);
00842 
00859         void set_sort_by_relevance_then_key(Xapian::Sorter * sorter,
00860                                             bool reverse = true);
00861 
              // get_mset(): three overloads.
              //  1. first, maxitems, then optional checkatleast (0), omrset (null)
              //     and mdecider (null).
              //  2. As (1) plus a second MatchDecider `matchspy`; no defaults.
              //  3. Inline convenience form without checkatleast: forwards to (1)
              //     with checkatleast = 0.
              // NOTE(review): presumably `first` is the offset of the first result
              // and `maxitems` the maximum number of results - confirm.
00900         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00901                       Xapian::doccount checkatleast = 0,
00902                       const RSet * omrset = 0,
00903                       const MatchDecider * mdecider = 0) const;
00904         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00905                       Xapian::doccount checkatleast,
00906                       const RSet * omrset,
00907                       const MatchDecider * mdecider,
00908                       const MatchDecider * matchspy) const;
00909         MSet get_mset(Xapian::doccount first, Xapian::doccount maxitems,
00910                       const RSet * omrset,
00911                       const MatchDecider * mdecider = 0) const {
00912             return get_mset(first, maxitems, 0, omrset, mdecider);
00913         }
00914 
              // Bit flags for the `flags` argument of get_eset().
00915         static const int INCLUDE_QUERY_TERMS = 1;
00916         static const int USE_EXACT_TERMFREQ = 2;
              // Deprecated lower-case aliases with the same values.  Non-MSVC
              // compilers mark the declarations via XAPIAN_DEPRECATED; MSVC uses
              // #pragma deprecated instead.
00917 #ifndef _MSC_VER
00919         XAPIAN_DEPRECATED(static const int include_query_terms) = 1;
00921         XAPIAN_DEPRECATED(static const int use_exact_termfreq) = 2;
00922 #else
00923         // Work around MSVC stupidity (you get a warning for deprecating a
00924         // declaration).
00925         static const int include_query_terms = 1;
00926         static const int use_exact_termfreq = 2;
00927 #pragma deprecated("Xapian::Enquire::include_query_terms", "Xapian::Enquire::use_exact_termfreq")
00928 #endif
00929 
              // Build an ESet of at most `maxitems` entries from the RSet `omrset`.
              // `flags` defaults to 0, `k` to 1.0, and the ExpandDecider to null.
              // NOTE(review): the meaning of `k` is defined out of line - confirm.
00952         ESet get_eset(Xapian::termcount maxitems,
00953                         const RSet & omrset,
00954                         int flags = 0,
00955                         double k = 1.0,
00956                         const Xapian::ExpandDecider * edecider = 0) const;
00957 
              // Convenience overload taking only an ExpandDecider: forwards to the
              // full form with flags = 0 and k = 1.0.
00971         inline ESet get_eset(Xapian::termcount maxitems, const RSet & omrset,
00972                                const Xapian::ExpandDecider * edecider) const {
00973             return get_eset(maxitems, omrset, 0, 1.0, edecider);
00974         }
00975 
              // Begin iterator over terms for document `did` (defined out of line).
              // NOTE(review): presumably the query terms that match that document.
01004         TermIterator get_matching_terms_begin(Xapian::docid did) const;
01005 
              // Matching end iterator.  The argument is ignored; the result is
              // always TermIterator(NULL).
01007         TermIterator get_matching_terms_end(Xapian::docid /*did*/) const {
01008             return TermIterator(NULL);
01009         }
01010 
              // As above, but identifying the document through an MSetIterator.
01033         TermIterator get_matching_terms_begin(const MSetIterator &it) const;
01034 
              // Matching end iterator.  The argument is ignored; the result is
              // always TermIterator(NULL).
01036         TermIterator get_matching_terms_end(const MSetIterator &/*it*/) const {
01037             return TermIterator(NULL);
01038         }
01039 
              // Deprecated (wrapped in XAPIAN_DEPRECATED).  Takes a name and an
              // optional MatchDecider pointer defaulting to NULL.
01052         XAPIAN_DEPRECATED(
01053         void register_match_decider(const std::string &name,
01054                                     const MatchDecider *mdecider = NULL));
01055 
              // Returns a std::string describing this object (format defined out of line).
01057         std::string get_description() const;
01058 };
01059 
01060 }
01061 
01062 class RemoteServer;
01063 class ScaleWeight;
01064 
01065 namespace Xapian {
01066 
01068 class XAPIAN_VISIBILITY_DEFAULT Weight {
01069     friend class Enquire; // So Enquire can clone us
01070     friend class ::RemoteServer; // So RemoteServer can clone us - FIXME
01071     friend class ::ScaleWeight;
01072     public:
01073         class Internal;
01074     protected:
01075         Weight(const Weight &);
01076     private:
01077         void operator=(Weight &);
01078 
01088         virtual Weight * clone() const = 0;
01089 
01090     protected:
01091         const Internal * internal; // Weight::Internal == Stats
01092         Xapian::doclength querysize;
01093         Xapian::termcount wqf;
01094         std::string tname;
01095 
01096     public:
01097         // FIXME:1.1: initialise internal to NULL here
01098         Weight() { }
01099         virtual ~Weight();
01100 
01113         Weight * create(const Internal * internal_, Xapian::doclength querysize_,
01114                         Xapian::termcount wqf_, const std::string & tname_) const;
01115 
01120         virtual std::string name() const = 0;
01121 
01123         virtual std::string serialise() const = 0;
01124 
01126         virtual Weight * unserialise(const std::string &s) const = 0;
01127 
01135         virtual Xapian::weight get_sumpart(Xapian::termcount wdf,
01136                                       Xapian::doclength len) const = 0;
01137 
01143         virtual Xapian::weight get_maxpart() const = 0;
01144 
01153         virtual Xapian::weight get_sumextra(Xapian::doclength len) const = 0;
01154 
01158         virtual Xapian::weight get_maxextra() const = 0;
01159 
01161         virtual bool get_sumpart_needs_doclength() const; /* { return true; } */
01162 };
01163 
01165 class XAPIAN_VISIBILITY_DEFAULT BoolWeight : public Weight {
      // Weight subclass with no parameters and no data members of its own.
      // It overrides every pure virtual of Weight; all bodies except the
      // constructor live out of line.
      // NOTE(review): by its name this is the scheme for pure boolean matching
      // (presumably every weight is 0) - confirm in the implementation.
01166     public:
              // Covariant override: returns BoolWeight* where Weight::clone()
              // returns Weight*.  Public here although private in Weight.
01167         BoolWeight * clone() const;
01168         BoolWeight() { }
01169         ~BoolWeight();
01170         std::string name() const;
01171         std::string serialise() const;
              // Covariant override of Weight::unserialise().
01172         BoolWeight * unserialise(const std::string & s) const;
01173         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01174         Xapian::weight get_maxpart() const;
01175 
01176         Xapian::weight get_sumextra(Xapian::doclength len) const;
01177         Xapian::weight get_maxextra() const;
01178 
              // Overrides the non-pure virtual from Weight.
01179         bool get_sumpart_needs_doclength() const;
01180 };
01181 
01194 class XAPIAN_VISIBILITY_DEFAULT BM25Weight : public Weight {
01195     private:
01196         mutable Xapian::weight termweight;
01197         mutable Xapian::doclength lenpart;
01198 
01199         double k1, k2, k3, b;
01200         Xapian::doclength min_normlen;
01201 
01202         mutable bool weight_calculated;
01203 
01204         void calc_termweight() const;
01205 
01206     public:
01225         BM25Weight(double k1_, double k2_, double k3_, double b_,
01226                    double min_normlen_)
01227                 : k1(k1_), k2(k2_), k3(k3_), b(b_), min_normlen(min_normlen_),
01228                   weight_calculated(false)
01229         {
01230             if (k1 < 0) k1 = 0;
01231             if (k2 < 0) k2 = 0;
01232             if (k3 < 0) k3 = 0;
01233             if (b < 0) b = 0; else if (b > 1) b = 1;
01234         }
01235         BM25Weight() : k1(1), k2(0), k3(1), b(0.5), min_normlen(0.5),
01236                        weight_calculated(false) { }
01237 
01238         BM25Weight * clone() const;
01239         ~BM25Weight() { }
01240         std::string name() const;
01241         std::string serialise() const;
01242         BM25Weight * unserialise(const std::string & s) const;
01243         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01244         Xapian::weight get_maxpart() const;
01245 
01246         Xapian::weight get_sumextra(Xapian::doclength len) const;
01247         Xapian::weight get_maxextra() const;
01248 
01249         bool get_sumpart_needs_doclength() const;
01250 };
01251 
01269 class XAPIAN_VISIBILITY_DEFAULT TradWeight : public Weight {
01270     private:
01271         mutable Xapian::weight termweight;
01272         mutable Xapian::doclength lenpart;
01273 
01274         double param_k;
01275 
01276         mutable bool weight_calculated;
01277 
01278         void calc_termweight() const;
01279 
01280     public:
01288         explicit TradWeight(double k) : param_k(k), weight_calculated(false) {
01289             if (param_k < 0) param_k = 0;
01290         }
01291 
01292         TradWeight() : param_k(1.0), weight_calculated(false) { }
01293 
01294         TradWeight * clone() const;
01295         ~TradWeight() { }
01296         std::string name() const;
01297         std::string serialise() const;
01298         TradWeight * unserialise(const std::string & s) const;
01299 
01300         Xapian::weight get_sumpart(Xapian::termcount wdf, Xapian::doclength len) const;
01301         Xapian::weight get_maxpart() const;
01302 
01303         Xapian::weight get_sumextra(Xapian::doclength len) const;
01304         Xapian::weight get_maxextra() const;
01305 
01306         bool get_sumpart_needs_doclength() const;
01307 };
01308 
01309 }
01310 
01311 #endif /* XAPIAN_INCLUDED_ENQUIRE_H */

Documentation for Xapian (version 1.0.11).
Generated on 15 Mar 2009 by Doxygen 1.5.2.