00001
00002 #include <xapian.h>
00003 #include <ept/core/apt.h>
00004
00005 #ifndef EPT_XAPIAN_H
00006 #define EPT_XAPIAN_H
00007
00008 namespace ept {
00009 namespace core {
00010 namespace xapian {
00011
00012
00013 const Xapian::valueno VAL_APT_INSTALLED_SIZE = 1;
00014 const Xapian::valueno VAL_APT_PACKAGE_SIZE = 2;
00015 const Xapian::valueno VAL_POPCON = 10;
00016 const Xapian::valueno VAL_ITERATING_RATING = 20;
00017 const Xapian::valueno VAL_ITERATING_FUNCTIONALITY = 21;
00018 const Xapian::valueno VAL_ITERATING_USABILITY = 22;
00019 const Xapian::valueno VAL_ITERATING_SECURITY = 23;
00020 const Xapian::valueno VAL_ITERATING_PERFORMANCE = 24;
00021 const Xapian::valueno VAL_ITERATING_QUALITY = 25;
00022 const Xapian::valueno VAL_ITERATING_SUPPORT = 26;
00023 const Xapian::valueno VAL_ITERATING_ADOPTION = 27;
00024
00025 struct List {
00026 char m_enqPlace[sizeof(Xapian::Enquire)];
00027 mutable Xapian::MSet m_matches;
00028 mutable Xapian::MSet::const_iterator m_iter;
00029 mutable int m_pos;
00030 typedef List Type;
00031
00032 static const int chunkSize = 20;
00033
00034 List head() const {
00035 seek();
00036 return *this;
00037 }
00038
00039 Token token() {
00040 Token t;
00041 t._id = m_iter.get_document().get_data();
00042 return t;
00043 }
00044
00045 void seek() const {
00046 if ( m_matches.size() == chunkSize && m_iter == m_matches.end() ) {
00047 m_matches = enq().get_mset( m_pos, chunkSize );
00048 m_iter = m_matches.begin();
00049 m_pos += chunkSize;
00050 }
00051 }
00052
00053 bool empty() const {
00054 if ( m_pos == -1 )
00055 return true;
00056 seek();
00057 return m_matches.size() < 30 && m_iter == m_matches.end();
00058 }
00059
00060 List tail() const {
00061 List t = *this;
00062 t.seek();
00063 t.m_iter ++;
00064 return t;
00065 }
00066
00067 Xapian::Enquire const &enq() const {
00068 return *reinterpret_cast< Xapian::Enquire const * >( m_enqPlace );
00069 }
00070
00071 List( Xapian::Enquire _enq )
00072 {
00073 Xapian::Enquire *e = new (m_enqPlace) Xapian::Enquire( _enq );
00074 assert_eq( e, &enq() );
00075 m_matches = enq().get_mset( 0, chunkSize );
00076 m_iter = m_matches.begin();
00077 m_pos = chunkSize;
00078 }
00079
00080 List() {}
00081 };
00082
00083 struct Source
00084 {
00085 protected:
00086 time_t m_timestamp;
00087 Xapian::Database m_db;
00088 Xapian::Stem m_stem;
00089
00091 static std::string toLower(const std::string& str);
00092
00099 void normalize_and_add(Xapian::Document& doc, const std::string& term,
00100 int& pos) const;
00101
00102 public:
00103 Source();
00104
00106 Xapian::Database& db() { return m_db; }
00107
00109 const Xapian::Database& db() const { return m_db; }
00110
00112 time_t timestamp() const { return m_timestamp; }
00113
00115 bool hasData() const { return m_timestamp > 0; }
00116
00117 List query( const std::string &s, int qualityCutoff = 50 ) {
00118 Xapian::Enquire enq( db() );
00119 enq.set_query( makeORQuery( s ) );
00120 Xapian::MSet first = enq.get_mset(0, 1, 0, 0, 0);
00121 Xapian::MSetIterator ifirst = first.begin();
00122 if ( ifirst != first.end() ) {
00123 Xapian::percent cutoff = ifirst.get_percent() * qualityCutoff / 100;
00124 enq.set_cutoff(cutoff);
00125 }
00126 return List( enq );
00127 }
00128
00129 List partialQuery( const std::string &s ) {
00130 Xapian::Enquire enq( db() );
00131 enq.set_query( makePartialORQuery( s ) );
00132 return List( enq );
00133 }
00134
00136
00137
00138 Xapian::docid docidByName(const std::string& pkgname) const;
00139
00143 Xapian::Query makeORQuery(const std::string& keywords) const;
00144
00151 Xapian::Query makePartialORQuery(const std::string& keywords) const;
00152
00156 template<typename ITER>
00157 Xapian::Query makeORQuery(const ITER& begin, const ITER& end) const
00158 {
00159 std::vector<std::string> terms;
00160
00161 for (ITER i = begin; i != end; ++i)
00162 {
00163 std::string t = toLower(*i);
00164 std::string s = m_stem(t);
00165 terms.push_back(t);
00166 if (s != t)
00167 terms.push_back(s);
00168 }
00169 return Xapian::Query(Xapian::Query::OP_OR, terms.begin(), terms.end());
00170 }
00171
00173 std::vector<std::string> expand(Xapian::Enquire& enq) const;
00174
00175
00176
00180 Xapian::Query makeRelatedQuery(const std::string& pkgname) const;
00181
00185 double getDoubleValue(const std::string& pkgname,
00186 Xapian::valueno val_id) const;
00187
00191 int getIntValue(const std::string& pkgname, Xapian::valueno val_id) const;
00192 };
00193
00194 }
00195 }
00196 }
00197
00198 #endif