1 : /** \file query.h
2 : * \brief Classes for representing a query
3 : */
4 : /* Copyright 1999,2000,2001 BrightStation PLC
5 : * Copyright 2002 Ananova Ltd
6 : * Copyright 2003,2004,2005,2006,2007 Olly Betts
7 : * Copyright 2006,2007,2008 Lemur Consulting Ltd
8 : *
9 : * This program is free software; you can redistribute it and/or
10 : * modify it under the terms of the GNU General Public License as
11 : * published by the Free Software Foundation; either version 2 of the
12 : * License, or (at your option) any later version.
13 : *
14 : * This program is distributed in the hope that it will be useful,
15 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
16 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
17 : * GNU General Public License for more details.
18 : *
19 : * You should have received a copy of the GNU General Public License
20 : * along with this program; if not, write to the Free Software
21 : * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301
22 : * USA
23 : */
24 :
25 : #ifndef XAPIAN_INCLUDED_QUERY_H
26 : #define XAPIAN_INCLUDED_QUERY_H
27 :
28 : #include <string>
29 : #include <vector>
30 :
31 : #include <xapian/base.h>
32 : #include <xapian/deprecated.h>
33 : #include <xapian/types.h>
34 : #include <xapian/termiterator.h>
35 : #include <xapian/visibility.h>
36 :
37 : // FIXME: sort this out so we avoid exposing Xapian::Query::Internal
38 : // - we need to at present so that the Xapian::Query's template ctors
39 : // compile.
40 : class LocalSubMatch; // Forward declarations of global-scope types which Xapian::Query::Internal names as friends (as ::LocalSubMatch etc).
41 : class MultiMatch;
42 : class QueryOptimiser;
43 : struct SortPosName;
44 :
45 : namespace Xapian {
46 :
47 : /** Class representing a query.
48 : *
49 : * Queries are represented as a tree of objects.
50 : */
51 : class XAPIAN_VISIBILITY_DEFAULT Query {
52 : public:
53 : /// Class holding details of the query
54 : class Internal;
55 : /// @private @internal Reference counted internals.
56 : Xapian::Internal::RefCntPtr<Internal> internal;
57 :
58 : /// Enum of possible query operations
59 : typedef enum {
60 : /// Return iff both subqueries are satisfied
61 : OP_AND,
62 :
63 : /// Return if either subquery is satisfied
64 : OP_OR,
65 :
66 : /// Return if left but not right satisfied
67 : OP_AND_NOT,
68 :
69 : /// Return if one query satisfied, but not both
70 : OP_XOR,
71 :
72 : /// Return iff left satisfied, but use weights from both
73 : OP_AND_MAYBE,
74 :
75 : /// As AND, but use only weights from left subquery
76 : OP_FILTER,
77 :
78 : /** Find occurrences of a list of terms with all the terms
79 : * occurring within a specified window of positions.
80 : * Each occurrence of a term must be at a different position,
81 : * but the order they appear in is irrelevant.
82 : *
83 : * The window parameter should be specified for this operation,
84 : * but will default to the number of terms in the list.
85 : */
86 : OP_NEAR,
87 :
88 : /** Find occurrences of a list of terms with all the terms
89 : * occurring within a specified window of positions, and all
90 : * the terms appearing in the order specified. Each occurrence
91 : * of a term must be at a different position.
92 : *
93 : * The window parameter should be specified for this operation,
94 : * but will default to the number of terms in the list.
95 : */
96 : OP_PHRASE,
97 :
98 : /** Filter by a range test on a document value. */
99 : OP_VALUE_RANGE,
100 :
101 : /** Scale the weight of a subquery by the specified factor.
102 : *
103 : * A factor of 0 means this subquery will contribute no weight to
104 : * the query - it will act as a purely boolean subquery.
105 : *
106 : * If the factor is negative, Xapian::InvalidArgumentError will
107 : * be thrown.
108 : */
109 : OP_SCALE_WEIGHT,
110 :
111 : /** Select an elite set from the subqueries, and perform
112 : * a query with these combined as an OR query.
113 : */
114 : OP_ELITE_SET,
115 :
116 : /** Filter by a greater-than-or-equal test on a document value. */
117 : OP_VALUE_GE,
118 :
119 : /** Filter by a less-than-or-equal test on a document value. */
120 : OP_VALUE_LE
121 : } op;
122 :
123 : /** Copy constructor. */
124 : Query(const Query & copyme);
125 :
126 : /** Assignment. */
127 : Query & operator=(const Query & copyme);
128 :
129 : /** Default constructor: makes an empty query which matches no
130 : * documents.
131 : *
132 : * Also useful for defining a Query object to be assigned to later.
133 : *
134 : * An exception will be thrown if an attempt is made to use an
135 : * undefined query when building up a composite query.
136 : */
137 : Query();
138 :
139 : /** Destructor. */
140 : ~Query();
141 :
142 : /** A query consisting of a single term. */
143 : Query(const std::string & tname_, Xapian::termcount wqf_ = 1,
144 : Xapian::termpos pos_ = 0);
145 :
146 : /** A query consisting of two subqueries combined using operator @a op_. */
147 : Query(Query::op op_, const Query & left, const Query & right);
148 :
149 : /** A query consisting of two termnames combined using operator @a op_. */
150 : Query(Query::op op_,
151 : const std::string & left, const std::string & right);
152 :
153 : /** Combine a number of Xapian::Query-s with the specified operator.
154 : *
155 : * The Xapian::Query objects are specified with begin and end
156 : * iterators.
157 : *
158 : * AND, OR, NEAR and PHRASE can take any number of subqueries.
159 : * Other operators take exactly two subqueries.
160 : *
161 : * The iterators may be to Xapian::Query objects, pointers to
162 : * Xapian::Query objects, or termnames (std::string-s).
163 : *
164 : * For NEAR and PHRASE, a window size can be specified in @a parameter.
165 : *
166 : * For ELITE_SET, the elite set size can be specified in @a parameter.
167 : */
168 : template <class Iterator>
169 : Query(Query::op op_, Iterator qbegin, Iterator qend,
170 : Xapian::termcount parameter = 0);
171 :
172 : /** Apply the specified operator to a single Xapian::Query object.
173 : *
174 : * @deprecated This method is deprecated because it isn't useful,
175 : * since none of the current query operators can be usefully
176 : * applied to a single subquery with a parameter value.
177 : */
178 : XAPIAN_DEPRECATED(Query(Query::op op_, Xapian::Query q));
179 :
180 : /** Apply the specified operator to a single Xapian::Query object, with
181 : * a double parameter.
182 : */
183 : Query(Query::op op_, Xapian::Query q, double parameter);
184 :
185 : /** Construct a value range query on a document value.
186 : *
187 : * A value range query matches those documents which have a value
188 : * stored in the slot given by @a valno which is in the range
189 : * specified by @a begin and @a end (in lexicographical
190 : * order), including the endpoints.
191 : *
192 : * @param op_ The operator to use for the query. Currently, must
193 : * be OP_VALUE_RANGE.
194 : * @param valno The slot number to get the value from.
195 : * @param begin The start of the range.
196 : * @param end The end of the range.
197 : */
198 : Query(Query::op op_, Xapian::valueno valno,
199 : const std::string &begin, const std::string &end);
200 :
201 : /** Construct a value comparison query on a document value.
202 : *
203 : * This query matches those documents which have a value stored in the
204 : * slot given by @a valno which compares, as specified by the
205 : * operator, to @a value.
206 : *
207 : * @param op_ The operator to use for the query. Currently, must
208 : * be OP_VALUE_GE or OP_VALUE_LE.
209 : * @param valno The slot number to get the value from.
210 : * @param value The value to compare.
211 : */
212 : Query(Query::op op_, Xapian::valueno valno, const std::string &value);
213 :
214 : /** A query which matches all documents in the database. */
215 : static Xapian::Query MatchAll;
216 :
217 : /** A query which matches no documents. */
218 : static Xapian::Query MatchNothing;
219 :
220 : /** Get the length of the query, used by some ranking formulae.
221 : * This value is calculated automatically - if you want to override
222 : * it you can pass a different value to Enquire::set_query().
223 : */
224 : Xapian::termcount get_length() const;
225 :
226 : /** Return a Xapian::TermIterator returning all the terms in the query,
227 : * in order of termpos. If multiple terms have the same term
228 : * position, their order is unspecified. Duplicates (same term and
229 : * termpos) will be removed.
230 : */
231 : TermIterator get_terms_begin() const;
232 :
233 : /** Return a Xapian::TermIterator to the end of the list of terms in the
234 : * query.
235 : */
236 : TermIterator get_terms_end() const {
237 : return TermIterator(NULL);
238 : }
239 :
240 : /** Test if the query is empty (i.e. was constructed using
241 : * the default ctor or with an empty iterator ctor).
242 : */
243 : bool empty() const;
244 :
245 : /// Return a string describing this object.
246 : std::string get_description() const;
247 :
248 : private:
249 : void add_subquery(const Query & subq); ///< Add a Query as a subquery; called by the iterator-range constructor for each element.
250 : void add_subquery(const Query * subq); ///< Overload for iterators which dereference to a pointer to a Query.
251 : void add_subquery(const std::string & tname); ///< Overload for iterators which dereference to a termname.
252 : void start_construction(Query::op op_, Xapian::termcount parameter); ///< Called by the iterator-range constructor before any subquery is added.
253 : void end_construction(); ///< Called by the iterator-range constructor once every subquery has been added.
254 : void abort_construction(); ///< Called by the iterator-range constructor if any step throws; the exception is then rethrown.
255 : };
256 :
257 : template <class Iterator> // Definition of the iterator-range constructor: builds a query applying op_ to every element of [qbegin, qend).
258 19 : Query::Query(Query::op op_, Iterator qbegin, Iterator qend, termcount parameter)
259 19 : : internal(0) // initialised from 0; presumably start_construction() sets up the real internals - TODO confirm
260 : {
261 : try {
262 19 : start_construction(op_, parameter);
263 :
264 : /* Add each element in turn; overload resolution on add_subquery() selects the Query, Query pointer or termname form. */
265 61 : while (qbegin != qend) {
266 23 : add_subquery(*qbegin);
267 23 : ++qbegin;
268 : }
269 :
270 19 : end_construction();
271 0 : } catch (...) { // whatever was thrown: call abort_construction(), then rethrow the same exception to the caller
272 0 : abort_construction();
273 0 : throw;
274 : }
275 19 : }
276 :
277 : /// @internal Internal class, implementing most of Xapian::Query. Each object is one node of the query tree and is reference counted via RefCntBase.
278 : class XAPIAN_VISIBILITY_DEFAULT Query::Internal : public Xapian::Internal::RefCntBase {
279 : friend class ::LocalSubMatch;
280 : friend class ::MultiMatch;
281 : friend class ::QueryOptimiser;
282 : friend struct ::SortPosName;
283 : friend class Query;
284 : public:
285 : static const int OP_LEAF = -1; ///< Operation code for a leaf node; negative, so it cannot collide with any Query::op enumerator (which start at 0).
286 :
287 : /// The container type for storing pointers to subqueries
288 : typedef std::vector<Internal *> subquery_list;
289 :
290 : /// Type storing the operation (an int rather than Query::op so that it can also hold OP_LEAF)
291 : typedef int op_t;
292 :
293 : private:
294 : /// Operation to be performed at this node
295 : Xapian::Query::Internal::op_t op;
296 :
297 : /// Sub queries on which to perform operation
298 : subquery_list subqs;
299 :
300 : /** For NEAR or PHRASE, how close terms must be to match: all terms
301 : * within the operation must occur in a window of this size.
302 : *
303 : * For ELITE_SET, the number of terms to select from those specified.
304 : *
305 : * For VALUE_RANGE, the value number to apply the range test to.
306 : */
307 : Xapian::termcount parameter;
308 :
309 : /** Term that this node represents, or start of a range query.
310 : *
311 : * For a leaf node, this holds the term name. For an OP_VALUE_RANGE
312 : * query this holds the start of the range. For an OP_VALUE_GE or
313 : * OP_VALUE_LE query this holds the value to compare against.
314 : */
315 : std::string tname;
316 :
317 : /** Used to store the end of a range query. */
318 : std::string str_parameter;
319 :
320 : /// Position in query of this term - leaf node only
321 : Xapian::termpos term_pos;
322 :
323 : /// Within query frequency of this term - leaf node only
324 : Xapian::termcount wqf;
325 :
326 : /** Swap the contents of this with another Xapian::Query::Internal,
327 : * in a way which is guaranteed not to throw. This is
328 : * used with the assignment operator to make it exception
329 : * safe.
330 : * It's important to update swap whenever member variables
331 : * are added!
332 : */
333 : void swap(Query::Internal &other);
334 :
335 : /// Copy another Xapian::Query::Internal into self.
336 : void initialise_from_copy(const Query::Internal & copyme);
337 :
338 : void accumulate_terms(
339 : std::vector<std::pair<std::string, Xapian::termpos> > &terms) const;
340 :
341 : /** Simplify the query.
342 : * For example, an AND query with only one subquery would become the
343 : * subquery itself.
344 : */
345 : Internal * simplify_query();
346 :
347 : /** Perform checks that query is valid. (eg, has correct number of
348 : * sub queries.) Throw an exception if not. This is initially called
349 : * on the query before any simplifications have been made, and after
350 : * simplifications.
351 : */
352 : void validate_query() const;
353 :
354 : /** Simplify any matchnothing subqueries, either eliminating them,
355 : * or setting this query to matchnothing, depending on the query
356 : * operator. Returns true if simplification resulted in a
357 : * matchnothing query.
358 : */
359 : bool simplify_matchnothing();
360 :
361 : /** Get a string describing the given query type.
362 : */
363 : static std::string get_op_name(Xapian::Query::Internal::op_t op);
364 :
365 : /** Collapse the subqueries together if appropriate.
366 : */
367 : void collapse_subqs();
368 :
369 : /** Flatten a query structure, by changing, for example,
370 : * "A NEAR (B AND C)" to "(A NEAR B) AND (A NEAR C)"
371 : */
372 : void flatten_subqs();
373 :
374 : /** Implementation of serialisation; called recursively.
375 : */
376 : std::string serialise(Xapian::termpos & curpos) const;
377 :
378 : public:
379 : /** Copy constructor. */
380 : Internal(const Query::Internal & copyme);
381 :
382 : /** Assignment. */
383 : void operator=(const Query::Internal & copyme);
384 :
385 : /** A query consisting of a single term. */
386 : explicit Internal(const std::string & tname_, Xapian::termcount wqf_ = 1,
387 : Xapian::termpos term_pos_ = 0);
388 :
389 : /** Create internals given only the operator and a parameter. */
390 : Internal(op_t op_, Xapian::termcount parameter);
391 :
392 : /** Construct a range query on a document value. */
393 : Internal(op_t op_, Xapian::valueno valno,
394 : const std::string &begin, const std::string &end);
395 :
396 : /** Construct a value comparison query on a document value (the matching Query constructor accepts OP_VALUE_GE or OP_VALUE_LE).
397 : */
398 : Internal(op_t op_, Xapian::valueno valno, const std::string &value);
399 :
400 : /** Destructor. */
401 : ~Internal();
402 :
403 : static Xapian::Query::Internal * unserialise(const std::string &s); ///< Presumably the inverse of serialise(), building internals from a serialised string - TODO confirm against the implementation.
404 :
405 : /** Add a subquery.
406 : */
407 : void add_subquery(const Query::Internal * subq);
408 :
409 : void set_dbl_parameter(double dbl_parameter_); ///< Set the double-valued parameter; presumably what the Query constructor taking a double parameter supplies - TODO confirm.
410 :
411 : double get_dbl_parameter() const; ///< Get the double-valued parameter (see set_dbl_parameter()).
412 :
413 : /** Finish off the construction.
414 : */
415 : Query::Internal * end_construction();
416 :
417 : /** Return a string in an easily parsed form
418 : * which contains all the information in a query.
419 : */
420 : std::string serialise() const {
421 : Xapian::termpos curpos = 1; // the recursive overload takes the position counter by reference; start it at 1
422 : return serialise(curpos);
423 : }
424 :
425 : /// Return a string describing this object.
426 : std::string get_description() const;
427 :
428 : /** Get the numeric parameter used in this query.
429 : *
430 : * This is used by the QueryParser to get the value number for
431 : * VALUE_RANGE queries. It should be replaced by a public method on
432 : * the Query class at some point, but the API which should be used for
433 : * that is unclear, so this is a temporary workaround.
434 : */
435 : Xapian::termcount get_parameter() const { return parameter; }
436 :
437 : /** Get the length of the query, used by some ranking formulae.
438 : * This value is calculated automatically - if you want to override
439 : * it you can pass a different value to Enquire::set_query().
440 : */
441 : Xapian::termcount get_length() const;
442 :
443 : /** Return an iterator over all the terms in the query,
444 : * in order of termpos. If multiple terms have the same term
445 : * position, their order is unspecified. Duplicates (same term and
446 : * termpos) will be removed.
447 : */
448 : TermIterator get_terms() const;
449 : };
450 :
451 : }
452 :
453 : #endif /* XAPIAN_INCLUDED_QUERY_H */
|