/*
 *  call-seq:
 *     FuzzyQuery.new(field, term, options = {}) -> fuzzy-query
 *
 *  Create a new FuzzyQuery that will match terms with a similarity of at
 *  least +:min_similarity+ to +term+. Similarity is scored using the
 *  Levenshtein edit distance formula. See
 *  http://en.wikipedia.org/wiki/Levenshtein_distance
 *
 *  If a +:prefix_length+ > 0 is specified, a common prefix of that length is
 *  also required.
 *
 *  You can also set +:max_terms+ to prevent memory overflow problems. By
 *  default it is set to 512.
 *
 *  Options that are not given fall back to the class-level defaults, which
 *  are read from FuzzyQuery (+:min_similarity+, +:prefix_length+) and
 *  MultiTermQuery (+:max_terms+) each time a query is created.
 *
 *  Raises ArgumentError if +:min_similarity+ is not in the range
 *  0.0 <= min_similarity < 1.0, or if +:prefix_length+ or +:max_terms+ is
 *  negative.
 *
 *  == Example
 *
 *     FuzzyQuery.new(:content, "levenshtein",
 *                    :min_similarity => 0.8,
 *                    :prefix_length => 5,
 *                    :max_terms => 1024)
 *
 *  field::           field to search
 *  term::            term to search for including its close matches
 *  :min_similarity:: Default: 0.5. minimum levenshtein distance score for a
 *                    match
 *  :prefix_length::  Default: 0. minimum prefix_match before levenshtein
 *                    distance is measured. This parameter is used to improve
 *                    performance. With a +:prefix_length+ of 0, all terms in
 *                    the index must be checked which can be quite a
 *                    performance hit. By setting the prefix length to a
 *                    larger number you minimize the number of terms that need
 *                    to be checked. Even 1 will cut down the work by a
 *                    factor of about 26 depending on your character set and
 *                    the first letter.
 *  :max_terms::      Limits the number of terms that can be added to the
 *                    query when it is expanded as a MultiTermQuery. This is
 *                    not usually a problem with FuzzyQueries unless you set
 *                    +:min_similarity+ to a very low value.
 */
static VALUE
frt_fq_init(int argc, VALUE *argv, VALUE self)
{
    Query *q;
    VALUE rfield, rterm, roptions;

    /* Start from the class-level defaults; explicit options override them.
     * NUM2INT (rather than FIX2INT) is used for every integer so that a
     * value which is not a Fixnum is converted or rejected by Ruby instead
     * of being reinterpreted as a garbage integer. */
    float min_sim =
        (float)NUM2DBL(rb_cvar_get(cFuzzyQuery, id_default_min_similarity));
    int pre_len =
        NUM2INT(rb_cvar_get(cFuzzyQuery, id_default_prefix_length));
    int max_terms =
        NUM2INT(rb_cvar_get(cMultiTermQuery, id_default_max_terms));

    /* Two required arguments (field, term) and one optional options hash. */
    if (rb_scan_args(argc, argv, "21", &rfield, &rterm, &roptions) >= 3) {
        VALUE v;
        Check_Type(roptions, T_HASH);
        /* A key that is absent (or explicitly nil) keeps its default. */
        if (Qnil != (v = rb_hash_aref(roptions, sym_prefix_length))) {
            pre_len = NUM2INT(v);
        }
        if (Qnil != (v = rb_hash_aref(roptions, sym_min_similarity))) {
            min_sim = (float)NUM2DBL(v);
        }
        if (Qnil != (v = rb_hash_aref(roptions, sym_max_terms))) {
            max_terms = NUM2INT(v);
        }
    }

    /* Validate everything before the query is allocated so that a raise
     * happens while this function still owns nothing. */
    if (min_sim >= 1.0) {
        rb_raise(rb_eArgError,
                 "%f >= 1.0. :min_similarity must be < 1.0", min_sim);
    } else if (min_sim < 0.0) {
        /* 0.0 itself is accepted, so the bound reported is ">= 0.0". */
        rb_raise(rb_eArgError,
                 "%f < 0.0. :min_similarity must be >= 0.0", min_sim);
    }
    if (pre_len < 0) {
        rb_raise(rb_eArgError,
                 "%d < 0. :prefix_length must be >= 0", pre_len);
    }
    if (max_terms < 0) {
        rb_raise(rb_eArgError,
                 "%d < 0. :max_terms must be >= 0", max_terms);
    }

    q = fuzq_new_conf(frt_field(rfield), StringValuePtr(rterm),
                      min_sim, pre_len, max_terms);
    /* Attach the C query to the Ruby object (frt_q_free is registered as
     * the free function) and register the pair with object_add. */
    Frt_Wrap_Struct(self, NULL, &frt_q_free, q);
    object_add(q, self);
    return self;
}