/*
* call-seq:
* StemFilter.new(token_stream) -> token_stream
* StemFilter.new(token_stream,
* algorithm="english",
* encoding="UTF-8") -> token_stream
*
* Create a StemFilter which uses a snowball stemmer (thank you Martin
* Porter) to stem words. You can optionally specify the algorithm (default:
* "english") and encoding (default: "UTF-8").
*
* token_stream:: TokenStream to be filtered
* algorithm:: The algorithm (or language) to use
* encoding:: The encoding of the data (default: "UTF-8")
*/
static VALUE
frt_stem_filter_init(int argc, VALUE *argv, VALUE self)
{
    VALUE rsub_ts, ralgorithm, rcharenc;
    TokenStream *sub_stream;
    TokenStream *stem_stream;
    /* Used when the caller does not supply an algorithm argument. */
    char *stem_algorithm = "english";
    /* Left NULL when the caller does not supply an encoding argument.
     * NOTE(review): presumably stem_filter_new() maps NULL to the documented
     * "UTF-8" default -- confirm against its implementation. */
    char *stem_encoding = NULL;

    /* "12": one mandatory argument followed by up to two optional ones. */
    rb_scan_args(argc, argv, "12", &rsub_ts, &ralgorithm, &rcharenc);
    sub_stream = frt_get_cwrapped_rts(rsub_ts);

    /* Only arguments that were actually passed override the defaults. The
     * encoding is converted before the algorithm, so any to_s side effects
     * run in that order.
     * NOTE(review): an explicitly passed nil is still stringified here
     * rather than being treated as "use the default" -- confirm this is
     * the intended behaviour. */
    if (argc == 3) {
        stem_encoding = rs2s(rb_obj_as_string(rcharenc));
    }
    if (argc == 3 || argc == 2) {
        stem_algorithm = rs2s(rb_obj_as_string(ralgorithm));
    }

    stem_stream = stem_filter_new(sub_stream, stem_algorithm, stem_encoding);

    /* Associate the wrapped sub-stream slot with its Ruby object, then wrap
     * the new filter in self and associate the two as well. */
    object_add(&(TkFilt(stem_stream)->sub_ts), rsub_ts);
    Frt_Wrap_Struct(self, &frt_tf_mark, &frt_tf_free, stem_stream);
    object_add(stem_stream, self);

    return self;
}