-
-
Notifications
You must be signed in to change notification settings - Fork 2
/
Copy pathlinguist.c
45 lines (35 loc) · 1.01 KB
/
linguist.c
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
#include "ruby.h"
#include "lex.linguist_yy.h"
int linguist_yywrap(yyscan_t yyscanner) {
return 1;
}
static VALUE rb_tokenizer_extract_tokens(VALUE self, VALUE rb_data) {
YY_BUFFER_STATE buf;
yyscan_t scanner;
VALUE extra;
VALUE ary;
long len;
int r;
Check_Type(rb_data, T_STRING);
len = RSTRING_LEN(rb_data);
if (len > 100000)
len = 100000;
linguist_yylex_init_extra(&extra, &scanner);
buf = linguist_yy_scan_bytes(RSTRING_PTR(rb_data), (int) len, scanner);
ary = rb_ary_new();
do {
extra = 0;
r = linguist_yylex(scanner);
if (extra) {
rb_ary_push(ary, extra);
}
} while (r);
linguist_yy_delete_buffer(buf, scanner);
linguist_yylex_destroy(scanner);
return ary;
}
__attribute__((visibility("default"))) void Init_linguist() {
VALUE rb_mLinguist = rb_define_module("Linguist");
VALUE rb_cTokenizer = rb_define_class_under(rb_mLinguist, "Tokenizer", rb_cObject);
rb_define_method(rb_cTokenizer, "extract_tokens", rb_tokenizer_extract_tokens, 1);
}