Skip to content

Commit 37671c9

Browse files
committed
Merge branch 'pfalcon-modure'
2 parents 1ce916a + dd5ee9f commit 37671c9

14 files changed

+811
-0
lines changed

extmod/modure.c

Lines changed: 241 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,241 @@
1+
/*
2+
* This file is part of the Micro Python project, http://micropython.org/
3+
*
4+
* The MIT License (MIT)
5+
*
6+
* Copyright (c) 2014 Paul Sokolovsky
7+
*
8+
* Permission is hereby granted, free of charge, to any person obtaining a copy
9+
* of this software and associated documentation files (the "Software"), to deal
10+
* in the Software without restriction, including without limitation the rights
11+
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
12+
* copies of the Software, and to permit persons to whom the Software is
13+
* furnished to do so, subject to the following conditions:
14+
*
15+
* The above copyright notice and this permission notice shall be included in
16+
* all copies or substantial portions of the Software.
17+
*
18+
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
19+
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
20+
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
21+
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
22+
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
23+
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
24+
* THE SOFTWARE.
25+
*/
26+
27+
#include <stdio.h>
28+
#include <assert.h>
29+
#include <string.h>
30+
31+
#include "mpconfig.h"
32+
#include "nlr.h"
33+
#include "misc.h"
34+
#include "qstr.h"
35+
#include "obj.h"
36+
#include "runtime.h"
37+
#include "binary.h"
38+
39+
#if MICROPY_PY_URE
40+
41+
#include "re1.5/regexp.h"
42+
43+
// Flag bit accepted by compile(): when set in the flags argument, the compiled
// program is dumped via re1_5_dumpcode(). Exported to Python as DEBUG.
#define FLAG_DEBUG (0x1000)
44+
45+
typedef struct _mp_obj_re_t {
46+
mp_obj_base_t base;
47+
ByteProg re;
48+
} mp_obj_re_t;
49+
50+
typedef struct _mp_obj_match_t {
51+
mp_obj_base_t base;
52+
int num_matches;
53+
mp_obj_t str;
54+
const char *caps[0];
55+
} mp_obj_match_t;
56+
57+
58+
// Print handler for match objects.
//
// print/env: output callback and its context.
// self_in:   the match object being printed.
// kind:      requested representation kind (unused; output is always the same).
STATIC void match_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
    (void)kind;
    mp_obj_match_t *self = self_in;
    // The format has two conversions (%d and %p), so both arguments must be
    // supplied; omitting the pointer argument is undefined behaviour.
    print(env, "<match num=%d @%p>", self->num_matches, self);
}
62+
63+
// Method group(no): return the text captured by group number `no`.
//
// self_in: the match object.
// no_in:   group index; 0 is the whole match.
//
// Raises IndexError (with no_in as argument) when the index is negative or
// not below the number of groups (num_matches / 2).
// Returns a new str covering [caps[2*no], caps[2*no + 1]), or None when the
// start pointer of the group is NULL.
STATIC mp_obj_t match_group(mp_obj_t self_in, mp_obj_t no_in) {
    mp_obj_match_t *self = self_in;

    // Use the same integer conversion as the rest of this module
    // (see mod_re_compile) rather than mp_obj_int_get.
    mp_int_t no = mp_obj_get_int(no_in);
    if (no < 0 || no >= self->num_matches / 2) {
        nlr_raise(mp_obj_new_exception_arg1(&mp_type_IndexError, no_in));
    }

    const char *start = self->caps[no * 2];
    if (start == NULL) {
        // NOTE(review): a NULL start is taken to mean the group did not
        // participate in the match; never build a string from a NULL pointer.
        return mp_const_none;
    }
    return mp_obj_new_str(start, self->caps[no * 2 + 1] - start, false);
}
MP_DEFINE_CONST_FUN_OBJ_2(match_group_obj, match_group);
74+
75+
STATIC const mp_map_elem_t match_locals_dict_table[] = {
76+
{ MP_OBJ_NEW_QSTR(MP_QSTR_group), (mp_obj_t) &match_group_obj },
77+
};
78+
79+
STATIC MP_DEFINE_CONST_DICT(match_locals_dict, match_locals_dict_table);
80+
81+
STATIC const mp_obj_type_t match_type = {
82+
{ &mp_type_type },
83+
.name = MP_QSTR_match,
84+
.print = match_print,
85+
.locals_dict = (mp_obj_t)&match_locals_dict,
86+
};
87+
88+
// Print handler for compiled regex objects: emits "<re ADDR>" where ADDR is
// the address of the object. The `kind` argument is not consulted.
STATIC void re_print(void (*print)(void *env, const char *fmt, ...), void *env, mp_obj_t self_in, mp_print_kind_t kind) {
    mp_obj_re_t *re_obj = self_in;
    print(env, "<re %p>", re_obj);
}
92+
93+
// Shared implementation of match() and search() on a compiled regex.
//
// is_anchored: passed through to the matcher; re_match() passes true and
//              re_search() passes false.
// n_args:      number of entries in args (unused; only args[0] and args[1]
//              are read).
// args:        args[0] is the compiled regex object, args[1] the subject
//              string.
//
// Returns a new match object on success, or None when the matcher reports
// no match (result 0).
STATIC mp_obj_t re_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
    (void)n_args;
    mp_obj_re_t *self = args[0];

    Subject subj;
    mp_uint_t len;
    subj.begin = mp_obj_str_get_data(args[1], &len);
    subj.end = subj.begin + len;

    // Two pointers (start, end) per group, plus group 0 for the whole match.
    int caps_num = (self->re.sub + 1) * 2;
    mp_obj_match_t *match = m_new_obj_var(mp_obj_match_t, char*, caps_num);

    // Clear the capture slots before matching so that any slot the matcher
    // does not write reads as NULL instead of uninitialized memory.
    // NOTE(review): assumes the matcher only writes slots of groups that
    // took part in the match - confirm against re1.5.
    memset(match->caps, 0, (size_t)caps_num * sizeof(const char *));

    int res = re1_5_recursiveloopprog(&self->re, &subj, match->caps, caps_num, is_anchored);
    if (res == 0) {
        // No match: release the unused match object right away.
        m_del_var(mp_obj_match_t, char*, caps_num, match);
        return mp_const_none;
    }

    match->base.type = &match_type;
    match->num_matches = caps_num;
    // Keep a reference to the subject string in the match object.
    match->str = args[1];
    return match;
}
112+
113+
STATIC mp_obj_t re_match(uint n_args, const mp_obj_t *args) {
114+
return re_exec(true, n_args, args);
115+
}
116+
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_match_obj, 2, 4, re_match);
117+
118+
STATIC mp_obj_t re_search(uint n_args, const mp_obj_t *args) {
119+
return re_exec(false, n_args, args);
120+
}
121+
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_search_obj, 2, 4, re_search);
122+
123+
STATIC mp_obj_t re_split(uint n_args, const mp_obj_t *args) {
124+
mp_obj_re_t *self = args[0];
125+
Subject subj;
126+
mp_uint_t len;
127+
subj.begin = mp_obj_str_get_data(args[1], &len);
128+
subj.end = subj.begin + len;
129+
int caps_num = (self->re.sub + 1) * 2;
130+
131+
int maxsplit = 0;
132+
if (n_args > 2) {
133+
maxsplit = mp_obj_int_get(args[2]);
134+
}
135+
136+
mp_obj_t retval = mp_obj_new_list(0, NULL);
137+
const char *caps[caps_num];
138+
while (true) {
139+
int res = re1_5_recursiveloopprog(&self->re, &subj, caps, caps_num, false);
140+
141+
// if we didn't have a match, or had an empty match, it's time to stop
142+
if (!res || caps[0] == caps[1]) {
143+
break;
144+
}
145+
146+
mp_obj_t s = mp_obj_new_str(subj.begin, caps[0] - subj.begin, false);
147+
mp_obj_list_append(retval, s);
148+
if (self->re.sub > 0) {
149+
mp_not_implemented("Splitting with sub-captures");
150+
}
151+
subj.begin = caps[1];
152+
if (maxsplit > 0 && --maxsplit == 0) {
153+
break;
154+
}
155+
}
156+
157+
mp_obj_t s = mp_obj_new_str(subj.begin, subj.end - subj.begin, false);
158+
mp_obj_list_append(retval, s);
159+
return retval;
160+
}
161+
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(re_split_obj, 2, 3, re_split);
162+
163+
STATIC const mp_map_elem_t re_locals_dict_table[] = {
164+
{ MP_OBJ_NEW_QSTR(MP_QSTR_match), (mp_obj_t) &re_match_obj },
165+
{ MP_OBJ_NEW_QSTR(MP_QSTR_search), (mp_obj_t) &re_search_obj },
166+
{ MP_OBJ_NEW_QSTR(MP_QSTR_split), (mp_obj_t) &re_split_obj },
167+
};
168+
169+
STATIC MP_DEFINE_CONST_DICT(re_locals_dict, re_locals_dict_table);
170+
171+
STATIC const mp_obj_type_t re_type = {
172+
{ &mp_type_type },
173+
.name = MP_QSTR_ure,
174+
.print = re_print,
175+
.locals_dict = (mp_obj_t)&re_locals_dict,
176+
};
177+
178+
mp_obj_t mod_re_compile(uint n_args, const mp_obj_t *args) {
179+
const char *re_str = mp_obj_str_get_str(args[0]);
180+
int size = re1_5_sizecode(re_str);
181+
mp_obj_re_t *o = m_new_obj_var(mp_obj_re_t, char, size);
182+
o->base.type = &re_type;
183+
int flags = 0;
184+
if (n_args > 1) {
185+
flags = mp_obj_get_int(args[1]);
186+
}
187+
int error = re1_5_compilecode(&o->re, re_str);
188+
if (error != 0) {
189+
nlr_raise(mp_obj_new_exception_msg(&mp_type_ValueError, "Error in regex"));
190+
}
191+
if (flags & FLAG_DEBUG) {
192+
re1_5_dumpcode(&o->re);
193+
}
194+
return o;
195+
}
196+
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_compile_obj, 1, 2, mod_re_compile);
197+
198+
// Shared implementation of the module-level match() and search():
// compiles args[0] (the pattern, without flags) and runs the resulting
// regex against args[1] via re_exec(). Any further arguments are ignored.
// Returns whatever re_exec() returns (a match object or None).
STATIC mp_obj_t mod_re_exec(bool is_anchored, uint n_args, const mp_obj_t *args) {
    mp_obj_t compiled = mod_re_compile(1, args);

    const mp_obj_t exec_args[2] = { compiled, args[1] };
    return re_exec(is_anchored, 2, exec_args);
}
205+
206+
STATIC mp_obj_t mod_re_match(uint n_args, const mp_obj_t *args) {
207+
return mod_re_exec(true, n_args, args);
208+
}
209+
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_match_obj, 2, 4, mod_re_match);
210+
211+
STATIC mp_obj_t mod_re_search(uint n_args, const mp_obj_t *args) {
212+
return mod_re_exec(false, n_args, args);
213+
}
214+
MP_DEFINE_CONST_FUN_OBJ_VAR_BETWEEN(mod_re_search_obj, 2, 4, mod_re_search);
215+
216+
STATIC const mp_map_elem_t mp_module_re_globals_table[] = {
217+
{ MP_OBJ_NEW_QSTR(MP_QSTR___name__), MP_OBJ_NEW_QSTR(MP_QSTR_ure) },
218+
{ MP_OBJ_NEW_QSTR(MP_QSTR_compile), (mp_obj_t)&mod_re_compile_obj },
219+
{ MP_OBJ_NEW_QSTR(MP_QSTR_match), (mp_obj_t)&mod_re_match_obj },
220+
{ MP_OBJ_NEW_QSTR(MP_QSTR_search), (mp_obj_t)&mod_re_search_obj },
221+
{ MP_OBJ_NEW_QSTR(MP_QSTR_DEBUG), MP_OBJ_NEW_SMALL_INT(FLAG_DEBUG) },
222+
};
223+
224+
STATIC const mp_obj_dict_t mp_module_re_globals = {
225+
.base = {&mp_type_dict},
226+
.map = {
227+
.all_keys_are_qstrs = 1,
228+
.table_is_fixed_array = 1,
229+
.used = MP_ARRAY_SIZE(mp_module_re_globals_table),
230+
.alloc = MP_ARRAY_SIZE(mp_module_re_globals_table),
231+
.table = (mp_map_elem_t*)mp_module_re_globals_table,
232+
},
233+
};
234+
235+
const mp_obj_module_t mp_module_ure = {
236+
.base = { &mp_type_module },
237+
.name = MP_QSTR_ure,
238+
.globals = (mp_obj_dict_t*)&mp_module_re_globals,
239+
};
240+
241+
#endif //MICROPY_PY_URE

0 commit comments

Comments
 (0)