Crossfire Server, Trunk
re-cmp.cpp
Go to the documentation of this file.
1 /*
2  * Crossfire -- cooperative multi-player graphical RPG and adventure game
3  *
4  * Copyright (c) 1999-2014 Mark Wedel and the Crossfire Development Team
5  * Copyright (c) 1992 Frank Tore Johansen
6  *
7  * Crossfire is free software and comes with ABSOLUTELY NO WARRANTY. You are
8  * welcome to redistribute it under certain conditions. For details, please
9  * see COPYING and LICENSE.
10  *
11  * The authors can be reached via e-mail at <crossfire@metalforge.org>.
12  */
13 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <limits.h>
34 #include <re-cmp.h>
35 #include <ctype.h>
36 #include <global.h>
37 #include <define.h> /* Needed for OUT_OF_MEMORY. */
38 
39 /* P r o t o t y p e s
40  */
41 const char *re_cmp(const char *, const char *);
42 static Boolean re_cmp_step(const char *, const char *, unsigned, int);
43 static void re_init(void);
45 static const char *re_get_token(selection *, const char *);
46 
47 /* G l o b a l v a r i a b l e s
48  */
51 static const char *re_substr[RE_TOKEN_MAX];
52 static unsigned int re_token_depth;
53 
54 /* E x t e r n a l f u n c t i o n
55  */
56 
68 const char *re_cmp(const char *str, const char *regexp) {
69  const char *next_regexp;
70  Boolean once = False;
71  Boolean matched;
72 
73  if (re_init_done == False)
74  re_init();
75 
76 #ifdef SAFE_CHECKS
77  if (regexp == NULL || str == NULL)
78  return NULL;
79 #endif
80  if (*regexp == '^') {
81  once = True;
82  ++regexp;
83  }
84  if (*regexp == 0) {
85  /* // or /^/ matches any string */
86  return str;
87  }
88 
89  next_regexp = re_get_token(re_token[0], regexp);
90  re_token_depth = 0;
91  re_substr[0] = next_regexp;
92 
93  matched = False;
94  while (*str != '\0' && !(matched = re_match_token(*str, re_token[0])))
95  str++;
96 
97  if (matched && *next_regexp == 0)
98  return str;
99 
100  /* Apologies for the nearly duplicated code below, hopefully it
101  * speeds things up.
102  */
103  if (once) {
104  switch (re_token[0]->repeat) {
105  case rep_once:
106  if (matched == False)
107  return NULL;
108  break;
109 
110  case rep_once_or_more:
111  if (matched == False)
112  return NULL;
113 
114  if (re_cmp_step(str+1, regexp, 0, 1))
115  return str;
116  break;
117 
118  case rep_null_or_once:
119  if (matched == False)
120  return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL;
121  break;
122 
123  case rep_null_or_more:
124  if (matched) {
125  if (re_cmp_step(str+1, regexp, 0, 1))
126  return str;
127  } else {
128  return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL;
129  }
130  break;
131  }
132 
133  return re_cmp_step(str+1, next_regexp, 1, 0) ? str : NULL;
134  }
135 
136  if (matched) {
137  switch (re_token[0]->repeat) {
138  case rep_once:
139  case rep_null_or_once:
140  break;
141 
142  case rep_once_or_more:
143  case rep_null_or_more:
144  if (re_cmp_step(str+1, regexp, 0, 1))
145  return str;
146  break;
147  }
148 
149  /* The logic here is that re_match_token only sees
150  * if the one letter matches. Thus, if the
151  * regex is like '@match eureca', and the
152  * the user enters anything with an e, re_match_token
153  * returns true, but they really need to match the
154  * entire regexp, which re_cmp_step will do.
155  * However, what happens is that there can be a case
156  * where the string being match is something like
157  * 'where is eureca'. In this case, the re_match_token
158  * matches that first e, but the re_cmp_step below,
159  * fails because the next character (r) doesn't match
160  * the u. So we call re_cmp with the string
161  * after the first r, so that it should hopefully match
162  * up properly.
163  */
164  if (re_cmp_step(str+1, next_regexp, 1, 0))
165  return str;
166  else if (*(str+1) != 0)
167  return re_cmp(str+1, regexp);
168  }
169  return NULL;
170 }
171 
172 /* A u x i l l i a r y f u n c t i o n s
173  */
174 
189 static Boolean re_cmp_step(const char *str, const char *regexp, unsigned slot, int matches) {
190  const char *next_regexp;
191  Boolean matched;
192 
193 #ifdef DEBUG
194 /* fprintf(stderr, "['%s', '%s', %u, %d]\n", str, regexp, slot, matches);*/
195 #endif
196 
197  if (*regexp == 0) {
198  /* When we reach the end of the regexp, the match is a success */
199  return True;
200  }
201 
202  /* This chunk of code makes sure that the regexp-tokenising happens
203  * only once. We only tokenise as much as we need.
204  */
205  if (slot > re_token_depth) {
206  re_token_depth = slot;
207  if (re_token[slot] == NULL)
208  re_token[slot] = (selection *)malloc(sizeof(selection));
209  next_regexp = re_get_token(re_token[slot], regexp);
210  if (next_regexp == NULL) {
211  /* Syntax error, what else can we do? */
212  return False;
213  }
214  re_substr[slot] = next_regexp;
215  } else {
216  next_regexp = re_substr[slot];
217  }
218 
219  matched = re_match_token(*str, re_token[slot]);
220  if (matched)
221  ++matches;
222 
223  if (*str == 0)
224  return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched;
225 
226  switch (re_token[slot]->repeat) {
227  case rep_once:
228  if (matches == 1) { /* (matches == 1) => (matched == True) */
229  return re_cmp_step(str+1, next_regexp, slot+1, 0);
230  }
231  return False;
232 
233  case rep_once_or_more:
234  if (matched) { /* (matched == True) => (matches >= 1) */
235  /* First check if the current token repeats more */
236  if (re_cmp_step(str+1, regexp, slot, matches))
237  return True;
238  return re_cmp_step(str+1, next_regexp, slot+1, 0);
239  }
240  return False;
241 
242  case rep_null_or_once:
243  /* We must go on to the next token, but should we advance str? */
244  if (matches == 0) {
245  return re_cmp_step(str, next_regexp, slot+1, 0);
246  } else if (matches == 1) {
247  return re_cmp_step(str+1, next_regexp, slot+1, 0);
248  }
249  return False; /* Not reached */
250 
251  case rep_null_or_more:
252  if (matched) {
253  /* Look for further repeats, advance str */
254  if (re_cmp_step(str+1, regexp, slot, matches))
255  return True;
256  return re_cmp_step(str, next_regexp, slot+1, 0);
257  }
258  return re_cmp_step(str, next_regexp, slot+1, 0);
259  }
260 
261  return False;
262 }
263 
270 static void re_init(void) {
271  int i;
272 
273  re_token[0] = (selection *)malloc(sizeof(selection));
274  if (re_token[0] == NULL)
276  for (i = 1; i < RE_TOKEN_MAX; i++)
277  re_token[i] = NULL;
278 
279  re_init_done = True;
280 }
281 
293  switch (sel->type) {
294  case sel_any:
295  return True;
296 
297  case sel_end:
298  return (c == 0);
299 
300  case sel_single:
301  return (tolower(c) == tolower(sel->u.single));
302 
303  case sel_range:
304  return (c >= sel->u.range.low && c <= sel->u.range.high);
305 
306  case sel_array:
307  return (sel->u.array[c]);
308 
309  case sel_not_single:
310  return (tolower(c) != tolower(sel->u.single));
311 
312  case sel_not_range:
313  return (c < sel->u.range.low && c > sel->u.range.high);
314  }
315 
316  return False;
317 }
318 
330 static const char *re_get_token(selection *sel, const char *regexp) {
331 #ifdef SAFE_CHECKS
332 # define exit_if_null if (*regexp == 0) return NULL
333 #else
334 # define exit_if_null
335 #endif
336  Boolean quoted = False;
337  uchar looking_at;
338 
339 #ifdef SAFE_CHECKS
340  if (sel == NULL || regexp == NULL || *regexp == 0)
341  return NULL;
342 #endif
343 
344  do {
345  looking_at = *regexp++;
346  switch (looking_at) {
347  case '$':
348  if (quoted) {
349  quoted = False;
350  sel->type = sel_single;
351  sel->u.single = looking_at;
352  } else {
353  sel->type = sel_end;
354  }
355  break;
356 
357  case '.':
358  if (quoted) {
359  quoted = False;
360  sel->type = sel_single;
361  sel->u.single = looking_at;
362  } else {
363  sel->type = sel_any;
364  }
365  break;
366 
367  case '[':
368  /* The fun stuff... perhaps a little obfuscated since I
369  * don't trust the compiler to analyse liveness.
370  */
371  if (quoted) {
372  quoted = False;
373  sel->type = sel_single;
374  sel->u.single = looking_at;
375  } else {
376  Boolean neg = False;
377  uchar first, last = 0;
378 
379  exit_if_null;
380  looking_at = *regexp++;
381 
382  if (looking_at == '^') {
383  neg = True;
384  exit_if_null;
385  looking_at = *regexp++;
386  }
387  first = looking_at;
388  exit_if_null;
389  looking_at = *regexp++;
390  if (looking_at == ']') {
391  /* On the form [q] or [^q] */
392  sel->type = neg ? sel_not_single : sel_single;
393  sel->u.single = first;
394  break;
395  } else if (looking_at == '-') {
396  exit_if_null;
397  last = *regexp++;
398  if (last == ']') {
399  /* On the form [A-] or [^A-]. Checking for
400  * [,-] and making it a range is probably not
401  * worth it :-)
402  */
403  sel->type = sel_array;
404  memset(sel->u.array, neg, sizeof(sel->u.array));
405  sel->u.array[first] = sel->u.array['-'] = !neg;
406  break;
407  } else {
408  exit_if_null;
409  looking_at = *regexp++;
410  if (looking_at == ']') {
411  /* On the form [A-G] or [^A-G]. Note that [G-A]
412  * is a syntax error. Fair enough, I think.
413  */
414 #ifdef SAFE_CHECKS
415  if (first > last)
416  return NULL;
417 #endif
418  sel->type = neg ? sel_not_range : sel_range;
419  sel->u.range.low = first;
420  sel->u.range.high = last;
421  break;
422  }
423  }
424  }
425  {
426  /* The datastructure can only represent a RE this
427  * complex with an array.
428  */
429  int i;
430  uchar previous;
431 
432  sel->type = sel_array;
433  memset(sel->u.array, neg, sizeof(sel->u.array));
434  if (last) {
435  /* It starts with a range */
436 #ifdef SAFE_CHECKS
437  if (first > last)
438  return NULL;
439 #endif
440  for (i = first; i <= last; i++) {
441  sel->u.array[i] = !neg;
442  }
443  } else {
444  /* It begins with a "random" character */
445  sel->u.array[first] = !neg;
446  }
447  sel->u.array[looking_at] = !neg;
448 
449  exit_if_null;
450  previous = looking_at;
451  looking_at = *regexp++;
452 
453  /* Add more characters to the array until we reach
454  * ]. Quoting doesn't and shouldn't work in here.
455  * ("]" should be put first, and "-" last if they
456  * are needed inside this construct.)
457  * Look for ranges as we go along.
458  */
459  while (looking_at != ']') {
460  if (looking_at == '-') {
461  exit_if_null;
462  looking_at = *regexp++;
463  if (looking_at != ']') {
464 #ifdef SAFE_CHECKS
465  if (previous > looking_at)
466  return NULL;
467 #endif
468  for (i = previous+1; i < looking_at; i++) {
469  /* previous has already been set and
470  * looking_at is set below.
471  */
472  sel->u.array[i] = !neg;
473  }
474  exit_if_null;
475  } else {
476  sel->u.array['-'] = !neg;
477  break;
478  }
479  }
480  sel->u.array[looking_at] = !neg;
481  previous = looking_at;
482  exit_if_null;
483  looking_at = *regexp++;
484  }
485  }
486  }
487  break;
488 
489  case '\\':
490  if (quoted) {
491  quoted = False;
492  sel->type = sel_single;
493  sel->u.single = looking_at;
494  } else {
495  quoted = True;
496  }
497  break;
498 
499  default:
500  quoted = False;
501  sel->type = sel_single;
502  sel->u.single = looking_at;
503  break;
504  }
505  } while (quoted);
506 
507  if (*regexp == '*') {
508  sel->repeat = rep_null_or_more;
509  ++regexp;
510  } else if (*regexp == '?') {
511  sel->repeat = rep_null_or_once;
512  ++regexp;
513  } else if (*regexp == '+') {
514  sel->repeat = rep_once_or_more;
515  ++regexp;
516  } else {
517  sel->repeat = rep_once;
518  }
519 
520  return regexp;
521 }
522 
sel_not_single
@ sel_not_single
Definition: re-cmp.h:53
re_token_depth
static unsigned int re_token_depth
Definition: re-cmp.cpp:52
global.h
uchar
#define uchar
Definition: re-cmp.h:42
selection::u
union selection::@4 u
re_cmp_step
static Boolean re_cmp_step(const char *, const char *, unsigned, int)
Definition: re-cmp.cpp:189
say.previous
dictionary previous
Definition: say.py:205
c
static event_registration c
Definition: citylife.cpp:425
Boolean
#define Boolean
Definition: re-cmp.h:43
rep_null_or_once
@ rep_null_or_once
Definition: re-cmp.h:60
tolower
#define tolower(C)
Definition: c_new.cpp:30
sel_any
@ sel_any
Definition: re-cmp.h:48
sel_range
@ sel_range
Definition: re-cmp.h:51
rep_once_or_more
@ rep_once_or_more
Definition: re-cmp.h:59
re_substr
static const char * re_substr[RE_TOKEN_MAX]
Definition: re-cmp.cpp:51
selection
Definition: re-cmp.h:64
selection::single
uchar single
Definition: re-cmp.h:67
re_match_token
static Boolean re_match_token(uchar, selection *)
Definition: re-cmp.cpp:292
False
#define False
Definition: re-cmp.h:45
selection::repeat
repetetion_type repeat
Definition: re-cmp.h:73
re_init_done
static Boolean re_init_done
Definition: re-cmp.cpp:49
make_face_from_files.str
str
Definition: make_face_from_files.py:30
selection::array
Boolean array[UCHAR_MAX]
Definition: re-cmp.h:71
re_token
static selection * re_token[RE_TOKEN_MAX]
Definition: re-cmp.cpp:50
re_init
static void re_init(void)
Definition: re-cmp.cpp:270
sel_end
@ sel_end
Definition: re-cmp.h:49
fatal
void fatal(enum fatal_error err)
Definition: utils.cpp:590
rep_once
@ rep_once
Definition: re-cmp.h:58
re_get_token
static const char * re_get_token(selection *, const char *)
Definition: re-cmp.cpp:330
True
#define True
Definition: re-cmp.h:44
sel_not_range
@ sel_not_range
Definition: re-cmp.h:54
exit_if_null
#define exit_if_null
define.h
re-cmp.h
sel_single
@ sel_single
Definition: re-cmp.h:50
selection::range
struct selection::@4::@5 range
rep_null_or_more
@ rep_null_or_more
Definition: re-cmp.h:61
sel_array
@ sel_array
Definition: re-cmp.h:52
RE_TOKEN_MAX
#define RE_TOKEN_MAX
Definition: re-cmp.h:19
OUT_OF_MEMORY
@ OUT_OF_MEMORY
Definition: define.h:48
re_cmp
const char * re_cmp(const char *, const char *)
Definition: re-cmp.cpp:68
matches
static int matches(const char *exp, const char *text)
Definition: dialog.cpp:79
selection::type
selection_type type
Definition: re-cmp.h:65
is_valid_types_gen.type
list type
Definition: is_valid_types_gen.py:25