Crossfire Server, Trunk
re-cmp.cpp
Go to the documentation of this file.
1 /*
2  * Crossfire -- cooperative multi-player graphical RPG and adventure game
3  *
4  * Copyright (c) 1999-2014 Mark Wedel and the Crossfire Development Team
5  * Copyright (c) 1992 Frank Tore Johansen
6  *
7  * Crossfire is free software and comes with ABSOLUTELY NO WARRANTY. You are
8  * welcome to redistribute it under certain conditions. For details, please
9  * see COPYING and LICENSE.
10  *
11  * The authors can be reached via e-mail at <crossfire@metalforge.org>.
12  */
13 
30 #include <stdio.h>
31 #include <stdlib.h>
32 #include <string.h>
33 #include <limits.h>
34 #include <re-cmp.h>
35 #include <ctype.h>
36 #include <global.h>
37 #include <define.h> /* Needed for OUT_OF_MEMORY. */
38 
39 /* P r o t o t y p e s
40  */
41 const char *re_cmp(const char *, const char *);
42 static Boolean re_cmp_step(const char *, const char *, unsigned, int);
43 static void re_init(void);
45 static const char *re_get_token(selection *, const char *);
46 
47 /* G l o b a l v a r i a b l e s
48  */
51 static const char *re_substr[RE_TOKEN_MAX];
52 static unsigned int re_token_depth;
53 
54 /* E x t e r n a l f u n c t i o n
55  */
56 
68 const char *re_cmp(const char *str, const char *regexp) {
69  const char *next_regexp;
70  Boolean once = False;
71  Boolean matched;
72 
73  if (re_init_done == False)
74  re_init();
75 
76 #ifdef SAFE_CHECKS
77  if (regexp == NULL || str == NULL)
78  return NULL;
79 #endif
80  if (*regexp == '^') {
81  once = True;
82  ++regexp;
83  }
84  if (*regexp == 0) {
85  /* // or /^/ matches any string */
86  return str;
87  }
88 
89  next_regexp = re_get_token(re_token[0], regexp);
90  re_token_depth = 0;
91  re_substr[0] = next_regexp;
92 
93  matched = False;
94  while (*str != '\0' && !(matched = re_match_token(*str, re_token[0])))
95  str++;
96 
97  if (matched && *next_regexp == 0)
98  return str;
99 
100  /* Apologies for the nearly duplicated code below, hopefully it
101  * speeds things up.
102  */
103  if (once) {
104  switch (re_token[0]->repeat) {
105  case rep_once:
106  if (matched == False)
107  return NULL;
108  break;
109 
110  case rep_once_or_more:
111  if (matched == False)
112  return NULL;
113 
114  if (re_cmp_step(str+1, regexp, 0, 1))
115  return str;
116  break;
117 
118  case rep_null_or_once:
119  if (matched == False)
120  return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL;
121  break;
122 
123  case rep_null_or_more:
124  if (matched) {
125  if (re_cmp_step(str+1, regexp, 0, 1))
126  return str;
127  } else {
128  return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL;
129  }
130  break;
131  }
132 
133  return re_cmp_step(str+1, next_regexp, 1, 0) ? str : NULL;
134  }
135 
136  if (matched) {
137  switch (re_token[0]->repeat) {
138  case rep_once:
139  case rep_null_or_once:
140  break;
141 
142  case rep_once_or_more:
143  case rep_null_or_more:
144  if (re_cmp_step(str+1, regexp, 0, 1))
145  return str;
146  break;
147  }
148 
149  /* The logic here is that re_match_token only sees
150  * if the one letter matches. Thus, if the
151  * regex is like '@match eureca', and the
152  * the user enters anything with an e, re_match_token
153  * returns true, but they really need to match the
154  * entire regexp, which re_cmp_step will do.
155  * However, what happens is that there can be a case
156  * where the string being match is something like
157  * 'where is eureca'. In this case, the re_match_token
158  * matches that first e, but the re_cmp_step below,
159  * fails because the next character (r) doesn't match
160  * the u. So we call re_cmp with the string
161  * after the first r, so that it should hopefully match
162  * up properly.
163  */
164  if (re_cmp_step(str+1, next_regexp, 1, 0))
165  return str;
166  else if (*(str+1) != 0)
167  return re_cmp(str+1, regexp);
168  }
169  return NULL;
170 }
171 
172 /* A u x i l l i a r y f u n c t i o n s
173  */
174 
189 static Boolean re_cmp_step(const char *str, const char *regexp, unsigned slot, int matches) {
190  const char *next_regexp;
191  Boolean matched;
192 
193 #ifdef DEBUG
194 /* fprintf(stderr, "['%s', '%s', %u, %d]\n", str, regexp, slot, matches);*/
195 #endif
196 
197  if (*regexp == 0) {
198  /* When we reach the end of the regexp, the match is a success */
199  return True;
200  }
201 
202  /* This chunk of code makes sure that the regexp-tokenising happens
203  * only once. We only tokenise as much as we need.
204  */
205  if (slot > re_token_depth) {
206  re_token_depth = slot;
207  if (re_token[slot] == NULL)
208  re_token[slot] = (selection *)malloc(sizeof(selection));
209  next_regexp = re_get_token(re_token[slot], regexp);
210  if (next_regexp == NULL) {
211  /* Syntax error, what else can we do? */
212  return False;
213  }
214  re_substr[slot] = next_regexp;
215  } else {
216  next_regexp = re_substr[slot];
217  }
218 
219  matched = re_match_token(*str, re_token[slot]);
220  if (matched)
221  ++matches;
222 
223  if (*str == 0)
224  return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched;
225 
226  switch (re_token[slot]->repeat) {
227  case rep_once:
228  if (matches == 1) { /* (matches == 1) => (matched == True) */
229  return re_cmp_step(str+1, next_regexp, slot+1, 0);
230  }
231  return False;
232 
233  case rep_once_or_more:
234  if (matched) { /* (matched == True) => (matches >= 1) */
235  /* First check if the current token repeats more */
236  if (re_cmp_step(str+1, regexp, slot, matches))
237  return True;
238  return re_cmp_step(str+1, next_regexp, slot+1, 0);
239  }
240  return False;
241 
242  case rep_null_or_once:
243  /* We must go on to the next token, but should we advance str? */
244  if (matches == 0) {
245  return re_cmp_step(str, next_regexp, slot+1, 0);
246  } else if (matches == 1) {
247  return re_cmp_step(str+1, next_regexp, slot+1, 0);
248  }
249  return False; /* Not reached */
250 
251  case rep_null_or_more:
252  if (matched) {
253  /* Look for further repeats, advance str */
254  if (re_cmp_step(str+1, regexp, slot, matches))
255  return True;
256  return re_cmp_step(str, next_regexp, slot+1, 0);
257  }
258  return re_cmp_step(str, next_regexp, slot+1, 0);
259  }
260 
261  return False;
262 }
263 
270 static void re_init(void) {
271  int i;
272 
273  re_token[0] = (selection *)malloc(sizeof(selection));
274  if (re_token[0] == NULL)
276  for (i = 1; i < RE_TOKEN_MAX; i++)
277  re_token[i] = NULL;
278 
279  re_init_done = True;
280 }
281 
293  switch (sel->type) {
294  case sel_any:
295  return True;
296 
297  case sel_end:
298  return (c == 0);
299 
300  case sel_single:
301  return (tolower(c) == tolower(sel->u.single));
302 
303  case sel_range:
304  return (c >= sel->u.range.low && c <= sel->u.range.high);
305 
306  case sel_array:
307  return (sel->u.array[c]);
308 
309  case sel_not_single:
310  return (tolower(c) != tolower(sel->u.single));
311 
312  case sel_not_range:
313  return (c < sel->u.range.low && c > sel->u.range.high);
314  }
315 
316  return False;
317 }
318 
330 static const char *re_get_token(selection *sel, const char *regexp) {
331 #ifdef SAFE_CHECKS
332 # define exit_if_null if (*regexp == 0) return NULL
333 #else
334 # define exit_if_null
335 #endif
336  Boolean quoted = False;
337  uchar looking_at;
338 
339 #ifdef SAFE_CHECKS
340  if (sel == NULL || regexp == NULL || *regexp == 0)
341  return NULL;
342 #endif
343 
344  do {
345  looking_at = *regexp++;
346  switch (looking_at) {
347  case '$':
348  if (quoted) {
349  quoted = False;
350  sel->type = sel_single;
351  sel->u.single = looking_at;
352  } else {
353  sel->type = sel_end;
354  }
355  break;
356 
357  case '.':
358  if (quoted) {
359  quoted = False;
360  sel->type = sel_single;
361  sel->u.single = looking_at;
362  } else {
363  sel->type = sel_any;
364  }
365  break;
366 
367  case '[':
368  /* The fun stuff... perhaps a little obfuscated since I
369  * don't trust the compiler to analyse liveness.
370  */
371  if (quoted) {
372  quoted = False;
373  sel->type = sel_single;
374  sel->u.single = looking_at;
375  } else {
376  Boolean neg = False;
377  uchar first, last = 0;
378 
379  exit_if_null;
380  looking_at = *regexp++;
381 
382  if (looking_at == '^') {
383  neg = True;
384  exit_if_null;
385  looking_at = *regexp++;
386  }
387  first = looking_at;
388  exit_if_null;
389  looking_at = *regexp++;
390  if (looking_at == ']') {
391  /* On the form [q] or [^q] */
392  sel->type = neg ? sel_not_single : sel_single;
393  sel->u.single = first;
394  break;
395  } else if (looking_at == '-') {
396  exit_if_null;
397  last = *regexp++;
398  if (last == ']') {
399  /* On the form [A-] or [^A-]. Checking for
400  * [,-] and making it a range is probably not
401  * worth it :-)
402  */
403  sel->type = sel_array;
404  memset(sel->u.array, neg, sizeof(sel->u.array));
405  sel->u.array[first] = sel->u.array['-'] = !neg;
406  break;
407  } else {
408  exit_if_null;
409  looking_at = *regexp++;
410  if (looking_at == ']') {
411  /* On the form [A-G] or [^A-G]. Note that [G-A]
412  * is a syntax error. Fair enough, I think.
413  */
414 #ifdef SAFE_CHECKS
415  if (first > last)
416  return NULL;
417 #endif
418  sel->type = neg ? sel_not_range : sel_range;
419  sel->u.range.low = first;
420  sel->u.range.high = last;
421  break;
422  }
423  }
424  }
425  {
426  /* The datastructure can only represent a RE this
427  * complex with an array.
428  */
429  int i;
430  uchar previous;
431 
432  sel->type = sel_array;
433  memset(sel->u.array, neg, sizeof(sel->u.array));
434  if (last) {
435  /* It starts with a range */
436 #ifdef SAFE_CHECKS
437  if (first > last)
438  return NULL;
439 #endif
440  for (i = first; i <= last; i++) {
441  sel->u.array[i] = !neg;
442  }
443  } else {
444  /* It begins with a "random" character */
445  sel->u.array[first] = !neg;
446  }
447  sel->u.array[looking_at] = !neg;
448 
449  exit_if_null;
450  previous = looking_at;
451  looking_at = *regexp++;
452 
453  /* Add more characters to the array until we reach
454  * ]. Quoting doesn't and shouldn't work in here.
455  * ("]" should be put first, and "-" last if they
456  * are needed inside this construct.)
457  * Look for ranges as we go along.
458  */
459  while (looking_at != ']') {
460  if (looking_at == '-') {
461  exit_if_null;
462  looking_at = *regexp++;
463  if (looking_at != ']') {
464 #ifdef SAFE_CHECKS
465  if (previous > looking_at)
466  return NULL;
467 #endif
468  for (i = previous+1; i < looking_at; i++) {
469  /* previous has already been set and
470  * looking_at is set below.
471  */
472  sel->u.array[i] = !neg;
473  }
474  exit_if_null;
475  } else {
476  sel->u.array['-'] = !neg;
477  break;
478  }
479  }
480  sel->u.array[looking_at] = !neg;
481  previous = looking_at;
482  exit_if_null;
483  looking_at = *regexp++;
484  }
485  }
486  }
487  break;
488 
489  case '\\':
490  if (quoted) {
491  quoted = False;
492  sel->type = sel_single;
493  sel->u.single = looking_at;
494  } else {
495  quoted = True;
496  }
497  break;
498 
499  default:
500  quoted = False;
501  sel->type = sel_single;
502  sel->u.single = looking_at;
503  break;
504  }
505  } while (quoted);
506 
507  if (*regexp == '*') {
508  sel->repeat = rep_null_or_more;
509  ++regexp;
510  } else if (*regexp == '?') {
511  sel->repeat = rep_null_or_once;
512  ++regexp;
513  } else if (*regexp == '+') {
514  sel->repeat = rep_once_or_more;
515  ++regexp;
516  } else {
517  sel->repeat = rep_once;
518  }
519 
520  return regexp;
521 }
522 
sel_not_single
@ sel_not_single
Definition: re-cmp.h:53
re_token_depth
static unsigned int re_token_depth
Definition: re-cmp.cpp:52
global.h
uchar
#define uchar
Definition: re-cmp.h:42
selection::u
union selection::@4 u
re_cmp_step
static Boolean re_cmp_step(const char *, const char *, unsigned, int)
Definition: re-cmp.cpp:189
say.previous
dictionary previous
Definition: say.py:205
c
static event_registration c
Definition: citylife.cpp:425
Boolean
#define Boolean
Definition: re-cmp.h:43
rep_null_or_once
@ rep_null_or_once
Definition: re-cmp.h:60
tolower
#define tolower(C)
Definition: c_new.cpp:30
sel_any
@ sel_any
Definition: re-cmp.h:48
sel_range
@ sel_range
Definition: re-cmp.h:51
rep_once_or_more
@ rep_once_or_more
Definition: re-cmp.h:59
re_substr
static const char * re_substr[RE_TOKEN_MAX]
Definition: re-cmp.cpp:51
selection
Definition: re-cmp.h:64
selection::single
uchar single
Definition: re-cmp.h:67
re_match_token
static Boolean re_match_token(uchar, selection *)
Definition: re-cmp.cpp:292
False
#define False
Definition: re-cmp.h:45
selection::repeat
repetetion_type repeat
Definition: re-cmp.h:73
re_init_done
static Boolean re_init_done
Definition: re-cmp.cpp:49
make_face_from_files.str
str
Definition: make_face_from_files.py:30
selection::array
Boolean array[UCHAR_MAX]
Definition: re-cmp.h:71
re_token
static selection * re_token[RE_TOKEN_MAX]
Definition: re-cmp.cpp:50
re_init
static void re_init(void)
Definition: re-cmp.cpp:270
sel_end
@ sel_end
Definition: re-cmp.h:49
rep_once
@ rep_once
Definition: re-cmp.h:58
fatal
void fatal(enum fatal_error err)
Definition: utils.cpp:570
re_get_token
static const char * re_get_token(selection *, const char *)
Definition: re-cmp.cpp:330
True
#define True
Definition: re-cmp.h:44
sel_not_range
@ sel_not_range
Definition: re-cmp.h:54
exit_if_null
#define exit_if_null
define.h
re-cmp.h
sel_single
@ sel_single
Definition: re-cmp.h:50
first
Crossfire Protocol most of the time after the actual code was already omit certain important and possibly make life miserable any new developer or curious player should be able to find most of the relevant information here If inconsistencies are found or this documentation proves to be consider the latest server side protocol code in the public source code repository as the authoritative reference Introduction If you were ever curious enough to telnet or netcat to a Crossfire chances are you were sorely disappointed While the protocol may seem to use plain text at first
Definition: protocol.txt:20
selection::range
struct selection::@4::@5 range
rep_null_or_more
@ rep_null_or_more
Definition: re-cmp.h:61
sel_array
@ sel_array
Definition: re-cmp.h:52
RE_TOKEN_MAX
#define RE_TOKEN_MAX
Definition: re-cmp.h:19
OUT_OF_MEMORY
@ OUT_OF_MEMORY
Definition: define.h:48
re_cmp
const char * re_cmp(const char *, const char *)
Definition: re-cmp.cpp:68
matches
static int matches(const char *exp, const char *text)
Definition: dialog.cpp:79
selection::type
selection_type type
Definition: re-cmp.h:65
is_valid_types_gen.type
list type
Definition: is_valid_types_gen.py:25