00001
00002
00003
00004
00005
00022 #include <stdio.h>
00023 #include <stdlib.h>
00024 #include <memory.h>
00025 #include <limits.h>
00026 #include <re-cmp.h>
00027 #include <ctype.h>
00028 #include <global.h>
00029 #include <define.h>
00030
00031
00032 #if defined(__sun__) && defined(StupidSunHeaders)
00033 # include <sys/types.h>
00034 # include <sys/time.h>
00035 # include "sunos.h"
00036 #endif
00037
00038
00039
00040 const char *re_cmp(const char *, const char *);
00041 static Boolean re_cmp_step(const char *, const char *, unsigned, int);
00042 static void re_init(void);
00043 static Boolean re_match_token(uchar, selection *);
00044 static const char *re_get_token(selection *, const char *);
00045 #ifdef DEBUG2
00046 static void re_dump_sel(selection *);
00047 #endif
00048
00049
00050
00051 static Boolean re_init_done = False;
00052 static selection *re_token[RE_TOKEN_MAX];
00053 static const char *re_substr[RE_TOKEN_MAX];
00054 static unsigned int re_token_depth;
00055
00056
00057
00058
00070 const char *re_cmp(const char *str, const char *regexp) {
00071 const char *next_regexp;
00072 Boolean once = False;
00073 Boolean matched;
00074
00075 if (re_init_done == False)
00076 re_init();
00077
00078 #ifdef SAFE_CHECKS
00079 if (regexp == NULL || str == NULL)
00080 return NULL;
00081 #endif
00082 if (*regexp == '^') {
00083 once = True;
00084 ++regexp;
00085 }
00086 if (*regexp == 0) {
00087
00088 return str;
00089 }
00090
00091 next_regexp = re_get_token(re_token[0], regexp);
00092 re_token_depth = 0;
00093 re_substr[0] = next_regexp;
00094
00095 matched = False;
00096 while (*str != '\0' && !(matched = re_match_token(*str, re_token[0])))
00097 str++;
00098
00099 if (matched && *next_regexp == 0)
00100 return str;
00101
00102
00103
00104
00105 if (once) {
00106 switch (re_token[0]->repeat) {
00107 case rep_once:
00108 if (matched == False)
00109 return NULL;
00110 break;
00111
00112 case rep_once_or_more:
00113 if (matched == False)
00114 return NULL;
00115
00116 if (re_cmp_step(str+1, regexp, 0, 1))
00117 return str;
00118 break;
00119
00120 case rep_null_or_once:
00121 if (matched == False)
00122 return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL;
00123 break;
00124
00125 case rep_null_or_more:
00126 if (matched) {
00127 if (re_cmp_step(str+1, regexp, 0, 1))
00128 return str;
00129 } else {
00130 return re_cmp_step(str, next_regexp, 1, 0) ? str : NULL;
00131 }
00132 break;
00133 }
00134
00135 return re_cmp_step(str+1, next_regexp, 1, 0) ? str : NULL;
00136 }
00137
00138 if (matched) {
00139 switch (re_token[0]->repeat) {
00140 case rep_once:
00141 case rep_null_or_once:
00142 break;
00143
00144 case rep_once_or_more:
00145 case rep_null_or_more:
00146 if (re_cmp_step(str+1, regexp, 0, 1))
00147 return str;
00148 break;
00149 }
00150
00151
00152
00153
00154
00155
00156
00157
00158
00159
00160
00161
00162
00163
00164
00165
00166 if (re_cmp_step(str+1, next_regexp, 1, 0))
00167 return str;
00168 else if (*(str+1) != 0)
00169 return re_cmp(str+1, regexp);
00170 }
00171 return NULL;
00172 }
00173
00174
00175
00176
00191 static Boolean re_cmp_step(const char *str, const char *regexp, unsigned slot, int matches) {
00192 const char *next_regexp;
00193 Boolean matched;
00194
00195 #ifdef DEBUG
00196
00197 #endif
00198
00199 if (*regexp == 0) {
00200
00201 return True;
00202 }
00203
00204
00205
00206
00207 if (slot > re_token_depth) {
00208 re_token_depth = slot;
00209 if (re_token[slot] == NULL)
00210 re_token[slot] = (selection *)malloc(sizeof(selection));
00211 next_regexp = re_get_token(re_token[slot], regexp);
00212 if (next_regexp == NULL) {
00213
00214 return False;
00215 }
00216 re_substr[slot] = next_regexp;
00217 } else {
00218 next_regexp = re_substr[slot];
00219 }
00220
00221 matched = re_match_token(*str, re_token[slot]);
00222 if (matched)
00223 ++matches;
00224
00225 if (*str == 0)
00226 return (*next_regexp == 0 || re_token[slot]->type == sel_end) && matched;
00227
00228 switch (re_token[slot]->repeat) {
00229 case rep_once:
00230 if (matches == 1) {
00231 return re_cmp_step(str+1, next_regexp, slot+1, 0);
00232 }
00233 return False;
00234
00235 case rep_once_or_more:
00236 if (matched) {
00237
00238 if (re_cmp_step(str+1, regexp, slot, matches))
00239 return True;
00240 return re_cmp_step(str+1, next_regexp, slot+1, 0);
00241 }
00242 return False;
00243
00244 case rep_null_or_once:
00245
00246 if (matches == 0) {
00247 return re_cmp_step(str, next_regexp, slot+1, 0);
00248 } else if (matches == 1) {
00249 return re_cmp_step(str+1, next_regexp, slot+1, 0);
00250 }
00251 return False;
00252
00253 case rep_null_or_more:
00254 if (matched) {
00255
00256 if (re_cmp_step(str+1, regexp, slot, matches))
00257 return True;
00258 return re_cmp_step(str, next_regexp, slot+1, 0);
00259 }
00260 return re_cmp_step(str, next_regexp, slot+1, 0);
00261 }
00262
00263 return False;
00264 }
00265
00272 static void re_init(void) {
00273 int i;
00274
00275 re_token[0] = (selection *)malloc(sizeof(selection));
00276 if (re_token[0] == NULL)
00277 fatal(OUT_OF_MEMORY);
00278 for (i = 1; i < RE_TOKEN_MAX; i++)
00279 re_token[i] = NULL;
00280
00281 re_init_done = True;
00282 }
00283
00294 static Boolean re_match_token(uchar c, selection *sel) {
00295 switch (sel->type) {
00296 case sel_any:
00297 return True;
00298
00299 case sel_end:
00300 return (c == 0);
00301
00302 case sel_single:
00303 return (tolower(c) == tolower(sel->u.single));
00304
00305 case sel_range:
00306 return (c >= sel->u.range.low && c <= sel->u.range.high);
00307
00308 case sel_array:
00309 return (sel->u.array[c]);
00310
00311 case sel_not_single:
00312 return (tolower(c) != tolower(sel->u.single));
00313
00314 case sel_not_range:
00315 return (c < sel->u.range.low && c > sel->u.range.high);
00316 }
00317
00318 return False;
00319 }
00320
00332 static const char *re_get_token(selection *sel, const char *regexp) {
00333 #ifdef SAFE_CHECKS
00334 # define exit_if_null if (*regexp == 0) return NULL
00335 #else
00336 # define exit_if_null
00337 #endif
00338 Boolean quoted = False;
00339 uchar looking_at;
00340
00341 #ifdef SAFE_CHECKS
00342 if (sel == NULL || regexp == NULL || *regexp == 0)
00343 return NULL;
00344 #endif
00345
00346 do {
00347 looking_at = *regexp++;
00348 switch (looking_at) {
00349 case '$':
00350 if (quoted) {
00351 quoted = False;
00352 sel->type = sel_single;
00353 sel->u.single = looking_at;
00354 } else {
00355 sel->type = sel_end;
00356 }
00357 break;
00358
00359 case '.':
00360 if (quoted) {
00361 quoted = False;
00362 sel->type = sel_single;
00363 sel->u.single = looking_at;
00364 } else {
00365 sel->type = sel_any;
00366 }
00367 break;
00368
00369 case '[':
00370
00371
00372
00373 if (quoted) {
00374 quoted = False;
00375 sel->type = sel_single;
00376 sel->u.single = looking_at;
00377 } else {
00378 Boolean neg = False;
00379 uchar first, last = 0;
00380
00381 exit_if_null;
00382 looking_at = *regexp++;
00383
00384 if (looking_at == '^') {
00385 neg = True;
00386 exit_if_null;
00387 looking_at = *regexp++;
00388 }
00389 first = looking_at;
00390 exit_if_null;
00391 looking_at = *regexp++;
00392 if (looking_at == ']') {
00393
00394 sel->type = neg ? sel_not_single : sel_single;
00395 sel->u.single = first;
00396 break;
00397 } else if (looking_at == '-') {
00398 exit_if_null;
00399 last = *regexp++;
00400 if (last == ']') {
00401
00402
00403
00404
00405 sel->type = sel_array;
00406 memset(sel->u.array, neg, sizeof(sel->u.array));
00407 sel->u.array[first] = sel->u.array['-'] = !neg;
00408 break;
00409 } else {
00410 exit_if_null;
00411 looking_at = *regexp++;
00412 if (looking_at == ']') {
00413
00414
00415
00416 #ifdef SAFE_CHECKS
00417 if (first > last)
00418 return NULL;
00419 #endif
00420 sel->type = neg ? sel_not_range : sel_range;
00421 sel->u.range.low = first;
00422 sel->u.range.high = last;
00423 break;
00424 }
00425 }
00426 }
00427 {
00428
00429
00430
00431 int i;
00432 uchar previous;
00433
00434 sel->type = sel_array;
00435 memset(sel->u.array, neg, sizeof(sel->u.array));
00436 if (last) {
00437
00438 #ifdef SAFE_CHECKS
00439 if (first > last)
00440 return NULL;
00441 #endif
00442 for (i = first; i <= last; i++) {
00443 sel->u.array[i] = !neg;
00444 }
00445 } else {
00446
00447 sel->u.array[first] = !neg;
00448 }
00449 sel->u.array[looking_at] = !neg;
00450
00451 exit_if_null;
00452 previous = looking_at;
00453 looking_at = *regexp++;
00454
00455
00456
00457
00458
00459
00460
00461 while (looking_at != ']') {
00462 if (looking_at == '-') {
00463 exit_if_null;
00464 looking_at = *regexp++;
00465 if (looking_at != ']') {
00466 #ifdef SAFE_CHECKS
00467 if (previous > looking_at)
00468 return NULL;
00469 #endif
00470 for (i = previous+1; i < looking_at; i++) {
00471
00472
00473
00474 sel->u.array[i] = !neg;
00475 }
00476 exit_if_null;
00477 } else {
00478 sel->u.array['-'] = !neg;
00479 break;
00480 }
00481 }
00482 sel->u.array[looking_at] = !neg;
00483 previous = looking_at;
00484 exit_if_null;
00485 looking_at = *regexp++;
00486 }
00487 }
00488 }
00489 break;
00490
00491 case '\\':
00492 if (quoted) {
00493 quoted = False;
00494 sel->type = sel_single;
00495 sel->u.single = looking_at;
00496 } else {
00497 quoted = True;
00498 }
00499 break;
00500
00501 default:
00502 quoted = False;
00503 sel->type = sel_single;
00504 sel->u.single = looking_at;
00505 break;
00506 }
00507 } while (quoted);
00508
00509 if (*regexp == '*') {
00510 sel->repeat = rep_null_or_more;
00511 ++regexp;
00512 } else if (*regexp == '?') {
00513 sel->repeat = rep_null_or_once;
00514 ++regexp;
00515 } else if (*regexp == '+') {
00516 sel->repeat = rep_once_or_more;
00517 ++regexp;
00518 } else {
00519 sel->repeat = rep_once;
00520 }
00521
00522 return regexp;
00523 }
00524
00525
00526
00527 #ifdef DEBUG2
00528
00534 static void re_dump_sel(selection *sel) {
00535 switch (sel->type) {
00536 case sel_any:
00537 printf(".");
00538 break;
00539
00540 case sel_end:
00541 printf("$");
00542 break;
00543
00544 case sel_single:
00545 printf("<%c>", sel->u.single);
00546 break;
00547
00548 case sel_range:
00549 printf("[%c-%c]", sel->u.range.low, sel->u.range.high);
00550 break;
00551
00552 case sel_array: {
00553 int i;
00554 printf("[");
00555 for (i = 0; i < UCHAR_MAX; i++) {
00556 if (sel->u.array[i]) {
00557 printf("%c", i);
00558 }
00559 }
00560 printf("]");
00561 }
00562 break;
00563
00564 case sel_not_single:
00565 printf("[^%c]", sel->u.single);
00566 break;
00567
00568 case sel_not_range:
00569 printf("[^%c-%c]", sel->u.range.low, sel->u.range.high);
00570 break;
00571
00572 default:
00573 printf("<UNKNOWN TOKEN!>");
00574 break;
00575 }
00576
00577 switch (sel->repeat) {
00578 case rep_once:
00579 break;
00580
00581 case rep_null_or_once:
00582 printf("?");
00583 break;
00584
00585 case rep_null_or_more:
00586 printf("*");
00587 break;
00588
00589 case rep_once_or_more:
00590 printf("+");
00591 break;
00592
00593 default:
00594 printf("<UNKNOWN REP-TOKEN!>");
00595 break;
00596 }
00597 }
00598
00599 int main(int argc, char *argv[]) {
00600 char *re, *m;
00601 selection sel;
00602
00603 re = re_get_token(&sel, argv[1]);
00604
00605 printf("'%s' -> '%s'\n", argv[1], re);
00606 re_dump_sel(&sel);
00607 printf("\n");
00608 m = re_cmp(argv[2], argv[1]);
00609 if (m)
00610 printf("MATCH! -> '%s'\n", m);
00611 return 0;
00612 }
00613 #endif