39 |
|
|
40 |
#include "config.h" |
#include "config.h" |
41 |
|
|
42 |
|
/* Standard C headers plus the external interface definition. The only time |
43 |
|
setjmp and stdarg are used is when NO_RECURSE is set. */ |
44 |
|
|
45 |
|
#include <ctype.h> |
46 |
|
#include <limits.h> |
47 |
|
#include <setjmp.h> |
48 |
|
#include <stdarg.h> |
49 |
|
#include <stddef.h> |
50 |
|
#include <stdio.h> |
51 |
|
#include <stdlib.h> |
52 |
|
#include <string.h> |
53 |
|
|
54 |
|
#ifndef PCRE_SPY |
55 |
|
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ |
56 |
|
#endif |
57 |
|
|
58 |
|
#include "pcre.h" |
59 |
|
|
60 |
/* When compiling for use with the Virtual Pascal compiler, these functions |
/* When compiling for use with the Virtual Pascal compiler, these functions |
61 |
need to have their names changed. PCRE must be compiled with the -DVPCOMPAT |
need to have their names changed. PCRE must be compiled with the -DVPCOMPAT |
62 |
option on the command line. */ |
option on the command line. */ |
169 |
#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2 |
#define PUT2INC(a,n,d) PUT2(a,n,d), a += 2 |
170 |
|
|
171 |
|
|
|
/* Standard C headers plus the external interface definition */ |
|
|
|
|
|
#include <ctype.h> |
|
|
#include <limits.h> |
|
|
#include <stddef.h> |
|
|
#include <stdio.h> |
|
|
#include <stdlib.h> |
|
|
#include <string.h> |
|
|
|
|
|
#ifndef PCRE_SPY |
|
|
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ |
|
|
#endif |
|
|
|
|
|
#include "pcre.h" |
|
|
|
|
172 |
/* In case there is no definition of offsetof() provided - though any proper |
/* In case there is no definition of offsetof() provided - though any proper |
173 |
Standard C system should have one. */ |
Standard C system should have one. */ |
174 |
|
|
201 |
#define PUBLIC_OPTIONS \ |
#define PUBLIC_OPTIONS \ |
202 |
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ |
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ |
203 |
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ |
PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \ |
204 |
PCRE_NO_AUTO_CAPTURE) |
PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK) |
205 |
|
|
206 |
#define PUBLIC_EXEC_OPTIONS \ |
#define PUBLIC_EXEC_OPTIONS \ |
207 |
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY) |
(PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK) |
208 |
|
|
209 |
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */ |
#define PUBLIC_STUDY_OPTIONS 0 /* None defined */ |
210 |
|
|
358 |
class - the difference is relevant only when a UTF-8 |
class - the difference is relevant only when a UTF-8 |
359 |
character > 255 is encountered. */ |
character > 255 is encountered. */ |
360 |
|
|
361 |
OP_XCLASS, /* 56 Extended class for handling UTF-8 chars within the |
OP_XCLASS, /* 57 Extended class for handling UTF-8 chars within the |
362 |
class. This does both positive and negative. */ |
class. This does both positive and negative. */ |
363 |
|
|
364 |
OP_REF, /* 57 Match a back reference */ |
OP_REF, /* 58 Match a back reference */ |
365 |
OP_RECURSE, /* 58 Match a numbered subpattern (possibly recursive) */ |
OP_RECURSE, /* 59 Match a numbered subpattern (possibly recursive) */ |
366 |
OP_CALLOUT, /* 59 Call out to external function if provided */ |
OP_CALLOUT, /* 60 Call out to external function if provided */ |
367 |
|
|
368 |
OP_ALT, /* 60 Start of alternation */ |
OP_ALT, /* 61 Start of alternation */ |
369 |
OP_KET, /* 61 End of group that doesn't have an unbounded repeat */ |
OP_KET, /* 62 End of group that doesn't have an unbounded repeat */ |
370 |
OP_KETRMAX, /* 62 These two must remain together and in this */ |
OP_KETRMAX, /* 63 These two must remain together and in this */ |
371 |
OP_KETRMIN, /* 63 order. They are for groups that repeat for ever. */
OP_KETRMIN, /* 64 order. They are for groups that repeat for ever. */
372 |
|
|
373 |
/* The assertions must come before ONCE and COND */ |
/* The assertions must come before ONCE and COND */ |
374 |
|
|
375 |
OP_ASSERT, /* 64 Positive lookahead */ |
OP_ASSERT, /* 65 Positive lookahead */ |
376 |
OP_ASSERT_NOT, /* 65 Negative lookahead */ |
OP_ASSERT_NOT, /* 66 Negative lookahead */ |
377 |
OP_ASSERTBACK, /* 66 Positive lookbehind */ |
OP_ASSERTBACK, /* 67 Positive lookbehind */ |
378 |
OP_ASSERTBACK_NOT, /* 67 Negative lookbehind */ |
OP_ASSERTBACK_NOT, /* 68 Negative lookbehind */ |
379 |
OP_REVERSE, /* 68 Move pointer back - used in lookbehind assertions */ |
OP_REVERSE, /* 69 Move pointer back - used in lookbehind assertions */ |
380 |
|
|
381 |
/* ONCE and COND must come after the assertions, with ONCE first, as there's |
/* ONCE and COND must come after the assertions, with ONCE first, as there's |
382 |
a test for >= ONCE for a subpattern that isn't an assertion. */ |
a test for >= ONCE for a subpattern that isn't an assertion. */ |
383 |
|
|
384 |
OP_ONCE, /* 69 Once matched, don't back up into the subpattern */ |
OP_ONCE, /* 70 Once matched, don't back up into the subpattern */ |
385 |
OP_COND, /* 70 Conditional group */ |
OP_COND, /* 71 Conditional group */ |
386 |
OP_CREF, /* 71 Used to hold an extraction string number (cond ref) */ |
OP_CREF, /* 72 Used to hold an extraction string number (cond ref) */ |
387 |
|
|
388 |
OP_BRAZERO, /* 72 These two must remain together and in this */ |
OP_BRAZERO, /* 73 These two must remain together and in this */ |
389 |
OP_BRAMINZERO, /* 73 order. */ |
OP_BRAMINZERO, /* 74 order. */ |
390 |
|
|
391 |
OP_BRANUMBER, /* 74 Used for extracting brackets whose number is greater |
OP_BRANUMBER, /* 75 Used for extracting brackets whose number is greater |
392 |
than can fit into an opcode. */ |
than can fit into an opcode. */ |
393 |
|
|
394 |
OP_BRA /* 75 This and greater values are used for brackets that |
OP_BRA /* 76 This and greater values are used for brackets that |
395 |
extract substrings up to a basic limit. After that, |
extract substrings up to a basic limit. After that, |
396 |
use is made of OP_BRANUMBER. */ |
use is made of OP_BRANUMBER. */ |
397 |
}; |
}; |
434 |
1, 1, 1, 1, 2, 1, 1, /* Any, Anybyte, \Z, \z, Opt, ^, $ */ \ |
1, 1, 1, 1, 2, 1, 1, /* Any, Anybyte, \Z, \z, Opt, ^, $ */ \ |
435 |
2, /* Chars - the minimum length */ \ |
2, /* Chars - the minimum length */ \ |
436 |
2, /* not */ \ |
2, /* not */ \ |
437 |
/* Positive single-char repeats */ \ |
/* Positive single-char repeats ** These are */ \ |
438 |
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** These are */ \ |
2, 2, 2, 2, 2, 2, /* *, *?, +, +?, ?, ?? ** minima in */ \ |
439 |
4, 4, 4, /* upto, minupto, exact ** minima */ \ |
4, 4, 4, /* upto, minupto, exact ** UTF-8 mode */ \ |
440 |
/* Negative single-char repeats */ \ |
/* Negative single-char repeats - only for chars < 256 */ \ |
441 |
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ |
2, 2, 2, 2, 2, 2, /* NOT *, *?, +, +?, ?, ?? */ \ |
442 |
4, 4, 4, /* NOT upto, minupto, exact */ \ |
4, 4, 4, /* NOT upto, minupto, exact */ \ |
443 |
/* Positive type repeats */ \ |
/* Positive type repeats */ \ |
529 |
#define ERR41 "unrecognized character after (?P" |
#define ERR41 "unrecognized character after (?P" |
530 |
#define ERR42 "syntax error after (?P" |
#define ERR42 "syntax error after (?P" |
531 |
#define ERR43 "two named groups have the same name" |
#define ERR43 "two named groups have the same name" |
532 |
|
#define ERR44 "invalid UTF-8 string" |
533 |
|
|
534 |
/* All character handling must be done as unsigned characters. Otherwise there |
/* All character handling must be done as unsigned characters. Otherwise there |
535 |
are problems with top-bit-set characters and functions such as isspace(). |
are problems with top-bit-set characters and functions such as isspace(). |
593 |
call within the pattern. */ |
call within the pattern. */ |
594 |
|
|
595 |
typedef struct recursion_info { |
typedef struct recursion_info { |
596 |
struct recursion_info *prev; /* Previous recursion record (or NULL) */ |
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ |
597 |
int group_num; /* Number of group that was called */ |
int group_num; /* Number of group that was called */ |
598 |
const uschar *after_call; /* "Return value": points after the call in the expr */ |
const uschar *after_call; /* "Return value": points after the call in the expr */ |
599 |
const uschar *save_start; /* Old value of md->start_match */ |
const uschar *save_start; /* Old value of md->start_match */ |
601 |
int saved_max; /* Number of saved offsets */ |
int saved_max; /* Number of saved offsets */ |
602 |
} recursion_info; |
} recursion_info; |
603 |
|
|
604 |
|
/* When compiling in a mode that doesn't use recursive calls to match(), |
605 |
|
a structure is used to remember local variables on the heap. It is defined in |
606 |
|
pcre.c, close to the match() function, so that it is easy to keep it in step |
607 |
|
with any changes to the local variables. However, the pointer to the current frame
608 |
|
must be saved in some "static" place over a longjmp(). We declare the |
609 |
|
structure here so that we can put a pointer in the match_data structure. |
610 |
|
NOTE: This isn't used for a "normal" compilation of pcre. */ |
611 |
|
|
612 |
|
struct heapframe; |
613 |
|
|
614 |
/* Structure for passing "static" information around between the functions |
/* Structure for passing "static" information around between the functions |
615 |
doing the matching, so that they are thread-safe. */ |
doing the matching, so that they are thread-safe. */ |
616 |
|
|
638 |
int start_offset; /* The start offset value */ |
int start_offset; /* The start offset value */ |
639 |
recursion_info *recursive; /* Linked list of recursion data */ |
recursion_info *recursive; /* Linked list of recursion data */ |
640 |
void *callout_data; /* To pass back to callouts */ |
void *callout_data; /* To pass back to callouts */ |
641 |
|
struct heapframe *thisframe; /* Used only when compiling for no recursion */ |
642 |
} match_data; |
} match_data; |
643 |
|
|
644 |
/* Bit definitions for entries in the pcre_ctypes table. */ |
/* Bit definitions for entries in the pcre_ctypes table. */ |