7 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
8 |
|
|
9 |
Written by Philip Hazel |
Written by Philip Hazel |
10 |
Copyright (c) 1997-2005 University of Cambridge |
Copyright (c) 1997-2006 University of Cambridge |
11 |
|
|
12 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
13 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
114 |
|
|
115 |
typedef unsigned char uschar; |
typedef unsigned char uschar; |
116 |
|
|
117 |
|
/* When PCRE is compiled as a C++ library, the subject pointer can be replaced |
118 |
|
with a custom type. This makes it possible, for example, to allow pcre_exec() |
119 |
|
to process subject strings that are discontinuous by using a smart pointer |
120 |
|
class. It must always be possible to inspect all of the subject string in |
121 |
|
pcre_exec() because of the way it backtracks. Two macros are required in the |
122 |
|
normal case, for sign-unspecified and unsigned char pointers. The former is |
123 |
|
used for the external interface and appears in pcre.h, which is why its name |
124 |
|
must begin with PCRE_. */ |
125 |
|
|
126 |
|
#ifdef CUSTOM_SUBJECT_PTR |
127 |
|
#define PCRE_SPTR CUSTOM_SUBJECT_PTR |
128 |
|
#define USPTR CUSTOM_SUBJECT_PTR |
129 |
|
#else |
130 |
|
#define PCRE_SPTR const char * |
131 |
|
#define USPTR const unsigned char * |
132 |
|
#endif |
133 |
|
|
134 |
/* Include the public PCRE header and the definitions of UCP character property |
/* Include the public PCRE header and the definitions of UCP character property |
135 |
values. */ |
values. */ |
136 |
|
|
163 |
void * |
void * |
164 |
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n) |
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n) |
165 |
{ |
{ |
166 |
int i; |
size_t i; |
167 |
dest += n; |
dest += n; |
168 |
src += n; |
src += n; |
169 |
for (i = 0; i < n; ++i) *(--dest) = *(--src); |
for (i = 0; i < n; ++i) *(--dest) = *(--src); |
170 |
|
return dest; |
171 |
} |
} |
172 |
#define memmove(a, b, c) pcre_memmove(a, b, c) |
#define memmove(a, b, c) pcre_memmove(a, b, c) |
173 |
#endif /* not HAVE_BCOPY */ |
#endif /* not HAVE_BCOPY */ |
461 |
#define ESC_tee '\t' |
#define ESC_tee '\t' |
462 |
#endif |
#endif |
463 |
|
|
464 |
|
/* Codes for different types of Unicode property */ |
465 |
|
|
466 |
|
#define PT_ANY 0 /* Any property - matches all chars */ |
467 |
|
#define PT_LAMP 1 /* L& - the union of Lu, Ll, Lt */ |
468 |
|
#define PT_GC 2 /* General characteristic (e.g. L) */ |
469 |
|
#define PT_PC 3 /* Particular characteristic (e.g. Lu) */ |
470 |
|
#define PT_SC 4 /* Script (e.g. Han) */ |
471 |
|
|
472 |
|
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that |
473 |
|
contain UTF-8 characters with values greater than 255. */ |
474 |
|
|
475 |
|
#define XCL_NOT 0x01 /* Flag: this is a negative class */ |
476 |
|
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ |
477 |
|
|
478 |
|
#define XCL_END 0 /* Marks end of individual items */ |
479 |
|
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ |
480 |
|
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ |
481 |
|
#define XCL_PROP 3 /* Unicode property (2-byte property code follows) */ |
482 |
|
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ |
483 |
|
|
484 |
/* These are escaped items that aren't just an encoding of a particular data |
/* These are escaped items that aren't just an encoding of a particular data |
485 |
value such as \n. They must have non-zero values, as check_escape() returns |
value such as \n. They must have non-zero values, as check_escape() returns |
486 |
their negation. Also, they must appear in the same order as in the opcode |
their negation. Also, they must appear in the same order as in the opcode |
496 |
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E, |
ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_X, ESC_Z, ESC_z, ESC_E, |
497 |
ESC_Q, ESC_REF }; |
ESC_Q, ESC_REF }; |
498 |
|
|
|
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that |
|
|
contain UTF-8 characters with values greater than 255. */ |
|
|
|
|
|
#define XCL_NOT 0x01 /* Flag: this is a negative class */ |
|
|
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */ |
|
|
|
|
|
#define XCL_END 0 /* Marks end of individual items */ |
|
|
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */ |
|
|
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */ |
|
|
#define XCL_PROP 3 /* Unicode property (one property code) follows */ |
|
|
#define XCL_NOTPROP 4 /* Unicode inverted property (ditto) */ |
|
|
|
|
|
|
|
499 |
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets |
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets |
500 |
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to |
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to |
501 |
OP_EOD must correspond in order to the list of escapes immediately above. |
OP_EOD must correspond in order to the list of escapes immediately above. |
659 |
1, /* End */ \ |
1, /* End */ \ |
660 |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \ |
1, 1, 1, 1, 1, 1, 1, 1, 1, 1, /* \A, \G, \B, \B, \D, \d, \S, \s, \W, \w */ \ |
661 |
1, 1, /* Any, Anybyte */ \ |
1, 1, /* Any, Anybyte */ \ |
662 |
2, 2, 1, /* NOTPROP, PROP, EXTUNI */ \ |
3, 3, 1, /* NOTPROP, PROP, EXTUNI */ \ |
663 |
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \ |
1, 1, 2, 1, 1, /* \Z, \z, Opt, ^, $ */ \ |
664 |
2, /* Char - the minimum length */ \ |
2, /* Char - the minimum length */ \ |
665 |
2, /* Charnc - the minimum length */ \ |
2, /* Charnc - the minimum length */ \ |
791 |
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ |
struct recursion_info *prevrec; /* Previous recursion record (or NULL) */ |
792 |
int group_num; /* Number of group that was called */ |
int group_num; /* Number of group that was called */ |
793 |
const uschar *after_call; /* "Return value": points after the call in the expr */ |
const uschar *after_call; /* "Return value": points after the call in the expr */ |
794 |
const uschar *save_start; /* Old value of md->start_match */ |
USPTR save_start; /* Old value of md->start_match */ |
795 |
int *offset_save; /* Pointer to start of saved offsets */ |
int *offset_save; /* Pointer to start of saved offsets */ |
796 |
int saved_max; /* Number of saved offsets */ |
int saved_max; /* Number of saved offsets */ |
797 |
} recursion_info; |
} recursion_info; |
810 |
doing traditional NFA matching, so that they are thread-safe. */ |
doing traditional NFA matching, so that they are thread-safe. */ |
811 |
|
|
812 |
typedef struct match_data { |
typedef struct match_data { |
813 |
unsigned long int match_call_count; /* As it says */ |
unsigned long int match_call_count; /* As it says */ |
814 |
unsigned long int match_limit;/* As it says */ |
unsigned long int match_limit; /* As it says */ |
815 |
|
unsigned long int match_limit_recursion; /* As it says */ |
816 |
int *offset_vector; /* Offset vector */ |
int *offset_vector; /* Offset vector */ |
817 |
int offset_end; /* One past the end */ |
int offset_end; /* One past the end */ |
818 |
int offset_max; /* The maximum usable for return data */ |
int offset_max; /* The maximum usable for return data */ |
827 |
BOOL partial; /* PARTIAL flag */ |
BOOL partial; /* PARTIAL flag */ |
828 |
BOOL hitend; /* Hit the end of the subject at some point */ |
BOOL hitend; /* Hit the end of the subject at some point */ |
829 |
const uschar *start_code; /* For use when recursing */ |
const uschar *start_code; /* For use when recursing */ |
830 |
const uschar *start_subject; /* Start of the subject string */ |
USPTR start_subject; /* Start of the subject string */ |
831 |
const uschar *end_subject; /* End of the subject string */ |
USPTR end_subject; /* End of the subject string */ |
832 |
const uschar *start_match; /* Start of this match attempt */ |
USPTR start_match; /* Start of this match attempt */ |
833 |
const uschar *end_match_ptr; /* Subject position at end match */ |
USPTR end_match_ptr; /* Subject position at end match */ |
834 |
int end_offset_top; /* Highwater mark at end of match */ |
int end_offset_top; /* Highwater mark at end of match */ |
835 |
int capture_last; /* Most recent capture number */ |
int capture_last; /* Most recent capture number */ |
836 |
int start_offset; /* The start offset value */ |
int start_offset; /* The start offset value */ |
885 |
#define ctypes_offset (cbits_offset + cbit_length) |
#define ctypes_offset (cbits_offset + cbit_length) |
886 |
#define tables_length (ctypes_offset + 256) |
#define tables_length (ctypes_offset + 256) |
887 |
|
|
888 |
/* Layout of the UCP type table that translates property names into codes for |
/* Layout of the UCP type table that translates property names into types and |
889 |
pcre_ucp_findchar(). */ |
codes. */ |
890 |
|
|
891 |
typedef struct { |
typedef struct { |
892 |
const char *name; |
const char *name; |
893 |
int value; |
pcre_uint16 type; |
894 |
|
pcre_uint16 value; |
895 |
} ucp_type_table; |
} ucp_type_table; |
896 |
|
|
897 |
|
|
922 |
extern int _pcre_ord2utf8(int, uschar *); |
extern int _pcre_ord2utf8(int, uschar *); |
923 |
extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *, |
extern real_pcre * _pcre_try_flipped(const real_pcre *, real_pcre *, |
924 |
const pcre_study_data *, pcre_study_data *); |
const pcre_study_data *, pcre_study_data *); |
925 |
extern int _pcre_ucp_findchar(const int, int *, int *); |
extern int _pcre_ucp_findprop(const int, int *, int *); |
926 |
|
extern int _pcre_ucp_othercase(const int); |
927 |
extern int _pcre_valid_utf8(const uschar *, int); |
extern int _pcre_valid_utf8(const uschar *, int); |
928 |
extern BOOL _pcre_xclass(int, const uschar *); |
extern BOOL _pcre_xclass(int, const uschar *); |
929 |
|
|