7 |
and semantics are as close as possible to those of the Perl 5 language. |
and semantics are as close as possible to those of the Perl 5 language. |
8 |
|
|
9 |
Written by Philip Hazel |
Written by Philip Hazel |
10 |
Copyright (c) 1997-2006 University of Cambridge |
Copyright (c) 1997-2007 University of Cambridge |
11 |
|
|
12 |
----------------------------------------------------------------------------- |
----------------------------------------------------------------------------- |
13 |
Redistribution and use in source and binary forms, with or without |
Redistribution and use in source and binary forms, with or without |
83 |
#include <stdlib.h> |
#include <stdlib.h> |
84 |
#include <string.h> |
#include <string.h> |
85 |
|
|
86 |
#ifndef PCRE_SPY |
/* When compiling a DLL for Windows, the exported symbols have to be declared |
87 |
#define PCRE_DEFINITION /* Win32 __declspec(export) trigger for .dll */ |
using some MS magic. I found some useful information on this web page: |
88 |
|
http://msdn2.microsoft.com/en-us/library/y4h7bcy6(VS.80).aspx. According to the |
89 |
|
information there, using __declspec(dllexport) without "extern" we have a |
90 |
|
definition; with "extern" we have a declaration. The settings here override the |
91 |
|
setting in pcre.h (which is included below); it defines only PCRE_EXP_DECL, |
92 |
|
which is all that is needed for applications, which import the symbols. We use: |
93 |
|
|
94 |
|
PCRE_EXP_DECL for declarations |
95 |
|
PCRE_EXP_DEFN for definitions of exported functions |
96 |
|
PCRE_EXP_DATA_DEFN for definitions of exported variables |
97 |
|
|
98 |
|
The reason for the two DEFN macros is that in non-Windows environments, one |
99 |
|
does not want to have "extern" before variable definitions because it leads to |
100 |
|
compiler warnings. So we distinguish between functions and variables. In |
101 |
|
Windows, the two should always be the same. |
102 |
|
|
103 |
|
The reason for wrapping this in #ifndef PCRE_EXP_DECL is so that pcretest, |
104 |
|
which is an application, but needs to import this file in order to "peek" at |
105 |
|
internals, can #include pcre.h first, to get an application's-eye view. |
106 |
|
|
107 |
|
In principle, people compiling for non-Windows, non-Unix-like (i.e. uncommon, |
108 |
|
special-purpose) environments might want to stick other stuff in front of |
109 |
|
exported symbols. That's why, in the non-Windows case, we set PCRE_EXP_DEFN and |
110 |
|
PCRE_EXP_DATA_DEFN only if they are not already set. */ |
111 |
|
|
112 |
|
#ifndef PCRE_EXP_DECL |
113 |
|
# ifdef _WIN32 |
114 |
|
# ifdef DLL_EXPORT |
115 |
|
# define PCRE_EXP_DECL extern __declspec(dllexport) |
116 |
|
# define PCRE_EXP_DEFN __declspec(dllexport) |
117 |
|
# define PCRE_EXP_DATA_DEFN __declspec(dllexport) |
118 |
|
# else |
119 |
|
# define PCRE_EXP_DECL extern |
120 |
|
# define PCRE_EXP_DEFN |
121 |
|
# define PCRE_EXP_DATA_DEFN |
122 |
|
# endif |
123 |
|
# |
124 |
|
# else |
125 |
|
# ifdef __cplusplus |
126 |
|
# define PCRE_EXP_DECL extern "C" |
127 |
|
# else |
128 |
|
# define PCRE_EXP_DECL extern |
129 |
|
# endif |
130 |
|
# ifndef PCRE_EXP_DEFN |
131 |
|
# define PCRE_EXP_DEFN PCRE_EXP_DECL |
132 |
|
# endif |
133 |
|
# ifndef PCRE_EXP_DATA_DEFN |
134 |
|
# define PCRE_EXP_DATA_DEFN |
135 |
|
# endif |
136 |
|
# endif |
137 |
#endif |
#endif |
138 |
|
|
139 |
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We |
/* We need to have types that specify unsigned 16-bit and 32-bit integers. We |
174 |
#define NOTACHAR 0xffffffff |
#define NOTACHAR 0xffffffff |
175 |
|
|
176 |
/* PCRE is able to support several different kinds of newline (CR, LF, CRLF, |
/* PCRE is able to support several different kinds of newline (CR, LF, CRLF, |
177 |
and "all" at present). The following macros are used to package up testing for |
"any" and "anycrlf" at present). The following macros are used to package up |
178 |
newlines. NLBLOCK, PSSTART, and PSEND are defined in the various modules to |
testing for newlines. NLBLOCK, PSSTART, and PSEND are defined in the various |
179 |
indicate in which datablock the parameters exist, and what the start/end of |
modules to indicate in which datablock the parameters exist, and what the |
180 |
string field names are. */ |
start/end of string field names are. */ |
181 |
|
|
182 |
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */ |
#define NLTYPE_FIXED 0 /* Newline is a fixed length string */ |
183 |
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */ |
#define NLTYPE_ANY 1 /* Newline is any Unicode line ending */ |
184 |
|
#define NLTYPE_ANYCRLF 2 /* Newline is CR, LF, or CRLF */ |
185 |
|
|
186 |
/* This macro checks for a newline at the given position */ |
/* This macro checks for a newline at the given position */ |
187 |
|
|
188 |
#define IS_NEWLINE(p) \ |
#define IS_NEWLINE(p) \ |
189 |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
190 |
((p) < NLBLOCK->PSEND && \ |
((p) < NLBLOCK->PSEND && \ |
191 |
_pcre_is_newline((p), NLBLOCK->PSEND, &(NLBLOCK->nllen), utf8) \ |
_pcre_is_newline((p), NLBLOCK->nltype, NLBLOCK->PSEND, &(NLBLOCK->nllen),\ |
192 |
) \ |
utf8)) \ |
193 |
: \ |
: \ |
194 |
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ |
((p) <= NLBLOCK->PSEND - NLBLOCK->nllen && \ |
195 |
(p)[0] == NLBLOCK->nl[0] && \ |
(p)[0] == NLBLOCK->nl[0] && \ |
202 |
#define WAS_NEWLINE(p) \ |
#define WAS_NEWLINE(p) \ |
203 |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
((NLBLOCK->nltype != NLTYPE_FIXED)? \ |
204 |
((p) > NLBLOCK->PSSTART && \ |
((p) > NLBLOCK->PSSTART && \ |
205 |
_pcre_was_newline((p), NLBLOCK->PSSTART, &(NLBLOCK->nllen), utf8) \ |
_pcre_was_newline((p), NLBLOCK->nltype, NLBLOCK->PSSTART, \ |
206 |
) \ |
&(NLBLOCK->nllen), utf8)) \ |
207 |
: \ |
: \ |
208 |
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ |
((p) >= NLBLOCK->PSSTART + NLBLOCK->nllen && \ |
209 |
(p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \ |
(p)[-NLBLOCK->nllen] == NLBLOCK->nl[0] && \ |
228 |
#define USPTR const unsigned char * |
#define USPTR const unsigned char * |
229 |
#endif |
#endif |
230 |
|
|
231 |
|
|
232 |
|
|
233 |
/* Include the public PCRE header and the definitions of UCP character property |
/* Include the public PCRE header and the definitions of UCP character property |
234 |
values. */ |
values. */ |
235 |
|
|
236 |
#include "pcre.h" |
#include <pcre.h> |
237 |
#include "ucp.h" |
#include "ucp.h" |
238 |
|
|
239 |
/* When compiling for use with the Virtual Pascal compiler, these functions |
/* When compiling for use with the Virtual Pascal compiler, these functions |
241 |
option on the command line. */ |
option on the command line. */ |
242 |
|
|
243 |
#ifdef VPCOMPAT |
#ifdef VPCOMPAT |
244 |
|
#define strlen(s) _strlen(s) |
245 |
#define strncmp(s1,s2,m) _strncmp(s1,s2,m) |
#define strncmp(s1,s2,m) _strncmp(s1,s2,m) |
246 |
|
#define memcmp(s,c,n) _memcmp(s,c,n) |
247 |
#define memcpy(d,s,n) _memcpy(d,s,n) |
#define memcpy(d,s,n) _memcpy(d,s,n) |
248 |
#define memmove(d,s,n) _memmove(d,s,n) |
#define memmove(d,s,n) _memmove(d,s,n) |
249 |
#define memset(s,c,n) _memset(s,c,n) |
#define memset(s,c,n) _memset(s,c,n) |
252 |
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), |
/* To cope with SunOS4 and other systems that lack memmove() but have bcopy(), |
253 |
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY |
define a macro for memmove() if HAVE_MEMMOVE is false, provided that HAVE_BCOPY |
254 |
is set. Otherwise, include an emulating function for those systems that have |
is set. Otherwise, include an emulating function for those systems that have |
255 |
neither (there are some non-Unix environments where this is the case). This assumes |
neither (there are some non-Unix environments where this is the case). */ |
|
that all calls to memmove are moving strings upwards in store, which is the |
|
|
case in PCRE. */ |
|
256 |
|
|
257 |
#if ! HAVE_MEMMOVE |
#ifndef HAVE_MEMMOVE |
258 |
#undef memmove /* some systems may have a macro */ |
#undef memmove /* some systems may have a macro */ |
259 |
#if HAVE_BCOPY |
#ifdef HAVE_BCOPY |
260 |
#define memmove(a, b, c) bcopy(b, a, c) |
#define memmove(a, b, c) bcopy(b, a, c) |
261 |
#else /* HAVE_BCOPY */ |
#else /* HAVE_BCOPY */ |
262 |
static void * |
static void * |
263 |
pcre_memmove(unsigned char *dest, const unsigned char *src, size_t n) |
pcre_memmove(void *d, const void *s, size_t n) |
264 |
{ |
{ |
265 |
size_t i; |
size_t i; |
266 |
dest += n; |
unsigned char *dest = (unsigned char *)d; |
267 |
src += n; |
const unsigned char *src = (const unsigned char *)s; |
268 |
for (i = 0; i < n; ++i) *(--dest) = *(--src); |
if (dest > src) |
269 |
return dest; |
{ |
270 |
|
dest += n; |
271 |
|
src += n; |
272 |
|
for (i = 0; i < n; ++i) *(--dest) = *(--src); |
273 |
|
return (void *)dest; |
274 |
|
} |
275 |
|
else |
276 |
|
{ |
277 |
|
for (i = 0; i < n; ++i) *dest++ = *src++; |
278 |
|
return (void *)(dest - n); |
279 |
|
} |
280 |
} |
} |
281 |
#define memmove(a, b, c) pcre_memmove(a, b, c) |
#define memmove(a, b, c) pcre_memmove(a, b, c) |
282 |
#endif /* not HAVE_BCOPY */ |
#endif /* not HAVE_BCOPY */ |
501 |
/* Masks for identifying the public options that are permitted at compile |
/* Masks for identifying the public options that are permitted at compile |
502 |
time, run time, or study time, respectively. */ |
time, run time, or study time, respectively. */ |
503 |
|
|
504 |
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY) |
#define PCRE_NEWLINE_BITS (PCRE_NEWLINE_CR|PCRE_NEWLINE_LF|PCRE_NEWLINE_ANY| \ |
505 |
|
PCRE_NEWLINE_ANYCRLF) |
506 |
|
|
507 |
#define PUBLIC_OPTIONS \ |
#define PUBLIC_OPTIONS \ |
508 |
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ |
(PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \ |
1089 |
one of the exported public functions. They have to be "external" in the C |
one of the exported public functions. They have to be "external" in the C |
1090 |
sense, but are not part of the PCRE public API. */ |
sense, but are not part of the PCRE public API. */ |
1091 |
|
|
1092 |
extern BOOL _pcre_is_newline(const uschar *, const uschar *, int *, |
extern BOOL _pcre_is_newline(const uschar *, int, const uschar *, |
1093 |
BOOL); |
int *, BOOL); |
1094 |
extern int _pcre_ord2utf8(int, uschar *); |
extern int _pcre_ord2utf8(int, uschar *); |
1095 |
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *, |
extern real_pcre *_pcre_try_flipped(const real_pcre *, real_pcre *, |
1096 |
const pcre_study_data *, pcre_study_data *); |
const pcre_study_data *, pcre_study_data *); |
1097 |
extern int _pcre_ucp_findprop(const unsigned int, int *, int *); |
extern int _pcre_ucp_findprop(const unsigned int, int *, int *); |
1098 |
extern unsigned int _pcre_ucp_othercase(const unsigned int); |
extern unsigned int _pcre_ucp_othercase(const unsigned int); |
1099 |
extern int _pcre_valid_utf8(const uschar *, int); |
extern int _pcre_valid_utf8(const uschar *, int); |
1100 |
extern BOOL _pcre_was_newline(const uschar *, const uschar *, int *, |
extern BOOL _pcre_was_newline(const uschar *, int, const uschar *, |
1101 |
BOOL); |
int *, BOOL); |
1102 |
extern BOOL _pcre_xclass(int, const uschar *); |
extern BOOL _pcre_xclass(int, const uschar *); |
1103 |
|
|
1104 |
#endif |
#endif |