/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 168 by ph10, Tue May 29 15:18:18 2007 UTC revision 208 by ph10, Mon Aug 6 15:23:29 2007 UTC
# Line 67  be absolutely sure we get our version. * Line 67  be absolutely sure we get our version. *
67  #endif  #endif
68    
69    
 /* Get the definitions provided by running "configure" */  
   
 #include "config.h"  
   
70  /* Standard C headers plus the external interface definition. The only time  /* Standard C headers plus the external interface definition. The only time
71  setjmp and stdarg are used is when NO_RECURSE is set. */  setjmp and stdarg are used is when NO_RECURSE is set. */
72    
# Line 112  PCRE_EXP_DATA_DEFN only if they are not Line 108  PCRE_EXP_DATA_DEFN only if they are not
108    
109  #ifndef PCRE_EXP_DECL  #ifndef PCRE_EXP_DECL
110  #  ifdef _WIN32  #  ifdef _WIN32
111  #    ifdef DLL_EXPORT  #    ifndef PCRE_STATIC
112  #      define PCRE_EXP_DECL       extern __declspec(dllexport)  #      define PCRE_EXP_DECL       extern __declspec(dllexport)
113  #      define PCRE_EXP_DEFN       __declspec(dllexport)  #      define PCRE_EXP_DEFN       __declspec(dllexport)
114  #      define PCRE_EXP_DATA_DEFN  __declspec(dllexport)  #      define PCRE_EXP_DATA_DEFN  __declspec(dllexport)
# Line 121  PCRE_EXP_DATA_DEFN only if they are not Line 117  PCRE_EXP_DATA_DEFN only if they are not
117  #      define PCRE_EXP_DEFN  #      define PCRE_EXP_DEFN
118  #      define PCRE_EXP_DATA_DEFN  #      define PCRE_EXP_DATA_DEFN
119  #    endif  #    endif
 #  
120  #  else  #  else
121  #    ifdef __cplusplus  #    ifdef __cplusplus
122  #      define PCRE_EXP_DECL       extern "C"  #      define PCRE_EXP_DECL       extern "C"
# Line 234  must begin with PCRE_. */ Line 229  must begin with PCRE_. */
229  /* Include the public PCRE header and the definitions of UCP character property  /* Include the public PCRE header and the definitions of UCP character property
230  values. */  values. */
231    
232  #include <pcre.h>  #include "pcre.h"
233  #include "ucp.h"  #include "ucp.h"
234    
235  /* When compiling for use with the Virtual Pascal compiler, these functions  /* When compiling for use with the Virtual Pascal compiler, these functions
# Line 363  capturing parenthesis numbers in back re Line 358  capturing parenthesis numbers in back re
358    
359  /* When UTF-8 encoding is being used, a character is no longer just a single  /* When UTF-8 encoding is being used, a character is no longer just a single
360  byte. The macros for character handling generate simple sequences when used in  byte. The macros for character handling generate simple sequences when used in
361  byte-mode, and more complicated ones for UTF-8 characters. */  byte-mode, and more complicated ones for UTF-8 characters. BACKCHAR should
362    never be called in byte mode. To make sure it can never even appear when UTF-8
363    support is omitted, we don't even define it. */
364    
365  #ifndef SUPPORT_UTF8  #ifndef SUPPORT_UTF8
366  #define GETCHAR(c, eptr) c = *eptr;  #define GETCHAR(c, eptr) c = *eptr;
# Line 371  byte-mode, and more complicated ones for Line 368  byte-mode, and more complicated ones for
368  #define GETCHARINC(c, eptr) c = *eptr++;  #define GETCHARINC(c, eptr) c = *eptr++;
369  #define GETCHARINCTEST(c, eptr) c = *eptr++;  #define GETCHARINCTEST(c, eptr) c = *eptr++;
370  #define GETCHARLEN(c, eptr, len) c = *eptr;  #define GETCHARLEN(c, eptr, len) c = *eptr;
371  #define BACKCHAR(eptr)  /* #define BACKCHAR(eptr) */
372    
373  #else   /* SUPPORT_UTF8 */  #else   /* SUPPORT_UTF8 */
374    
# Line 464  if there are extra bytes. This is called Line 461  if there are extra bytes. This is called
461      }      }
462    
463  /* If the pointer is not at the start of a character, move it back until  /* If the pointer is not at the start of a character, move it back until
464  it is. Called only in UTF-8 mode. */  it is. This is called only in UTF-8 mode - we don't put a test within the macro
465    because almost all calls are already within a block of UTF-8 only code. */
466    
467  #define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--;  #define BACKCHAR(eptr) while((*eptr & 0xc0) == 0x80) eptr--
468    
469  #endif  #endif
470    
# Line 605  ESC_Z to detect the types that may be re Line 603  ESC_Z to detect the types that may be re
603  consume characters. If any new escapes are put in between that don't consume a  consume characters. If any new escapes are put in between that don't consume a
604  character, that code will have to change. */  character, that code will have to change. */
605    
606  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,  enum { ESC_A = 1, ESC_G, ESC_K, ESC_B, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s,
607         ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_X, ESC_Z, ESC_z,         ESC_W, ESC_w, ESC_dum1, ESC_C, ESC_P, ESC_p, ESC_R, ESC_H, ESC_h,
608         ESC_E, ESC_Q, ESC_k, ESC_REF };         ESC_V, ESC_v, ESC_X, ESC_Z, ESC_z, ESC_E, ESC_Q, ESC_k, ESC_REF };
609    
610    
611  /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets  /* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
# Line 629  enum { Line 627  enum {
627    
628    OP_SOD,            /* 1 Start of data: \A */    OP_SOD,            /* 1 Start of data: \A */
629    OP_SOM,            /* 2 Start of match (subject + offset): \G */    OP_SOM,            /* 2 Start of match (subject + offset): \G */
630    OP_SET_SOM,        /* 3 Set start of match (\K) */    OP_SET_SOM,        /* 3 Set start of match (\K) */
631    OP_NOT_WORD_BOUNDARY,  /*  4 \B */    OP_NOT_WORD_BOUNDARY,  /*  4 \B */
632    OP_WORD_BOUNDARY,      /*  5 \b */    OP_WORD_BOUNDARY,      /*  5 \b */
633    OP_NOT_DIGIT,          /*  6 \D */    OP_NOT_DIGIT,          /*  6 \D */
# Line 643  enum { Line 641  enum {
641    OP_NOTPROP,        /* 14 \P (not Unicode property) */    OP_NOTPROP,        /* 14 \P (not Unicode property) */
642    OP_PROP,           /* 15 \p (Unicode property) */    OP_PROP,           /* 15 \p (Unicode property) */
643    OP_ANYNL,          /* 16 \R (any newline sequence) */    OP_ANYNL,          /* 16 \R (any newline sequence) */
644    OP_EXTUNI,         /* 17 \X (extended Unicode sequence) */    OP_NOT_HSPACE,     /* 17 \H (not horizontal whitespace) */
645    OP_EODN,           /* 18 End of data or \n at end of data: \Z. */    OP_HSPACE,         /* 18 \h (horizontal whitespace) */
646    OP_EOD,            /* 19 End of data: \z */    OP_NOT_VSPACE,     /* 19 \V (not vertical whitespace) */
647      OP_VSPACE,         /* 20 \v (vertical whitespace) */
648    OP_OPT,            /* 20 Set runtime options */    OP_EXTUNI,         /* 21 \X (extended Unicode sequence) */
649    OP_CIRC,           /* 21 Start of line - varies with multiline switch */    OP_EODN,           /* 22 End of data or \n at end of data: \Z. */
650    OP_DOLL,           /* 22 End of line - varies with multiline switch */    OP_EOD,            /* 23 End of data: \z */
651    OP_CHAR,           /* 23 Match one character, casefully */  
652    OP_CHARNC,         /* 24 Match one character, caselessly */    OP_OPT,            /* 24 Set runtime options */
653    OP_NOT,            /* 25 Match one character, not the following one */    OP_CIRC,           /* 25 Start of line - varies with multiline switch */
654      OP_DOLL,           /* 26 End of line - varies with multiline switch */
655    OP_STAR,           /* 26 The maximizing and minimizing versions of */    OP_CHAR,           /* 27 Match one character, casefully */
656    OP_MINSTAR,        /* 27 these six opcodes must come in pairs, with */    OP_CHARNC,         /* 28 Match one character, caselessly */
657    OP_PLUS,           /* 28 the minimizing one second. */    OP_NOT,            /* 29 Match one character, not the following one */
658    OP_MINPLUS,        /* 29 This first set applies to single characters.*/  
659    OP_QUERY,          /* 30 */    OP_STAR,           /* 30 The maximizing and minimizing versions of */
660    OP_MINQUERY,       /* 31 */    OP_MINSTAR,        /* 31 these six opcodes must come in pairs, with */
661      OP_PLUS,           /* 32 the minimizing one second. */
662    OP_UPTO,           /* 32 From 0 to n matches */    OP_MINPLUS,        /* 33 This first set applies to single characters.*/
663    OP_MINUPTO,        /* 33 */    OP_QUERY,          /* 34 */
664    OP_EXACT,          /* 34 Exactly n matches */    OP_MINQUERY,       /* 35 */
665    
666    OP_POSSTAR,        /* 35 Possessified star */    OP_UPTO,           /* 36 From 0 to n matches */
667    OP_POSPLUS,        /* 36 Possessified plus */    OP_MINUPTO,        /* 37 */
668    OP_POSQUERY,       /* 37 Possessified query */    OP_EXACT,          /* 38 Exactly n matches */
669    OP_POSUPTO,        /* 38 Possessified upto */  
670      OP_POSSTAR,        /* 39 Possessified star */
671    OP_NOTSTAR,        /* 39 The maximizing and minimizing versions of */    OP_POSPLUS,        /* 40 Possessified plus */
672    OP_NOTMINSTAR,     /* 40 these six opcodes must come in pairs, with */    OP_POSQUERY,       /* 41 Possessified query */
673    OP_NOTPLUS,        /* 41 the minimizing one second. They must be in */    OP_POSUPTO,        /* 42 Possessified upto */
674    OP_NOTMINPLUS,     /* 42 exactly the same order as those above. */  
675    OP_NOTQUERY,       /* 43 This set applies to "not" single characters. */    OP_NOTSTAR,        /* 43 The maximizing and minimizing versions of */
676    OP_NOTMINQUERY,    /* 44 */    OP_NOTMINSTAR,     /* 44 these six opcodes must come in pairs, with */
677      OP_NOTPLUS,        /* 45 the minimizing one second. They must be in */
678    OP_NOTUPTO,        /* 45 From 0 to n matches */    OP_NOTMINPLUS,     /* 46 exactly the same order as those above. */
679    OP_NOTMINUPTO,     /* 46 */    OP_NOTQUERY,       /* 47 This set applies to "not" single characters. */
680    OP_NOTEXACT,       /* 47 Exactly n matches */    OP_NOTMINQUERY,    /* 48 */
681    
682    OP_NOTPOSSTAR,     /* 48 Possessified versions */    OP_NOTUPTO,        /* 49 From 0 to n matches */
683    OP_NOTPOSPLUS,     /* 49 */    OP_NOTMINUPTO,     /* 50 */
684    OP_NOTPOSQUERY,    /* 50 */    OP_NOTEXACT,       /* 51 Exactly n matches */
685    OP_NOTPOSUPTO,     /* 51 */  
686      OP_NOTPOSSTAR,     /* 52 Possessified versions */
687    OP_TYPESTAR,       /* 52 The maximizing and minimizing versions of */    OP_NOTPOSPLUS,     /* 53 */
688    OP_TYPEMINSTAR,    /* 53 these six opcodes must come in pairs, with */    OP_NOTPOSQUERY,    /* 54 */
689    OP_TYPEPLUS,       /* 54 the minimizing one second. These codes must */    OP_NOTPOSUPTO,     /* 55 */
690    OP_TYPEMINPLUS,    /* 55 be in exactly the same order as those above. */  
691    OP_TYPEQUERY,      /* 56 This set applies to character types such as \d */    OP_TYPESTAR,       /* 56 The maximizing and minimizing versions of */
692    OP_TYPEMINQUERY,   /* 57 */    OP_TYPEMINSTAR,    /* 57 these six opcodes must come in pairs, with */
693      OP_TYPEPLUS,       /* 58 the minimizing one second. These codes must */
694    OP_TYPEUPTO,       /* 58 From 0 to n matches */    OP_TYPEMINPLUS,    /* 59 be in exactly the same order as those above. */
695    OP_TYPEMINUPTO,    /* 59 */    OP_TYPEQUERY,      /* 60 This set applies to character types such as \d */
696    OP_TYPEEXACT,      /* 60 Exactly n matches */    OP_TYPEMINQUERY,   /* 61 */
697    
698    OP_TYPEPOSSTAR,    /* 61 Possessified versions */    OP_TYPEUPTO,       /* 62 From 0 to n matches */
699    OP_TYPEPOSPLUS,    /* 62 */    OP_TYPEMINUPTO,    /* 63 */
700    OP_TYPEPOSQUERY,   /* 63 */    OP_TYPEEXACT,      /* 64 Exactly n matches */
701    OP_TYPEPOSUPTO,    /* 64 */  
702      OP_TYPEPOSSTAR,    /* 65 Possessified versions */
703    OP_CRSTAR,         /* 65 The maximizing and minimizing versions of */    OP_TYPEPOSPLUS,    /* 66 */
704    OP_CRMINSTAR,      /* 66 all these opcodes must come in pairs, with */    OP_TYPEPOSQUERY,   /* 67 */
705    OP_CRPLUS,         /* 67 the minimizing one second. These codes must */    OP_TYPEPOSUPTO,    /* 68 */
706    OP_CRMINPLUS,      /* 68 be in exactly the same order as those above. */  
707    OP_CRQUERY,        /* 69 These are for character classes and back refs */    OP_CRSTAR,         /* 69 The maximizing and minimizing versions of */
708    OP_CRMINQUERY,     /* 70 */    OP_CRMINSTAR,      /* 70 all these opcodes must come in pairs, with */
709    OP_CRRANGE,        /* 71 These are different to the three sets above. */    OP_CRPLUS,         /* 71 the minimizing one second. These codes must */
710    OP_CRMINRANGE,     /* 72 */    OP_CRMINPLUS,      /* 72 be in exactly the same order as those above. */
711      OP_CRQUERY,        /* 73 These are for character classes and back refs */
712      OP_CRMINQUERY,     /* 74 */
713      OP_CRRANGE,        /* 75 These are different to the three sets above. */
714      OP_CRMINRANGE,     /* 76 */
715    
716    OP_CLASS,          /* 73 Match a character class, chars < 256 only */    OP_CLASS,          /* 77 Match a character class, chars < 256 only */
717    OP_NCLASS,         /* 74 Same, but the bitmap was created from a negative    OP_NCLASS,         /* 78 Same, but the bitmap was created from a negative
718                             class - the difference is relevant only when a UTF-8                             class - the difference is relevant only when a UTF-8
719                             character > 255 is encountered. */                             character > 255 is encountered. */
720    
721    OP_XCLASS,         /* 75 Extended class for handling UTF-8 chars within the    OP_XCLASS,         /* 79 Extended class for handling UTF-8 chars within the
722                             class. This does both positive and negative. */                             class. This does both positive and negative. */
723    
724    OP_REF,            /* 76 Match a back reference */    OP_REF,            /* 80 Match a back reference */
725    OP_RECURSE,        /* 77 Match a numbered subpattern (possibly recursive) */    OP_RECURSE,        /* 81 Match a numbered subpattern (possibly recursive) */
726    OP_CALLOUT,        /* 78 Call out to external function if provided */    OP_CALLOUT,        /* 82 Call out to external function if provided */
727    
728    OP_ALT,            /* 79 Start of alternation */    OP_ALT,            /* 83 Start of alternation */
729    OP_KET,            /* 80 End of group that doesn't have an unbounded repeat */    OP_KET,            /* 84 End of group that doesn't have an unbounded repeat */
730    OP_KETRMAX,        /* 81 These two must remain together and in this */    OP_KETRMAX,        /* 85 These two must remain together and in this */
731    OP_KETRMIN,        /* 82 order. They are for groups that repeat for ever. */    OP_KETRMIN,        /* 86 order. They are for groups that repeat for ever. */
732    
733    /* The assertions must come before BRA, CBRA, ONCE, and COND.*/    /* The assertions must come before BRA, CBRA, ONCE, and COND.*/
734    
735    OP_ASSERT,         /* 83 Positive lookahead */    OP_ASSERT,         /* 87 Positive lookahead */
736    OP_ASSERT_NOT,     /* 84 Negative lookahead */    OP_ASSERT_NOT,     /* 88 Negative lookahead */
737    OP_ASSERTBACK,     /* 85 Positive lookbehind */    OP_ASSERTBACK,     /* 89 Positive lookbehind */
738    OP_ASSERTBACK_NOT, /* 86 Negative lookbehind */    OP_ASSERTBACK_NOT, /* 90 Negative lookbehind */
739    OP_REVERSE,        /* 87 Move pointer back - used in lookbehind assertions */    OP_REVERSE,        /* 91 Move pointer back - used in lookbehind assertions */
740    
741    /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,    /* ONCE, BRA, CBRA, and COND must come after the assertions, with ONCE first,
742    as there's a test for >= ONCE for a subpattern that isn't an assertion. */    as there's a test for >= ONCE for a subpattern that isn't an assertion. */
743    
744    OP_ONCE,           /* 88 Atomic group */    OP_ONCE,           /* 92 Atomic group */
745    OP_BRA,            /* 89 Start of non-capturing bracket */    OP_BRA,            /* 93 Start of non-capturing bracket */
746    OP_CBRA,           /* 90 Start of capturing bracket */    OP_CBRA,           /* 94 Start of capturing bracket */
747    OP_COND,           /* 91 Conditional group */    OP_COND,           /* 95 Conditional group */
748    
749    /* These three must follow the previous three, in the same order. There's a    /* These three must follow the previous three, in the same order. There's a
750    check for >= SBRA to distinguish the two sets. */    check for >= SBRA to distinguish the two sets. */
751    
752    OP_SBRA,           /* 92 Start of non-capturing bracket, check empty  */    OP_SBRA,           /* 96 Start of non-capturing bracket, check empty  */
753    OP_SCBRA,          /* 93 Start of capturing bracket, check empty */    OP_SCBRA,          /* 97 Start of capturing bracket, check empty */
754    OP_SCOND,          /* 94 Conditional group, check empty */    OP_SCOND,          /* 98 Conditional group, check empty */
755    
756    OP_CREF,           /* 95 Used to hold a capture number as condition */    OP_CREF,           /* 99 Used to hold a capture number as condition */
757    OP_RREF,           /* 96 Used to hold a recursion number as condition */    OP_RREF,           /* 100 Used to hold a recursion number as condition */
758    OP_DEF,            /* 97 The DEFINE condition */    OP_DEF,            /* 101 The DEFINE condition */
759    
760    OP_BRAZERO,        /* 98 These two must remain together and in this */    OP_BRAZERO,        /* 102 These two must remain together and in this */
761    OP_BRAMINZERO      /* 99 order. */    OP_BRAMINZERO      /* 103 order. */
762  };  };
763    
764    
# Line 766  for debugging. The macro is referenced o Line 768  for debugging. The macro is referenced o
768  #define OP_NAME_LIST \  #define OP_NAME_LIST \
769    "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d",         \    "End", "\\A", "\\G", "\\K", "\\B", "\\b", "\\D", "\\d",         \
770    "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \    "\\S", "\\s", "\\W", "\\w", "Any", "Anybyte",                   \
771    "notprop", "prop", "anynl", "extuni",                           \    "notprop", "prop", "\\R", "\\H", "\\h", "\\V", "\\v",           \
772    "\\Z", "\\z",                                                   \    "extuni",  "\\Z", "\\z",                                        \
773    "Opt", "^", "$", "char", "charnc", "not",                       \    "Opt", "^", "$", "char", "charnc", "not",                       \
774    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",                 \
775    "*+","++", "?+", "{",                                           \    "*+","++", "?+", "{",                                           \
# Line 779  for debugging. The macro is referenced o Line 781  for debugging. The macro is referenced o
781    "class", "nclass", "xclass", "Ref", "Recurse", "Callout",       \    "class", "nclass", "xclass", "Ref", "Recurse", "Callout",       \
782    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",     \
783    "AssertB", "AssertB not", "Reverse",                            \    "AssertB", "AssertB not", "Reverse",                            \
784    "Once", "Bra 0", "Bra", "Cond", "SBra 0", "SBra", "SCond",      \    "Once", "Bra", "CBra", "Cond", "SBra", "SCBra", "SCond",        \
785    "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero"    "Cond ref", "Cond rec", "Cond def", "Brazero", "Braminzero"
786    
787    
# Line 797  in UTF-8 mode. The code that uses this t Line 799  in UTF-8 mode. The code that uses this t
799    1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \    1, 1, 1, 1, 1,                 /* \A, \G, \K, \B, \b                     */ \
800    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */ \
801    1, 1,                          /* Any, Anybyte                           */ \    1, 1,                          /* Any, Anybyte                           */ \
802    3, 3, 1, 1,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */ \    3, 3, 1,                       /* NOTPROP, PROP, EXTUNI                  */ \
803      1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */ \
804    1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \    1, 1, 2, 1, 1,                 /* \Z, \z, Opt, ^, $                      */ \
805    2,                             /* Char  - the minimum length             */ \    2,                             /* Char  - the minimum length             */ \
806    2,                             /* Charnc  - the minimum length           */ \    2,                             /* Charnc  - the minimum length           */ \

Legend:
Removed from v.168  
changed lines
  Added in v.208

  ViewVC Help
Powered by ViewVC 1.1.5