/[pcre]/code/tags/pcre-4.5/internal.h
ViewVC logotype

Diff of /code/tags/pcre-4.5/internal.h

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

code/trunk/internal.h revision 63 by nigel, Sat Feb 24 21:40:03 2007 UTC code/tags/pcre-4.5/internal.h revision 74 by nigel, Sat Feb 24 21:40:32 2007 UTC
# Line 39  modules, but which are not relevant to t Line 39  modules, but which are not relevant to t
39    
40  #include "config.h"  #include "config.h"
41    
42    /* Standard C headers plus the external interface definition. The only time
43    setjmp and stdarg are used is when NO_RECURSE is set. */
44    
45    #include <ctype.h>
46    #include <limits.h>
47    #include <setjmp.h>
48    #include <stdarg.h>
49    #include <stddef.h>
50    #include <stdio.h>
51    #include <stdlib.h>
52    #include <string.h>
53    
54    #ifndef PCRE_SPY
55    #define PCRE_DEFINITION       /* Win32 __declspec(export) trigger for .dll */
56    #endif
57    
58    #include "pcre.h"
59    
60  /* When compiling for use with the Virtual Pascal compiler, these functions  /* When compiling for use with the Virtual Pascal compiler, these functions
61  need to have their names changed. PCRE must be compiled with the -DVPCOMPAT  need to have their names changed. PCRE must be compiled with the -DVPCOMPAT
62  option on the command line. */  option on the command line. */
# Line 151  capturing parenthesis numbers in back re Line 169  capturing parenthesis numbers in back re
169  #define PUT2INC(a,n,d)  PUT2(a,n,d), a += 2  #define PUT2INC(a,n,d)  PUT2(a,n,d), a += 2
170    
171    
 /* Standard C headers plus the external interface definition */  
   
 #include <ctype.h>  
 #include <limits.h>  
 #include <stddef.h>  
 #include <stdio.h>  
 #include <stdlib.h>  
 #include <string.h>  
   
 #ifndef PCRE_SPY  
 #define PCRE_DEFINITION       /* Win32 __declspec(export) trigger for .dll */  
 #endif  
   
 #include "pcre.h"  
   
172  /* In case there is no definition of offsetof() provided - though any proper  /* In case there is no definition of offsetof() provided - though any proper
173  Standard C system should have one. */  Standard C system should have one. */
174    
# Line 198  time, run time or study time, respective Line 201  time, run time or study time, respective
201  #define PUBLIC_OPTIONS \  #define PUBLIC_OPTIONS \
202    (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \    (PCRE_CASELESS|PCRE_EXTENDED|PCRE_ANCHORED|PCRE_MULTILINE| \
203     PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \     PCRE_DOTALL|PCRE_DOLLAR_ENDONLY|PCRE_EXTRA|PCRE_UNGREEDY|PCRE_UTF8| \
204     PCRE_NO_AUTO_CAPTURE)     PCRE_NO_AUTO_CAPTURE|PCRE_NO_UTF8_CHECK)
205    
206  #define PUBLIC_EXEC_OPTIONS \  #define PUBLIC_EXEC_OPTIONS \
207    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY)    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK)
208    
209  #define PUBLIC_STUDY_OPTIONS 0   /* None defined */  #define PUBLIC_STUDY_OPTIONS 0   /* None defined */
210    
# Line 214  time, run time or study time, respective Line 217  time, run time or study time, respective
217  #define REQ_UNSET (-2)  #define REQ_UNSET (-2)
218  #define REQ_NONE  (-1)  #define REQ_NONE  (-1)
219    
220  /* Flags added to firstchar or reqchar */  /* Flags added to firstbyte or reqbyte; a "non-literal" item is either a
221    variable-length repeat, or anything other than literal characters. */
222    
223  #define REQ_CASELESS 0x0100    /* indicates caselessness */  #define REQ_CASELESS 0x0100    /* indicates caselessness */
224  #define REQ_EOL      0x0200    /* indicates reqchar followed by $ */  #define REQ_VARY     0x0200    /* reqbyte followed a non-literal item */
225    
226  /* Miscellaneous definitions */  /* Miscellaneous definitions */
227    
# Line 246  ESC_n is defined as yet another macro, w Line 250  ESC_n is defined as yet another macro, w
250  #define ESC_r '\r'  #define ESC_r '\r'
251  #endif  #endif
252    
253  #ifndef ESC_t  /* We can't officially use ESC_t because it is a POSIX reserved identifier
254  #define ESC_t '\t'  (presumably because of all the others like size_t). */
255    
256    #ifndef ESC_tee
257    #define ESC_tee '\t'
258  #endif  #endif
259    
260  /* These are escaped items that aren't just an encoding of a particular data  /* These are escaped items that aren't just an encoding of a particular data
# Line 351  enum { Line 358  enum {
358                             class - the difference is relevant only when a UTF-8                             class - the difference is relevant only when a UTF-8
359                             character > 255 is encountered. */                             character > 255 is encountered. */
360    
361    OP_XCLASS,         /* 56 Extended class for handling UTF-8 chars within the    OP_XCLASS,         /* 57 Extended class for handling UTF-8 chars within the
362                             class. This does both positive and negative. */                             class. This does both positive and negative. */
363    
364    OP_REF,            /* 57 Match a back reference */    OP_REF,            /* 58 Match a back reference */
365    OP_RECURSE,        /* 58 Match a numbered subpattern (possibly recursive) */    OP_RECURSE,        /* 59 Match a numbered subpattern (possibly recursive) */
366    OP_CALLOUT,        /* 59 Call out to external function if provided */    OP_CALLOUT,        /* 60 Call out to external function if provided */
367    
368    OP_ALT,            /* 60 Start of alternation */    OP_ALT,            /* 61 Start of alternation */
369    OP_KET,            /* 61 End of group that doesn't have an unbounded repeat */    OP_KET,            /* 62 End of group that doesn't have an unbounded repeat */
370    OP_KETRMAX,        /* 62 These two must remain together and in this */    OP_KETRMAX,        /* 63 These two must remain together and in this */
371    OP_KETRMIN,        /* 63 order. They are for groups that repeat for ever. */    OP_KETRMIN,        /* 64 order. They are for groups that repeat for ever. */
372    
373    /* The assertions must come before ONCE and COND */    /* The assertions must come before ONCE and COND */
374    
375    OP_ASSERT,         /* 64 Positive lookahead */    OP_ASSERT,         /* 65 Positive lookahead */
376    OP_ASSERT_NOT,     /* 65 Negative lookahead */    OP_ASSERT_NOT,     /* 66 Negative lookahead */
377    OP_ASSERTBACK,     /* 66 Positive lookbehind */    OP_ASSERTBACK,     /* 67 Positive lookbehind */
378    OP_ASSERTBACK_NOT, /* 67 Negative lookbehind */    OP_ASSERTBACK_NOT, /* 68 Negative lookbehind */
379    OP_REVERSE,        /* 68 Move pointer back - used in lookbehind assertions */    OP_REVERSE,        /* 69 Move pointer back - used in lookbehind assertions */
380    
381    /* ONCE and COND must come after the assertions, with ONCE first, as there's    /* ONCE and COND must come after the assertions, with ONCE first, as there's
382    a test for >= ONCE for a subpattern that isn't an assertion. */    a test for >= ONCE for a subpattern that isn't an assertion. */
383    
384    OP_ONCE,           /* 69 Once matched, don't back up into the subpattern */    OP_ONCE,           /* 70 Once matched, don't back up into the subpattern */
385    OP_COND,           /* 70 Conditional group */    OP_COND,           /* 71 Conditional group */
386    OP_CREF,           /* 71 Used to hold an extraction string number (cond ref) */    OP_CREF,           /* 72 Used to hold an extraction string number (cond ref) */
387    
388    OP_BRAZERO,        /* 72 These two must remain together and in this */    OP_BRAZERO,        /* 73 These two must remain together and in this */
389    OP_BRAMINZERO,     /* 73 order. */    OP_BRAMINZERO,     /* 74 order. */
390    
391    OP_BRANUMBER,      /* 74 Used for extracting brackets whose number is greater    OP_BRANUMBER,      /* 75 Used for extracting brackets whose number is greater
392                             than can fit into an opcode. */                             than can fit into an opcode. */
393    
394    OP_BRA             /* 75 This and greater values are used for brackets that    OP_BRA             /* 76 This and greater values are used for brackets that
395                             extract substrings up to a basic limit. After that,                             extract substrings up to a basic limit. After that,
396                             use is made of OP_BRANUMBER. */                             use is made of OP_BRANUMBER. */
397  };  };
# Line 427  in UTF-8 mode. The code that uses this t Line 434  in UTF-8 mode. The code that uses this t
434    1, 1, 1, 1, 2, 1, 1,           /* Any, Anybyte, \Z, \z, Opt, ^, $        */ \    1, 1, 1, 1, 2, 1, 1,           /* Any, Anybyte, \Z, \z, Opt, ^, $        */ \
435    2,                             /* Chars - the minimum length             */ \    2,                             /* Chars - the minimum length             */ \
436    2,                             /* not                                    */ \    2,                             /* not                                    */ \
437    /* Positive single-char repeats                                          */ \    /* Positive single-char repeats                            ** These are  */ \
438    2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** These are  */ \    2, 2, 2, 2, 2, 2,              /* *, *?, +, +?, ?, ??      ** minima in  */ \
439    4, 4, 4,                       /* upto, minupto, exact     ** minima     */ \    4, 4, 4,                       /* upto, minupto, exact     ** UTF-8 mode */ \
440    /* Negative single-char repeats                                          */ \    /* Negative single-char repeats - only for chars < 256                   */ \
441    2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \    2, 2, 2, 2, 2, 2,              /* NOT *, *?, +, +?, ?, ??                */ \
442    4, 4, 4,                       /* NOT upto, minupto, exact               */ \    4, 4, 4,                       /* NOT upto, minupto, exact               */ \
443    /* Positive type repeats                                                 */ \    /* Positive type repeats                                                 */ \
# Line 522  just to accommodate the POSIX wrapper. * Line 529  just to accommodate the POSIX wrapper. *
529  #define ERR41 "unrecognized character after (?P"  #define ERR41 "unrecognized character after (?P"
530  #define ERR42 "syntax error after (?P"  #define ERR42 "syntax error after (?P"
531  #define ERR43 "two named groups have the same name"  #define ERR43 "two named groups have the same name"
532    #define ERR44 "invalid UTF-8 string"
533    
534  /* All character handling must be done as unsigned characters. Otherwise there  /* All character handling must be done as unsigned characters. Otherwise there
535  are problems with top-bit-set characters and functions such as isspace().  are problems with top-bit-set characters and functions such as isspace().
# Line 570  typedef struct compile_data { Line 578  typedef struct compile_data {
578    int  name_entry_size;         /* Size of each entry */    int  name_entry_size;         /* Size of each entry */
579    int  top_backref;             /* Maximum back reference */    int  top_backref;             /* Maximum back reference */
580    unsigned int backref_map;     /* Bitmap of low back refs */    unsigned int backref_map;     /* Bitmap of low back refs */
581      int  req_varyopt;             /* "After variable item" flag for reqbyte */
582  } compile_data;  } compile_data;
583    
584  /* Structure for maintaining a chain of pointers to the currently incomplete  /* Structure for maintaining a chain of pointers to the currently incomplete
# Line 584  typedef struct branch_chain { Line 593  typedef struct branch_chain {
593  call within the pattern. */  call within the pattern. */
594    
595  typedef struct recursion_info {  typedef struct recursion_info {
596    struct recursion_info *prev;  /* Previous recursion record (or NULL) */    struct recursion_info *prevrec; /* Previous recursion record (or NULL) */
597    int group_num;                /* Number of group that was called */    int group_num;                /* Number of group that was called */
598    const uschar *after_call;     /* "Return value": points after the call in the expr */    const uschar *after_call;     /* "Return value": points after the call in the expr */
599    const uschar *save_start;     /* Old value of md->start_match */    const uschar *save_start;     /* Old value of md->start_match */
# Line 592  typedef struct recursion_info { Line 601  typedef struct recursion_info {
601    int saved_max;                /* Number of saved offsets */    int saved_max;                /* Number of saved offsets */
602  } recursion_info;  } recursion_info;
603    
604    /* When compiling in a mode that doesn't use recursive calls to match(),
605    a structure is used to remember local variables on the heap. It is defined in
606    pcre.c, close to the match() function, so that it is easy to keep it in step
607    with any changes of local variables. However, the pointer to the current frame
608    must be saved in some "static" place over a longjmp(). We declare the
609    structure here so that we can put a pointer in the match_data structure.
610    NOTE: This isn't used for a "normal" compilation of pcre. */
611    
612    struct heapframe;
613    
614  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
615  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
616    
617  typedef struct match_data {  typedef struct match_data {
618    int    match_call_count;      /* As it says */    unsigned long int match_call_count; /* As it says */
619    int    match_limit;           /* As it says */    unsigned long int match_limit;/* As it says */
620    int   *offset_vector;         /* Offset vector */    int   *offset_vector;         /* Offset vector */
621    int    offset_end;            /* One past the end */    int    offset_end;            /* One past the end */
622    int    offset_max;            /* The maximum usable for return data */    int    offset_max;            /* The maximum usable for return data */
# Line 619  typedef struct match_data { Line 638  typedef struct match_data {
638    int    start_offset;          /* The start offset value */    int    start_offset;          /* The start offset value */
639    recursion_info *recursive;    /* Linked list of recursion data */    recursion_info *recursive;    /* Linked list of recursion data */
640    void  *callout_data;          /* To pass back to callouts */    void  *callout_data;          /* To pass back to callouts */
641      struct heapframe *thisframe;  /* Used only when compiling for no recursion */
642  } match_data;  } match_data;
643    
644  /* Bit definitions for entries in the pcre_ctypes table. */  /* Bit definitions for entries in the pcre_ctypes table. */

Legend:
Removed from v.63  
changed lines
  Added in v.74

  ViewVC Help
Powered by ViewVC 1.1.5