/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

Left column: revision 850 by zherczeg, Wed Jan 4 17:29:11 2012 UTC. Right column: revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC.
# Line 254  typedef unsigned char pcre_uchar; Line 254  typedef unsigned char pcre_uchar;
254    
255  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
256  #if USHRT_MAX != 65535  #if USHRT_MAX != 65535
257  /* This is a warning message. Change PCRE_SCHAR16 to a 16 bit data type in  /* This is a warning message. Change PCRE_UCHAR16 to a 16 bit data type in
258  pcre.h(.in) and disable (comment out) this message. */  pcre.h(.in) and disable (comment out) this message. */
259  #error Warning: PCRE_SCHAR16 is not a 16 bit data type.  #error Warning: PCRE_UCHAR16 is not a 16 bit data type.
260  #endif  #endif
261    
262  typedef pcre_uint16 pcre_uchar;  typedef pcre_uint16 pcre_uchar;
# Line 523  capturing parenthesis numbers in back re Line 523  capturing parenthesis numbers in back re
523    
524  #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE  #define PUT2INC(a,n,d)  PUT2(a,n,d), a += IMM2_SIZE
525    
526    /* The maximum length of a MARK name is currently one data unit; it may be
527    changed in future to be a fixed number of bytes or to depend on LINK_SIZE. */
528    
529    #define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)
530    
531  /* When UTF encoding is being used, a character is no longer just a single  /* When UTF encoding is being used, a character is no longer just a single
532  character. The macros for character handling generate simple sequences when  character. The macros for character handling generate simple sequences when
533  used in character-mode, and more complicated ones for UTF characters.  used in character-mode, and more complicated ones for UTF characters.
# Line 887  time, run time, or study time, respectiv Line 892  time, run time, or study time, respectiv
892     PCRE_NO_START_OPTIMIZE)     PCRE_NO_START_OPTIMIZE)
893    
894  #define PUBLIC_STUDY_OPTIONS \  #define PUBLIC_STUDY_OPTIONS \
895     PCRE_STUDY_JIT_COMPILE     (PCRE_STUDY_JIT_COMPILE|PCRE_STUDY_JIT_PARTIAL_SOFT_COMPILE| \
896        PCRE_STUDY_JIT_PARTIAL_HARD_COMPILE|PCRE_STUDY_EXTRA_NEEDED)
897    
898  /* Magic number to provide a small check against being handed junk. */  /* Magic number to provide a small check against being handed junk. */
899    
# Line 939  macros to give the functions distinct na Line 945  macros to give the functions distinct na
945  #ifndef SUPPORT_UTF  #ifndef SUPPORT_UTF
946    
947  /* UTF-8 support is not enabled; use the platform-dependent character literals  /* UTF-8 support is not enabled; use the platform-dependent character literals
948  so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */  so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
949    mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
950    characters, a common practice has been to use its NL (0x15) character as the
951    line terminator in C-like processing environments. However, sometimes the LF
952    (0x25) character is used instead, according to this Unicode document:
953    
954    http://unicode.org/standard/reports/tr13/tr13-5.html
955    
956    PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
957    instead. Whichever is *not* chosen is defined as NEL.
958    
959    In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
960    same code point. */
961    
962    #ifdef EBCDIC
963    
964    #ifndef EBCDIC_NL25
965    #define CHAR_NL                     '\x15'
966    #define CHAR_NEL                    '\x25'
967    #define STR_NL                      "\x15"
968    #define STR_NEL                     "\x25"
969    #else
970    #define CHAR_NL                     '\x25'
971    #define CHAR_NEL                    '\x15'
972    #define STR_NL                      "\x25"
973    #define STR_NEL                     "\x15"
974    #endif
975    
976    #define CHAR_LF                     CHAR_NL
977    #define STR_LF                      STR_NL
978    
979    #define CHAR_ESC                    '\047'
980    #define CHAR_DEL                    '\007'
981    #define STR_ESC                     "\047"
982    #define STR_DEL                     "\007"
983    
984    #else  /* Not EBCDIC */
985    
986    /* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
987    compatibility. NEL is the Unicode newline character; make sure it is
988    a positive value. */
989    
990    #define CHAR_LF                     '\n'
991    #define CHAR_NL                     CHAR_LF
992    #define CHAR_NEL                    ((unsigned char)'\x85')
993    #define CHAR_ESC                    '\033'
994    #define CHAR_DEL                    '\177'
995    
996    #define STR_LF                      "\n"
997    #define STR_NL                      STR_LF
998    #define STR_NEL                     "\x85"
999    #define STR_ESC                     "\033"
1000    #define STR_DEL                     "\177"
1001    
1002    #endif  /* EBCDIC */
1003    
1004    /* The remaining definitions work in both environments. */
1005    
1006  #define CHAR_HT                     '\t'  #define CHAR_HT                     '\t'
1007  #define CHAR_VT                     '\v'  #define CHAR_VT                     '\v'
1008  #define CHAR_FF                     '\f'  #define CHAR_FF                     '\f'
1009  #define CHAR_CR                     '\r'  #define CHAR_CR                     '\r'
 #define CHAR_NL                     '\n'  
1010  #define CHAR_BS                     '\b'  #define CHAR_BS                     '\b'
1011  #define CHAR_BEL                    '\a'  #define CHAR_BEL                    '\a'
 #ifdef EBCDIC  
 #define CHAR_ESC                    '\047'  
 #define CHAR_DEL                    '\007'  
 #else  
 #define CHAR_ESC                    '\033'  
 #define CHAR_DEL                    '\177'  
 #endif  
1012    
1013  #define CHAR_SPACE                  ' '  #define CHAR_SPACE                  ' '
1014  #define CHAR_EXCLAMATION_MARK       '!'  #define CHAR_EXCLAMATION_MARK       '!'
# Line 1056  so that PCRE works on both ASCII and EBC Line 1110  so that PCRE works on both ASCII and EBC
1110  #define STR_VT                      "\v"  #define STR_VT                      "\v"
1111  #define STR_FF                      "\f"  #define STR_FF                      "\f"
1112  #define STR_CR                      "\r"  #define STR_CR                      "\r"
 #define STR_NL                      "\n"  
1113  #define STR_BS                      "\b"  #define STR_BS                      "\b"
1114  #define STR_BEL                     "\a"  #define STR_BEL                     "\a"
 #ifdef EBCDIC  
 #define STR_ESC                     "\047"  
 #define STR_DEL                     "\007"  
 #else  
 #define STR_ESC                     "\033"  
 #define STR_DEL                     "\177"  
 #endif  
1115    
1116  #define STR_SPACE                   " "  #define STR_SPACE                   " "
1117  #define STR_EXCLAMATION_MARK        "!"  #define STR_EXCLAMATION_MARK        "!"
# Line 1215  only. */ Line 1261  only. */
1261  #define CHAR_VT                     '\013'  #define CHAR_VT                     '\013'
1262  #define CHAR_FF                     '\014'  #define CHAR_FF                     '\014'
1263  #define CHAR_CR                     '\015'  #define CHAR_CR                     '\015'
1264  #define CHAR_NL                     '\012'  #define CHAR_LF                     '\012'
1265    #define CHAR_NL                     CHAR_LF
1266    #define CHAR_NEL                    ((unsigned char)'\x85')
1267  #define CHAR_BS                     '\010'  #define CHAR_BS                     '\010'
1268  #define CHAR_BEL                    '\007'  #define CHAR_BEL                    '\007'
1269  #define CHAR_ESC                    '\033'  #define CHAR_ESC                    '\033'
# Line 1478  only. */ Line 1526  only. */
1526  #endif  #endif
1527    
1528  #ifndef ESC_n  #ifndef ESC_n
1529  #define ESC_n CHAR_NL  #define ESC_n CHAR_LF
1530  #endif  #endif
1531    
1532  #ifndef ESC_r  #ifndef ESC_r
# Line 1939  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4, Line 1987  enum { ERR0,  ERR1,  ERR2,  ERR3,  ERR4,
1987         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,         ERR40, ERR41, ERR42, ERR43, ERR44, ERR45, ERR46, ERR47, ERR48, ERR49,
1988         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,         ERR50, ERR51, ERR52, ERR53, ERR54, ERR55, ERR56, ERR57, ERR58, ERR59,
1989         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,         ERR60, ERR61, ERR62, ERR63, ERR64, ERR65, ERR66, ERR67, ERR68, ERR69,
1990         ERR70, ERR71, ERR72, ERR73, ERR74, ERRCOUNT };         ERR70, ERR71, ERR72, ERR73, ERR74, ERR75, ERR76, ERRCOUNT };
1991    
1992    /* JIT compiling modes. The function list is indexed by them. */
1993    enum { JIT_COMPILE, JIT_PARTIAL_SOFT_COMPILE, JIT_PARTIAL_HARD_COMPILE,
1994           JIT_NUMBER_OF_COMPILE_MODES };
1995    
1996  /* The real format of the start of the pcre block; the index of names and the  /* The real format of the start of the pcre block; the index of names and the
1997  code vector run on as long as necessary after the end. We store an explicit  code vector run on as long as necessary after the end. We store an explicit
# Line 1958  fields are present. Currently PCRE alway Line 2010  fields are present. Currently PCRE alway
2010  NOTE NOTE NOTE  NOTE NOTE NOTE
2011  */  */
2012    
2013  typedef struct real_pcre {  #ifdef COMPILE_PCRE8
2014    #define REAL_PCRE real_pcre
2015    #else
2016    #define REAL_PCRE real_pcre16
2017    #endif
2018    
2019    typedef struct REAL_PCRE {
2020    pcre_uint32 magic_number;    pcre_uint32 magic_number;
2021    pcre_uint32 size;               /* Total that was malloced */    pcre_uint32 size;               /* Total that was malloced */
2022    pcre_uint32 options;            /* Public options */    pcre_uint32 options;            /* Public options */
2023    pcre_uint16 flags;              /* Private flags */    pcre_uint16 flags;              /* Private flags */
2024    pcre_uint16 dummy1;             /* For future use */    pcre_uint16 max_lookbehind;     /* Longest lookbehind (characters) */
2025    pcre_uint16 top_bracket;    pcre_uint16 top_bracket;        /* Highest numbered group */
2026    pcre_uint16 top_backref;    pcre_uint16 top_backref;        /* Highest numbered back reference */
2027    pcre_uint16 first_char;         /* Starting character */    pcre_uint16 first_char;         /* Starting character */
2028    pcre_uint16 req_char;           /* This character must be seen */    pcre_uint16 req_char;           /* This character must be seen */
2029    pcre_uint16 name_table_offset;  /* Offset to name table that follows */    pcre_uint16 name_table_offset;  /* Offset to name table that follows */
2030    pcre_uint16 name_entry_size;    /* Size of any name items */    pcre_uint16 name_entry_size;    /* Size of any name items */
2031    pcre_uint16 name_count;         /* Number of name items */    pcre_uint16 name_count;         /* Number of name items */
2032    pcre_uint16 ref_count;          /* Reference count */    pcre_uint16 ref_count;          /* Reference count */
   
2033    const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */    const pcre_uint8 *tables;       /* Pointer to tables or NULL for std */
2034    const pcre_uint8 *nullpad;      /* NULL padding */    const pcre_uint8 *nullpad;      /* NULL padding */
2035  } real_pcre;  } REAL_PCRE;
2036    
2037  /* The format of the block used to store data from pcre_study(). The same  /* The format of the block used to store data from pcre_study(). The same
2038  remark (see NOTE above) about extending this structure applies. */  remark (see NOTE above) about extending this structure applies. */
# Line 2018  typedef struct compile_data { Line 2075  typedef struct compile_data {
2075    int  workspace_size;              /* Size of workspace */    int  workspace_size;              /* Size of workspace */
2076    int  bracount;                    /* Count of capturing parens as we compile */    int  bracount;                    /* Count of capturing parens as we compile */
2077    int  final_bracount;              /* Saved value after first pass */    int  final_bracount;              /* Saved value after first pass */
2078      int  max_lookbehind;              /* Maximum lookbehind (characters) */
2079    int  top_backref;                 /* Maximum back reference */    int  top_backref;                 /* Maximum back reference */
2080    unsigned int backref_map;         /* Bitmap of low back refs */    unsigned int backref_map;         /* Bitmap of low back refs */
2081    int  assert_depth;                /* Depth of nested assertions */    int  assert_depth;                /* Depth of nested assertions */
# Line 2025  typedef struct compile_data { Line 2083  typedef struct compile_data {
2083    int  external_flags;              /* External flag bits to be set */    int  external_flags;              /* External flag bits to be set */
2084    int  req_varyopt;                 /* "After variable item" flag for reqbyte */    int  req_varyopt;                 /* "After variable item" flag for reqbyte */
2085    BOOL had_accept;                  /* (*ACCEPT) encountered */    BOOL had_accept;                  /* (*ACCEPT) encountered */
2086      BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
2087    BOOL check_lookbehind;            /* Lookbehinds need later checking */    BOOL check_lookbehind;            /* Lookbehinds need later checking */
2088    int  nltype;                      /* Newline type */    int  nltype;                      /* Newline type */
2089    int  nllen;                       /* Newline string length */    int  nllen;                       /* Newline string length */
# Line 2119  typedef struct match_data { Line 2178  typedef struct match_data {
2178    const  pcre_uchar *mark;        /* Mark pointer to pass back on success */    const  pcre_uchar *mark;        /* Mark pointer to pass back on success */
2179    const  pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */    const  pcre_uchar *nomatch_mark;/* Mark pointer to pass back on failure */
2180    const  pcre_uchar *once_target; /* Where to back up to for atomic groups */    const  pcre_uchar *once_target; /* Where to back up to for atomic groups */
2181    #ifdef NO_RECURSE
2182      void  *match_frames_base;       /* For remembering malloc'd frames */
2183    #endif
2184  } match_data;  } match_data;
2185    
2186  /* A similar structure is used for the same purpose by the DFA matching  /* A similar structure is used for the same purpose by the DFA matching
# Line 2173  total length. */ Line 2235  total length. */
2235  #define ctypes_offset (cbits_offset + cbit_length)  #define ctypes_offset (cbits_offset + cbit_length)
2236  #define tables_length (ctypes_offset + 256)  #define tables_length (ctypes_offset + 256)
2237    
2238  /* Internal function prefix */  /* Internal function and data prefixes. */
2239    
2240  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
2241    #ifndef PUBL
2242  #define PUBL(name) pcre_##name  #define PUBL(name) pcre_##name
2243    #endif
2244    #ifndef PRIV
2245  #define PRIV(name) _pcre_##name  #define PRIV(name) _pcre_##name
2246  #else  #endif
2247    #else /* COMPILE_PCRE8 */
2248  #ifdef COMPILE_PCRE16  #ifdef COMPILE_PCRE16
2249    #ifndef PUBL
2250  #define PUBL(name) pcre16_##name  #define PUBL(name) pcre16_##name
2251    #endif
2252    #ifndef PRIV
2253  #define PRIV(name) _pcre16_##name  #define PRIV(name) _pcre16_##name
2254    #endif
2255  #else  #else
2256  #error Unsupported compiling mode  #error Unsupported compiling mode
2257  #endif /* COMPILE_PCRE16 */  #endif /* COMPILE_PCRE16 */
# Line 2274  extern BOOL              PRIV(was_newlin Line 2344  extern BOOL              PRIV(was_newlin
2344  extern BOOL              PRIV(xclass)(int, const pcre_uchar *, BOOL);  extern BOOL              PRIV(xclass)(int, const pcre_uchar *, BOOL);
2345    
2346  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
2347  extern void              PRIV(jit_compile)(const real_pcre *, PUBL(extra) *);  extern void              PRIV(jit_compile)(const REAL_PCRE *,
2348  extern int               PRIV(jit_exec)(const real_pcre *, void *,                             PUBL(extra) *, int);
2349                             const pcre_uchar *, int, int, int, int, int *, int);  extern int               PRIV(jit_exec)(const REAL_PCRE *, const PUBL(extra) *,
2350                               const pcre_uchar *, int, int, int, int *, int);
2351  extern void              PRIV(jit_free)(void *);  extern void              PRIV(jit_free)(void *);
2352  extern int               PRIV(jit_get_size)(void *);  extern int               PRIV(jit_get_size)(void *);
2353    extern const char*       PRIV(jit_get_target)(void);
2354  #endif  #endif
2355    
2356  /* Unicode character database (UCD) */  /* Unicode character database (UCD) */
2357    
2358  typedef struct {  typedef struct {
2359    pcre_uint8 script;    pcre_uint8 script;     /* ucp_Arabic, etc. */
2360    pcre_uint8 chartype;    pcre_uint8 chartype;   /* ucp_Cc, etc. (general categories) */
2361    pcre_int32 other_case;    pcre_uint8 gbprop;     /* ucp_gbControl, etc. (grapheme break property) */
2362      pcre_int32 other_case; /* offset to other case, or zero if none */
2363  } ucd_record;  } ucd_record;
2364    
2365  extern const ucd_record  PRIV(ucd_records)[];  extern const ucd_record  PRIV(ucd_records)[];
2366  extern const pcre_uint8  PRIV(ucd_stage1)[];  extern const pcre_uint8  PRIV(ucd_stage1)[];
2367  extern const pcre_uint16 PRIV(ucd_stage2)[];  extern const pcre_uint16 PRIV(ucd_stage2)[];
2368  extern const int         PRIV(ucp_gentype)[];  extern const int         PRIV(ucp_gentype)[];
2369    extern const pcre_uint32 PRIV(ucp_gbtable)[];
2370  #ifdef SUPPORT_JIT  #ifdef SUPPORT_JIT
2371  extern const int         PRIV(ucp_typerange)[];  extern const int         PRIV(ucp_typerange)[];
2372  #endif  #endif
# Line 2305  extern const int         PRIV(ucp_typera Line 2379  extern const int         PRIV(ucp_typera
2379          PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \          PRIV(ucd_stage2)[PRIV(ucd_stage1)[(ch) / UCD_BLOCK_SIZE] * \
2380          UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])          UCD_BLOCK_SIZE + (ch) % UCD_BLOCK_SIZE])
2381    
2382  #define UCD_CHARTYPE(ch)  GET_UCD(ch)->chartype  #define UCD_CHARTYPE(ch)    GET_UCD(ch)->chartype
2383  #define UCD_SCRIPT(ch)    GET_UCD(ch)->script  #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
2384  #define UCD_CATEGORY(ch)  PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]  #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
2385  #define UCD_OTHERCASE(ch) (ch + GET_UCD(ch)->other_case)  #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
2386    #define UCD_OTHERCASE(ch)   (ch + GET_UCD(ch)->other_case)
2387    
2388  #endif /* SUPPORT_UCP */  #endif /* SUPPORT_UCP */
2389    

Legend:
Removed from v.850  
changed lines
  Added in v.1033

  ViewVC Help
Powered by ViewVC 1.1.5