/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 1022 by ph10, Tue Aug 28 12:28:15 2012 UTC | revision 1045 by ph10, Sun Sep 23 16:50:00 2012 UTC
# Line 529  changed in future to be a fixed number o Line 529  changed in future to be a fixed number o
529  #define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)  #define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)
530    
531  /* When UTF encoding is being used, a character is no longer just a single  /* When UTF encoding is being used, a character is no longer just a single
532  character. The macros for character handling generate simple sequences when  byte. The macros for character handling generate simple sequences when used in
533  used in character-mode, and more complicated ones for UTF characters.  character-mode, and more complicated ones for UTF characters. GETCHARLENTEST
534  GETCHARLENTEST and other macros are not used when UTF is not supported,  and other macros are not used when UTF is not supported, so they are not
535  so they are not defined. To make sure they can never even appear when  defined. To make sure they can never even appear when UTF support is omitted,
536  UTF support is omitted, we don't even define them. */  we don't even define them. */
537    
538  #ifndef SUPPORT_UTF  #ifndef SUPPORT_UTF
539    
# Line 832  code. */ Line 832  code. */
832  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
833    
834    
835    /* Tests for Unicode horizontal and vertical whitespace characters must check a
836    number of different values. Using a switch statement for this generates the
837    fastest code (no loop, no memory access), and there are several places in the
838    interpreter code where this happens. In order to ensure that all the case lists
839    remain in step, we use macros so that there is only one place where the lists
840    are defined.
841    
842    These values are also required as lists in pcre_compile.c when processing \h,
843    \H, \v and \V in a character class. The lists are defined in pcre_tables.c, but
844    macros that define the values are here so that all the definitions are
845    together. The lists must be in ascending character order, terminated by
846    NOTACHAR (which is 0xffffffff).
847    
848    Any changes should ensure that the various macros are kept in step with each
849    other. NOTE: The values also appear in pcre_jit_compile.c. */
850    
851    /* ------ ASCII/Unicode environments ------ */
852    
853    #ifndef EBCDIC
854    
855    #define HSPACE_LIST \
856      CHAR_HT, CHAR_SPACE, 0xa0, \
857      0x1680, 0x180e, 0x2000, 0x2001, 0x2002, 0x2003, 0x2004, 0x2005, \
858      0x2006, 0x2007, 0x2008, 0x2009, 0x200A, 0x202f, 0x205f, 0x3000, \
859      NOTACHAR
860    
861    #define HSPACE_MULTIBYTE_CASES \
862      case 0x1680:  /* OGHAM SPACE MARK */ \
863      case 0x180e:  /* MONGOLIAN VOWEL SEPARATOR */ \
864      case 0x2000:  /* EN QUAD */ \
865      case 0x2001:  /* EM QUAD */ \
866      case 0x2002:  /* EN SPACE */ \
867      case 0x2003:  /* EM SPACE */ \
868      case 0x2004:  /* THREE-PER-EM SPACE */ \
869      case 0x2005:  /* FOUR-PER-EM SPACE */ \
870      case 0x2006:  /* SIX-PER-EM SPACE */ \
871      case 0x2007:  /* FIGURE SPACE */ \
872      case 0x2008:  /* PUNCTUATION SPACE */ \
873      case 0x2009:  /* THIN SPACE */ \
874      case 0x200A:  /* HAIR SPACE */ \
875      case 0x202f:  /* NARROW NO-BREAK SPACE */ \
876      case 0x205f:  /* MEDIUM MATHEMATICAL SPACE */ \
877      case 0x3000   /* IDEOGRAPHIC SPACE */
878    
879    #define HSPACE_BYTE_CASES \
880      case CHAR_HT: \
881      case CHAR_SPACE: \
882      case 0xa0     /* NBSP */
883    
884    #define HSPACE_CASES \
885      HSPACE_BYTE_CASES: \
886      HSPACE_MULTIBYTE_CASES
887    
888    #define VSPACE_LIST \
889      CHAR_LF, CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, 0x2028, 0x2029, NOTACHAR
890    
891    #define VSPACE_MULTIBYTE_CASES \
892      case 0x2028:    /* LINE SEPARATOR */ \
893      case 0x2029     /* PARAGRAPH SEPARATOR */
894    
895    #define VSPACE_BYTE_CASES \
896      case CHAR_LF: \
897      case CHAR_VT: \
898      case CHAR_FF: \
899      case CHAR_CR: \
900      case CHAR_NEL
901    
902    #define VSPACE_CASES \
903      VSPACE_BYTE_CASES: \
904      VSPACE_MULTIBYTE_CASES
905    
906    /* ------ EBCDIC environments ------ */
907    
908    #else
909    #define HSPACE_LIST CHAR_HT, CHAR_SPACE
910    
911    #define HSPACE_BYTE_CASES \
912      case CHAR_HT: \
913      case CHAR_SPACE
914    
915    #define HSPACE_CASES HSPACE_BYTE_CASES
916    
917    #ifdef EBCDIC_NL25
918    #define VSPACE_LIST \
919      CHAR_VT, CHAR_FF, CHAR_CR, CHAR_NEL, CHAR_LF, NOTACHAR
920    #else
921    #define VSPACE_LIST \
922      CHAR_VT, CHAR_FF, CHAR_CR, CHAR_LF, CHAR_NEL, NOTACHAR
923    #endif
924    
925    #define VSPACE_BYTE_CASES \
926      case CHAR_LF: \
927      case CHAR_VT: \
928      case CHAR_FF: \
929      case CHAR_CR: \
930      case CHAR_NEL
931    
932    #define VSPACE_CASES VSPACE_BYTE_CASES
933    #endif  /* EBCDIC */
934    
935    /* ------ End of whitespace macros ------ */
936    
937    
938  /* In case there is no definition of offsetof() provided - though any proper  /* In case there is no definition of offsetof() provided - though any proper
939  Standard C system should have one. */  Standard C system should have one. */
940    
# Line 945  macros to give the functions distinct na Line 1048  macros to give the functions distinct na
1048  #ifndef SUPPORT_UTF  #ifndef SUPPORT_UTF
1049    
1050  /* UTF-8 support is not enabled; use the platform-dependent character literals  /* UTF-8 support is not enabled; use the platform-dependent character literals
1051  so that PCRE works on both ASCII and EBCDIC platforms, in non-UTF-mode only. */  so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
1052    mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
1053    characters, a common practice has been to use its NL (0x15) character as the
1054    line terminator in C-like processing environments. However, sometimes the LF
1055    (0x25) character is used instead, according to this Unicode document:
1056    
1057    http://unicode.org/standard/reports/tr13/tr13-5.html
1058    
1059    PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
1060    instead. Whichever is *not* chosen is defined as NEL.
1061    
1062    In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
1063    same code point. */
1064    
1065    #ifdef EBCDIC
1066    
1067    #ifndef EBCDIC_NL25
1068    #define CHAR_NL                     '\x15'
1069    #define CHAR_NEL                    '\x25'
1070    #define STR_NL                      "\x15"
1071    #define STR_NEL                     "\x25"
1072    #else
1073    #define CHAR_NL                     '\x25'
1074    #define CHAR_NEL                    '\x15'
1075    #define STR_NL                      "\x25"
1076    #define STR_NEL                     "\x15"
1077    #endif
1078    
1079    #define CHAR_LF                     CHAR_NL
1080    #define STR_LF                      STR_NL
1081    
1082    #define CHAR_ESC                    '\047'
1083    #define CHAR_DEL                    '\007'
1084    #define STR_ESC                     "\047"
1085    #define STR_DEL                     "\007"
1086    
1087    #else  /* Not EBCDIC */
1088    
1089    /* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
1090    compatibility. NEL is the Unicode newline character; make sure it is
1091    a positive value. */
1092    
1093    #define CHAR_LF                     '\n'
1094    #define CHAR_NL                     CHAR_LF
1095    #define CHAR_NEL                    ((unsigned char)'\x85')
1096    #define CHAR_ESC                    '\033'
1097    #define CHAR_DEL                    '\177'
1098    
1099    #define STR_LF                      "\n"
1100    #define STR_NL                      STR_LF
1101    #define STR_NEL                     "\x85"
1102    #define STR_ESC                     "\033"
1103    #define STR_DEL                     "\177"
1104    
1105    #endif  /* EBCDIC */
1106    
1107    /* The remaining definitions work in both environments. */
1108    
1109  #define CHAR_HT                     '\t'  #define CHAR_HT                     '\t'
1110  #define CHAR_VT                     '\v'  #define CHAR_VT                     '\v'
1111  #define CHAR_FF                     '\f'  #define CHAR_FF                     '\f'
1112  #define CHAR_CR                     '\r'  #define CHAR_CR                     '\r'
 #define CHAR_NL                     '\n'  
1113  #define CHAR_BS                     '\b'  #define CHAR_BS                     '\b'
1114  #define CHAR_BEL                    '\a'  #define CHAR_BEL                    '\a'
 #ifdef EBCDIC  
 #define CHAR_ESC                    '\047'  
 #define CHAR_DEL                    '\007'  
 #else  
 #define CHAR_ESC                    '\033'  
 #define CHAR_DEL                    '\177'  
 #endif  
1115    
1116  #define CHAR_SPACE                  ' '  #define CHAR_SPACE                  ' '
1117  #define CHAR_EXCLAMATION_MARK       '!'  #define CHAR_EXCLAMATION_MARK       '!'
# Line 1062  so that PCRE works on both ASCII and EBC Line 1213  so that PCRE works on both ASCII and EBC
1213  #define STR_VT                      "\v"  #define STR_VT                      "\v"
1214  #define STR_FF                      "\f"  #define STR_FF                      "\f"
1215  #define STR_CR                      "\r"  #define STR_CR                      "\r"
 #define STR_NL                      "\n"  
1216  #define STR_BS                      "\b"  #define STR_BS                      "\b"
1217  #define STR_BEL                     "\a"  #define STR_BEL                     "\a"
 #ifdef EBCDIC  
 #define STR_ESC                     "\047"  
 #define STR_DEL                     "\007"  
 #else  
 #define STR_ESC                     "\033"  
 #define STR_DEL                     "\177"  
 #endif  
1218    
1219  #define STR_SPACE                   " "  #define STR_SPACE                   " "
1220  #define STR_EXCLAMATION_MARK        "!"  #define STR_EXCLAMATION_MARK        "!"
# Line 1221  only. */ Line 1364  only. */
1364  #define CHAR_VT                     '\013'  #define CHAR_VT                     '\013'
1365  #define CHAR_FF                     '\014'  #define CHAR_FF                     '\014'
1366  #define CHAR_CR                     '\015'  #define CHAR_CR                     '\015'
1367  #define CHAR_NL                     '\012'  #define CHAR_LF                     '\012'
1368    #define CHAR_NL                     CHAR_LF
1369    #define CHAR_NEL                    ((unsigned char)'\x85')
1370  #define CHAR_BS                     '\010'  #define CHAR_BS                     '\010'
1371  #define CHAR_BEL                    '\007'  #define CHAR_BEL                    '\007'
1372  #define CHAR_ESC                    '\033'  #define CHAR_ESC                    '\033'
# Line 1484  only. */ Line 1629  only. */
1629  #endif  #endif
1630    
1631  #ifndef ESC_n  #ifndef ESC_n
1632  #define ESC_n CHAR_NL  #define ESC_n CHAR_LF
1633  #endif  #endif
1634    
1635  #ifndef ESC_r  #ifndef ESC_r
# Line 2041  typedef struct compile_data { Line 2186  typedef struct compile_data {
2186    int  external_flags;              /* External flag bits to be set */    int  external_flags;              /* External flag bits to be set */
2187    int  req_varyopt;                 /* "After variable item" flag for reqbyte */    int  req_varyopt;                 /* "After variable item" flag for reqbyte */
2188    BOOL had_accept;                  /* (*ACCEPT) encountered */    BOOL had_accept;                  /* (*ACCEPT) encountered */
2189    BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */    BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
2190    BOOL check_lookbehind;            /* Lookbehinds need later checking */    BOOL check_lookbehind;            /* Lookbehinds need later checking */
2191    int  nltype;                      /* Newline type */    int  nltype;                      /* Newline type */
2192    int  nllen;                       /* Newline string length */    int  nllen;                       /* Newline string length */
# Line 2233  but are not part of the PCRE public API. Line 2378  but are not part of the PCRE public API.
2378  pcre_tables.c module. */  pcre_tables.c module. */
2379    
2380  #ifdef COMPILE_PCRE8  #ifdef COMPILE_PCRE8
   
2381  extern const int            PRIV(utf8_table1)[];  extern const int            PRIV(utf8_table1)[];
2382  extern const int            PRIV(utf8_table1_size);  extern const int            PRIV(utf8_table1_size);
2383  extern const int            PRIV(utf8_table2)[];  extern const int            PRIV(utf8_table2)[];
2384  extern const int            PRIV(utf8_table3)[];  extern const int            PRIV(utf8_table3)[];
2385  extern const pcre_uint8     PRIV(utf8_table4)[];  extern const pcre_uint8     PRIV(utf8_table4)[];
   
2386  #endif /* COMPILE_PCRE8 */  #endif /* COMPILE_PCRE8 */
2387    
2388  extern const char           PRIV(utt_names)[];  extern const char           PRIV(utt_names)[];
2389  extern const ucp_type_table PRIV(utt)[];  extern const ucp_type_table PRIV(utt)[];
2390  extern const int            PRIV(utt_size);  extern const int            PRIV(utt_size);
2391    
2392    extern const pcre_uint8     PRIV(OP_lengths)[];
2393  extern const pcre_uint8     PRIV(default_tables)[];  extern const pcre_uint8     PRIV(default_tables)[];
2394    
2395  extern const pcre_uint8     PRIV(OP_lengths)[];  extern const pcre_uint32    PRIV(hspace_list)[];
2396    extern const pcre_uint32    PRIV(vspace_list)[];
2397    
2398    
2399  /* Internal shared functions. These are functions that are used by more than  /* Internal shared functions. These are functions that are used by more than
# Line 2317  typedef struct { Line 2462  typedef struct {
2462    pcre_uint8 script;     /* ucp_Arabic, etc. */    pcre_uint8 script;     /* ucp_Arabic, etc. */
2463    pcre_uint8 chartype;   /* ucp_Cc, etc. (general categories) */    pcre_uint8 chartype;   /* ucp_Cc, etc. (general categories) */
2464    pcre_uint8 gbprop;     /* ucp_gbControl, etc. (grapheme break property) */    pcre_uint8 gbprop;     /* ucp_gbControl, etc. (grapheme break property) */
2465      pcre_uint8 caseset;    /* offset to multichar other cases or zero */
2466    pcre_int32 other_case; /* offset to other case, or zero if none */    pcre_int32 other_case; /* offset to other case, or zero if none */
2467  } ucd_record;  } ucd_record;
2468    
2469    extern const pcre_uint32 PRIV(ucd_caseless_sets)[];
2470  extern const ucd_record  PRIV(ucd_records)[];  extern const ucd_record  PRIV(ucd_records)[];
2471  extern const pcre_uint8  PRIV(ucd_stage1)[];  extern const pcre_uint8  PRIV(ucd_stage1)[];
2472  extern const pcre_uint16 PRIV(ucd_stage2)[];  extern const pcre_uint16 PRIV(ucd_stage2)[];
# Line 2341  extern const int         PRIV(ucp_typera Line 2488  extern const int         PRIV(ucp_typera
2488  #define UCD_SCRIPT(ch)      GET_UCD(ch)->script  #define UCD_SCRIPT(ch)      GET_UCD(ch)->script
2489  #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]  #define UCD_CATEGORY(ch)    PRIV(ucp_gentype)[UCD_CHARTYPE(ch)]
2490  #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop  #define UCD_GRAPHBREAK(ch)  GET_UCD(ch)->gbprop
2491    #define UCD_CASESET(ch)     GET_UCD(ch)->caseset
2492  #define UCD_OTHERCASE(ch)   (ch + GET_UCD(ch)->other_case)  #define UCD_OTHERCASE(ch)   (ch + GET_UCD(ch)->other_case)
2493    
2494  #endif /* SUPPORT_UCP */  #endif /* SUPPORT_UCP */

Legend:
Removed from v.1022  
changed lines
  Added in v.1045

  ViewVC Help
Powered by ViewVC 1.1.5