/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC revision 1041 by ph10, Sun Sep 16 10:16:27 2012 UTC
# Line 529  changed in future to be a fixed number o Line 529  changed in future to be a fixed number o
529  #define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)  #define MAX_MARK ((1 << (sizeof(pcre_uchar)*8)) - 1)
530    
531  /* When UTF encoding is being used, a character is no longer just a single  /* When UTF encoding is being used, a character is no longer just a single
532  character. The macros for character handling generate simple sequences when  byte. The macros for character handling generate simple sequences when used in
533  used in character-mode, and more complicated ones for UTF characters.  character-mode, and more complicated ones for UTF characters. GETCHARLENTEST
534  GETCHARLENTEST and other macros are not used when UTF is not supported,  and other macros are not used when UTF is not supported, so they are not
535  so they are not defined. To make sure they can never even appear when  defined. To make sure they can never even appear when UTF support is omitted,
536  UTF support is omitted, we don't even define them. */  we don't even define them. */
537    
538  #ifndef SUPPORT_UTF  #ifndef SUPPORT_UTF
539    
# Line 832  code. */ Line 832  code. */
832  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
833    
834    
835    /* Tests for Unicode horizontal and vertical whitespace characters must check a
836    number of different values. Using a switch statement for this generates the
837    fastest code (no loop, no memory access), and there are several places where
838    this happens. In order to ensure that all the case lists remain in step, we use
839    macros so that there is only one place where the lists are defined.
840    
841    NOTE: These values are also used explicitly in pcre_compile.c when processing
842    \h, \H, \v and \V in a character class, so any changes here should be
843    duplicated there as well. They also appear in pcre_jit_compile.c. */
844    
845    #ifndef EBCDIC
/* Case labels for the horizontal-whitespace values that are above 0xff. This
definition is used only when EBCDIC is not defined. The final label is written
without a colon, so whatever follows the macro must supply it. */
846    #define HSPACE_MULTIBYTE_CASES \
847          case 0x1680:    /* OGHAM SPACE MARK */ \
848          case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */ \
849          case 0x2000:    /* EN QUAD */ \
850          case 0x2001:    /* EM QUAD */ \
851          case 0x2002:    /* EN SPACE */ \
852          case 0x2003:    /* EM SPACE */ \
853          case 0x2004:    /* THREE-PER-EM SPACE */ \
854          case 0x2005:    /* FOUR-PER-EM SPACE */ \
855          case 0x2006:    /* SIX-PER-EM SPACE */ \
856          case 0x2007:    /* FIGURE SPACE */ \
857          case 0x2008:    /* PUNCTUATION SPACE */ \
858          case 0x2009:    /* THIN SPACE */ \
859          case 0x200A:    /* HAIR SPACE */ \
860          case 0x202f:    /* NARROW NO-BREAK SPACE */ \
861          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */ \
862          case 0x3000     /* IDEOGRAPHIC SPACE */
863    
/* Case labels for the horizontal-whitespace values that fit in one byte when
EBCDIC is not defined: tab, space and 0xa0 (NBSP). As with the other lists, the
last label has no trailing colon. */
864    #define HSPACE_BYTE_CASES \
865          case CHAR_HT: \
866          case CHAR_SPACE: \
867          case 0xa0       /* NBSP */
868    
/* Case labels for the vertical-whitespace values that are above 0xff (line
separator and paragraph separator). Used only when EBCDIC is not defined; the
last label has no trailing colon. */
869    #define VSPACE_MULTIBYTE_CASES \
870          case 0x2028:    /* LINE SEPARATOR */ \
871          case 0x2029     /* PARAGRAPH SEPARATOR */
872    
873    #else   /* EBCDIC */
/* In the EBCDIC branch both multibyte lists are defined as empty, so they
expand to nothing at all (not even a single case label).
NOTE(review): any macro or switch that writes a colon before or after one of
these expects a label there; check that the empty expansion is still valid C
at every place these are used in an EBCDIC build. */
874    #define HSPACE_MULTIBYTE_CASES
875    #define VSPACE_MULTIBYTE_CASES
876    
/* EBCDIC version of the single-byte horizontal-whitespace list: only tab and
space are listed here (there is no counterpart of the 0xa0 entry that the
non-EBCDIC list has). The last label has no trailing colon. */
877    #define HSPACE_BYTE_CASES \
878          case CHAR_HT: \
879          case CHAR_SPACE
880    #endif  /* EBCDIC */
881    
/* Case labels for the single-byte vertical-whitespace values. This definition
sits after the EBCDIC conditional, so it is shared by both branches; the values
come from the CHAR_xxx names rather than literal numbers. The last label has no
trailing colon. */
882    #define VSPACE_BYTE_CASES \
883          case CHAR_LF: \
884          case CHAR_VT: \
885          case CHAR_FF: \
886          case CHAR_CR: \
887          case CHAR_NEL
888    
/* Complete horizontal-whitespace list: the single-byte labels, a colon, then
the multibyte labels. The expansion ends without a colon, so the using site is
presumably written as "HSPACE_CASES:".
NOTE(review): when EBCDIC is defined, HSPACE_MULTIBYTE_CASES is empty, so this
expands to "case CHAR_HT: case CHAR_SPACE:" with nothing after the colon. A
using site that then adds its own colon would yield "case CHAR_SPACE: :", which
is not valid C. Confirm against an EBCDIC build; a possible remedy is to define
HSPACE_CASES as just HSPACE_BYTE_CASES in the EBCDIC branch. */
889    #define HSPACE_CASES \
890            HSPACE_BYTE_CASES: \
891            HSPACE_MULTIBYTE_CASES
892    
/* Complete vertical-whitespace list: the single-byte labels, a colon, then the
multibyte labels. The expansion ends without a colon, so the using site is
presumably written as "VSPACE_CASES:".
NOTE(review): when EBCDIC is defined, VSPACE_MULTIBYTE_CASES is empty, so this
expands to a list ending in "case CHAR_NEL:" with nothing after the colon. A
using site that then adds its own colon would yield "case CHAR_NEL: :", which
is not valid C. Confirm against an EBCDIC build; a possible remedy is to define
VSPACE_CASES as just VSPACE_BYTE_CASES in the EBCDIC branch. */
893    #define VSPACE_CASES \
894            VSPACE_BYTE_CASES: \
895            VSPACE_MULTIBYTE_CASES
896    
897  /* In case there is no definition of offsetof() provided - though any proper  /* In case there is no definition of offsetof() provided - though any proper
898  Standard C system should have one. */  Standard C system should have one. */
899    
# Line 946  macros to give the functions distinct na Line 1008  macros to give the functions distinct na
1008    
1009  /* UTF-8 support is not enabled; use the platform-dependent character literals  /* UTF-8 support is not enabled; use the platform-dependent character literals
1010  so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF  so that PCRE works in both ASCII and EBCDIC environments, but only in non-UTF
1011  mode. Newline characters are problematic in EBCDIC. Though it has CR and LF  mode. Newline characters are problematic in EBCDIC. Though it has CR and LF
1012  characters, a common practice has been to use its NL (0x15) character as the  characters, a common practice has been to use its NL (0x15) character as the
1013  line terminator in C-like processing environments. However, sometimes the LF  line terminator in C-like processing environments. However, sometimes the LF
1014  (0x25) character is used instead, according to this Unicode document:  (0x25) character is used instead, according to this Unicode document:
1015    
1016  http://unicode.org/standard/reports/tr13/tr13-5.html  http://unicode.org/standard/reports/tr13/tr13-5.html
1017    
1018  PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25  PCRE defaults EBCDIC NL to 0x15, but has a build-time option to select 0x25
1019  instead. Whichever is *not* chosen is defined as NEL.  instead. Whichever is *not* chosen is defined as NEL.
1020    
1021  In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the  In both ASCII and EBCDIC environments, CHAR_NL and CHAR_LF are synonyms for the
1022  same code point. */  same code point. */
# Line 983  same code point. */ Line 1045  same code point. */
1045    
1046  #else  /* Not EBCDIC */  #else  /* Not EBCDIC */
1047    
1048  /* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for  /* In ASCII/Unicode, linefeed is '\n' and we equate this to NL for
1049  compatibility. NEL is the Unicode newline character; make sure it is  compatibility. NEL is the Unicode newline character; make sure it is
1050  a positive value. */  a positive value. */
1051    
# Line 2083  typedef struct compile_data { Line 2145  typedef struct compile_data {
2145    int  external_flags;              /* External flag bits to be set */    int  external_flags;              /* External flag bits to be set */
2146    int  req_varyopt;                 /* "After variable item" flag for reqbyte */    int  req_varyopt;                 /* "After variable item" flag for reqbyte */
2147    BOOL had_accept;                  /* (*ACCEPT) encountered */    BOOL had_accept;                  /* (*ACCEPT) encountered */
2148    BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */    BOOL had_pruneorskip;             /* (*PRUNE) or (*SKIP) encountered */
2149    BOOL check_lookbehind;            /* Lookbehinds need later checking */    BOOL check_lookbehind;            /* Lookbehinds need later checking */
2150    int  nltype;                      /* Newline type */    int  nltype;                      /* Newline type */
2151    int  nllen;                       /* Newline string length */    int  nllen;                       /* Newline string length */

Legend:
Removed from v.1033  
changed lines
  Added in v.1041

  ViewVC Help
Powered by ViewVC 1.1.5