/[pcre]/code/trunk/pcre_internal.h
ViewVC logotype

Diff of /code/trunk/pcre_internal.h

Parent Directory | Revision Log | View Patch

revision 391 by ph10, Tue Mar 17 21:16:01 2009 UTC | revision 442 by ph10, Fri Sep 11 10:21:02 2009 UTC
# Line 51  functions whose names all begin with "_p Line 51  functions whose names all begin with "_p
51  #define DEBUG  #define DEBUG
52  #endif  #endif
53    
54  /* We do not support both EBCDIC and UTF-8 at the same time. The "configure"  /* We do not support both EBCDIC and UTF-8 at the same time. The "configure"
55  script prevents both being selected, but not everybody uses "configure". */  script prevents both being selected, but not everybody uses "configure". */
56    
57  #if defined EBCDIC && defined SUPPORT_UTF8  #if defined EBCDIC && defined SUPPORT_UTF8
58  #error The use of both EBCDIC and SUPPORT_UTF8 is not supported.  #error The use of both EBCDIC and SUPPORT_UTF8 is not supported.
59  #endif  #endif
60    
61    /* If SUPPORT_UCP is defined, SUPPORT_UTF8 must also be defined. The
62    "configure" script ensures this, but not everybody uses "configure". */
63    
64    #if defined SUPPORT_UCP && !defined SUPPORT_UTF8
65    #define SUPPORT_UTF8 1
66    #endif
67    
68  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef  /* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
69  inline, and there are *still* stupid compilers about that don't like indented  inline, and there are *still* stupid compilers about that don't like indented
70  pre-processor statements, or at least there were when I first wrote this. After  pre-processor statements, or at least there were when I first wrote this. After
# Line 528  Standard C system should have one. */ Line 535  Standard C system should have one. */
535    
536  /* Private flags containing information about the compiled regex. They used to  /* Private flags containing information about the compiled regex. They used to
537  live at the top end of the options word, but that got almost full, so now they  live at the top end of the options word, but that got almost full, so now they
538  are in a 16-bit flags word. */  are in a 16-bit flags word. From release 8.00, PCRE_NOPARTIAL is unused, as
539    the restrictions on partial matching have been lifted. It remains for backwards
540    compatibility. */
541    
542  #define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */  #define PCRE_NOPARTIAL     0x0001  /* can't use partial with this regex */
543  #define PCRE_FIRSTSET      0x0002  /* first_byte is set */  #define PCRE_FIRSTSET      0x0002  /* first_byte is set */
# Line 555  time, run time, or study time, respectiv Line 564  time, run time, or study time, respectiv
564     PCRE_JAVASCRIPT_COMPAT)     PCRE_JAVASCRIPT_COMPAT)
565    
566  #define PUBLIC_EXEC_OPTIONS \  #define PUBLIC_EXEC_OPTIONS \
567    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
568     PCRE_PARTIAL|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \     PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_NEWLINE_BITS| \
569     PCRE_NO_START_OPTIMIZE)     PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)
570    
571  #define PUBLIC_DFA_EXEC_OPTIONS \  #define PUBLIC_DFA_EXEC_OPTIONS \
572    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NO_UTF8_CHECK| \    (PCRE_ANCHORED|PCRE_NOTBOL|PCRE_NOTEOL|PCRE_NOTEMPTY|PCRE_NOTEMPTY_ATSTART| \
573     PCRE_PARTIAL|PCRE_DFA_SHORTEST|PCRE_DFA_RESTART|PCRE_NEWLINE_BITS| \     PCRE_NO_UTF8_CHECK|PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_SHORTEST| \
574     PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE|PCRE_NO_START_OPTIMIZE)     PCRE_DFA_RESTART|PCRE_NEWLINE_BITS|PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE| \
575       PCRE_NO_START_OPTIMIZE)
576    
577  #define PUBLIC_STUDY_OPTIONS 0   /* None defined */  #define PUBLIC_STUDY_OPTIONS 0   /* None defined */
578    
# Line 600  typedef int BOOL; Line 610  typedef int BOOL;
610    
611  /* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal  /* If PCRE is to support UTF-8 on EBCDIC platforms, we cannot use normal
612  character constants like '*' because the compiler would emit their EBCDIC code,  character constants like '*' because the compiler would emit their EBCDIC code,
613  which is different from their ASCII/UTF-8 code. Instead we define macros for  which is different from their ASCII/UTF-8 code. Instead we define macros for
614  the characters so that they always use the ASCII/UTF-8 code when UTF-8 support  the characters so that they always use the ASCII/UTF-8 code when UTF-8 support
615  is enabled. When UTF-8 support is not enabled, the definitions use character  is enabled. When UTF-8 support is not enabled, the definitions use character
616  literals. Both character and string versions of each character are needed, and  literals. Both character and string versions of each character are needed, and
617  there are some longer strings as well.  there are some longer strings as well.
618    
619  This means that, on EBCDIC platforms, the PCRE library can handle either  This means that, on EBCDIC platforms, the PCRE library can handle either
620  EBCDIC, or UTF-8, but not both. To support both in the same compiled library  EBCDIC, or UTF-8, but not both. To support both in the same compiled library
621  would need different lookups depending on whether PCRE_UTF8 was set or not.  would need different lookups depending on whether PCRE_UTF8 was set or not.
622  This would make it impossible to use characters in switch/case statements,  This would make it impossible to use characters in switch/case statements,
623  which would reduce performance. For a theoretical use (which nobody has asked  which would reduce performance. For a theoretical use (which nobody has asked
624  for) in a minority area (EBCDIC platforms), this is not sensible. Any  for) in a minority area (EBCDIC platforms), this is not sensible. Any
625  application that did need both could compile two versions of the library, using  application that did need both could compile two versions of the library, using
626  macros to give the functions distinct names. */  macros to give the functions distinct names. */
627    
628  #ifndef SUPPORT_UTF8  #ifndef SUPPORT_UTF8
# Line 874  so that PCRE works on both ASCII and EBC Line 884  so that PCRE works on both ASCII and EBC
884  #define STRING_ANYCRLF_RIGHTPAR     "ANYCRLF)"  #define STRING_ANYCRLF_RIGHTPAR     "ANYCRLF)"
885  #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"  #define STRING_BSR_ANYCRLF_RIGHTPAR "BSR_ANYCRLF)"
886  #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"  #define STRING_BSR_UNICODE_RIGHTPAR "BSR_UNICODE)"
887    #define STRING_UTF8_RIGHTPAR        "UTF8)"
888    
889  #else  /* SUPPORT_UTF8 */  #else  /* SUPPORT_UTF8 */
890    
# Line 1125  only. */ Line 1136  only. */
1136  #define STRING_ANYCRLF_RIGHTPAR     STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS  #define STRING_ANYCRLF_RIGHTPAR     STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1137  #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS  #define STRING_BSR_ANYCRLF_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_A STR_N STR_Y STR_C STR_R STR_L STR_F STR_RIGHT_PARENTHESIS
1138  #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS  #define STRING_BSR_UNICODE_RIGHTPAR STR_B STR_S STR_R STR_UNDERSCORE STR_U STR_N STR_I STR_C STR_O STR_D STR_E STR_RIGHT_PARENTHESIS
1139    #define STRING_UTF8_RIGHTPAR        STR_U STR_T STR_F STR_8 STR_RIGHT_PARENTHESIS
1140    
1141  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
1142    
# Line 1590  typedef struct match_data { Line 1602  typedef struct match_data {
1602    BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */    BOOL   jscript_compat;        /* JAVASCRIPT_COMPAT flag */
1603    BOOL   endonly;               /* Dollar not before final \n */    BOOL   endonly;               /* Dollar not before final \n */
1604    BOOL   notempty;              /* Empty string match not wanted */    BOOL   notempty;              /* Empty string match not wanted */
1605    BOOL   partial;               /* PARTIAL flag */    BOOL   notempty_atstart;      /* Empty string match at start not wanted */
1606    BOOL   hitend;                /* Hit the end of the subject at some point */    BOOL   hitend;                /* Hit the end of the subject at some point */
1607    BOOL   bsr_anycrlf;           /* \R is just any CRLF, not full Unicode */    BOOL   bsr_anycrlf;           /* \R is just any CRLF, not full Unicode */
1608    const uschar *start_code;     /* For use when recursing */    const uschar *start_code;     /* For use when recursing */
# Line 1598  typedef struct match_data { Line 1610  typedef struct match_data {
1610    USPTR  end_subject;           /* End of the subject string */    USPTR  end_subject;           /* End of the subject string */
1611    USPTR  start_match_ptr;       /* Start of matched string */    USPTR  start_match_ptr;       /* Start of matched string */
1612    USPTR  end_match_ptr;         /* Subject position at end match */    USPTR  end_match_ptr;         /* Subject position at end match */
1613      USPTR  start_used_ptr;        /* Earliest consulted character */
1614      int    partial;               /* PARTIAL options */
1615    int    end_offset_top;        /* Highwater mark at end of match */    int    end_offset_top;        /* Highwater mark at end of match */
1616    int    capture_last;          /* Most recent capture number */    int    capture_last;          /* Most recent capture number */
1617    int    start_offset;          /* The start offset value */    int    start_offset;          /* The start offset value */
# Line 1614  typedef struct dfa_match_data { Line 1628  typedef struct dfa_match_data {
1628    const uschar *start_code;     /* Start of the compiled pattern */    const uschar *start_code;     /* Start of the compiled pattern */
1629    const uschar *start_subject;  /* Start of the subject string */    const uschar *start_subject;  /* Start of the subject string */
1630    const uschar *end_subject;    /* End of subject string */    const uschar *end_subject;    /* End of subject string */
1631      const uschar *start_used_ptr; /* Earliest consulted character */
1632    const uschar *tables;         /* Character tables */    const uschar *tables;         /* Character tables */
1633      int   start_offset;           /* The start offset value */
1634    int   moptions;               /* Match options */    int   moptions;               /* Match options */
1635    int   poptions;               /* Pattern options */    int   poptions;               /* Pattern options */
1636    int    nltype;                /* Newline type */    int    nltype;                /* Newline type */

Legend:
Removed from v.391
Changed lines
Added in v.442

  ViewVC Help
Powered by ViewVC 1.1.5