/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1060 by chpe, Tue Oct 16 15:53:57 2012 UTC revision 1064 by chpe, Tue Oct 16 15:54:12 2012 UTC
# Line 750  return (*p == CHAR_RIGHT_CURLY_BRACKET); Line 750  return (*p == CHAR_RIGHT_CURLY_BRACKET);
750    
751  /* This function is called when a \ has been encountered. It either returns a  /* This function is called when a \ has been encountered. It either returns a
752  positive value for a simple escape such as \n, or 0 for a data character  positive value for a simple escape such as \n, or 0 for a data character
753  which will be placed in chptr. A backreference to group  which will be placed in chptr. A backreference to group n is returned as
754  n is returned as ESC_REF + n; ESC_REF is the highest ESC_xxx macro. When  negative n. When UTF-8 is enabled, a positive value greater than 255 may
755  UTF-8 is enabled, a positive value greater than 255 may be returned in chptr.  be returned in chptr.
756  On entry, ptr is pointing at the \. On exit, it is on the final character of the  On entry, ptr is pointing at the \. On exit, it is on the final character of the
757  escape sequence.  escape sequence.
758    
# Line 766  Arguments: Line 766  Arguments:
766    
767  Returns:         zero => a data character  Returns:         zero => a data character
768                   positive => a special escape sequence                   positive => a special escape sequence
769                     negative => a back reference
770                   on error, errorcodeptr is set                   on error, errorcodeptr is set
771  */  */
772    
773  static int  static int
774  check_escape(const pcre_uchar **ptrptr, int *chptr, int *errorcodeptr,  check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,
775    int bracount, int options, BOOL isclass)    int bracount, int options, BOOL isclass)
776  {  {
777  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
# Line 794  Otherwise further processing may be requ Line 795  Otherwise further processing may be requ
795  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
796  /* Not alphanumeric */  /* Not alphanumeric */
797  else if (c < CHAR_0 || c > CHAR_z) {}  else if (c < CHAR_0 || c > CHAR_z) {}
798  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = i; else escape = -i; }  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
799    
800  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
801  /* Not alphanumeric */  /* Not alphanumeric */
802  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
803  else if ((i = escapes[c - 0x48]) != 0)  { if (i > 0) c = i; else escape = -i; }  else if ((i = escapes[c - 0x48]) != 0)  { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
804  #endif  #endif
805    
806  /* Escapes that need further processing, or are illegal. */  /* Escapes that need further processing, or are illegal. */
# Line 807  else if ((i = escapes[c - 0x48]) != 0) Line 808  else if ((i = escapes[c - 0x48]) != 0)
808  else  else
809    {    {
810    const pcre_uchar *oldptr;    const pcre_uchar *oldptr;
811    BOOL braced, negated;    BOOL braced, negated, overflow;
812      int s;
813    
814    switch (c)    switch (c)
815      {      {
# Line 914  else Line 916  else
916      else negated = FALSE;      else negated = FALSE;
917    
918      /* The integer range is limited by the machine's int representation. */      /* The integer range is limited by the machine's int representation. */
919      c = 0;      s = 0;
920        overflow = FALSE;
921      while (IS_DIGIT(ptr[1]))      while (IS_DIGIT(ptr[1]))
922        {        {
923        if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */        if (s > INT_MAX / 10 - 1) /* Integer overflow */
924          {          {
925          c = -1;          overflow = TRUE;
926          break;          break;
927          }          }
928        c = c * 10 + *(++ptr) - CHAR_0;        s = s * 10 + (int)(*(++ptr) - CHAR_0);
929        }        }
930      if (((unsigned int)c) > INT_MAX) /* Integer overflow */      if (overflow) /* Integer overflow */
931        {        {
932        while (IS_DIGIT(ptr[1]))        while (IS_DIGIT(ptr[1]))
933          ptr++;          ptr++;
# Line 938  else Line 941  else
941        break;        break;
942        }        }
943    
944      if (c == 0)      if (s == 0)
945        {        {
946        *errorcodeptr = ERR58;        *errorcodeptr = ERR58;
947        break;        break;
# Line 946  else Line 949  else
949    
950      if (negated)      if (negated)
951        {        {
952        if (c > bracount)        if (s > bracount)
953          {          {
954          *errorcodeptr = ERR15;          *errorcodeptr = ERR15;
955          break;          break;
956          }          }
957        c = bracount - (c - 1);        s = bracount - (s - 1);
958        }        }
959    
960      escape = ESC_REF + c;      escape = -s;
961      break;      break;
962    
963      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 976  else Line 979  else
979        {        {
980        oldptr = ptr;        oldptr = ptr;
981        /* The integer range is limited by the machine's int representation. */        /* The integer range is limited by the machine's int representation. */
982        c -= CHAR_0;        s = (int)(c -CHAR_0);
983          overflow = FALSE;
984        while (IS_DIGIT(ptr[1]))        while (IS_DIGIT(ptr[1]))
985          {          {
986          if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */          if (s > INT_MAX / 10 - 1) /* Integer overflow */
987            {            {
988            c = -1;            overflow = TRUE;
989            break;            break;
990            }            }
991          c = c * 10 + *(++ptr) - CHAR_0;          s = s * 10 + (int)(*(++ptr) - CHAR_0);
992          }          }
993        if (((unsigned int)c) > INT_MAX) /* Integer overflow */        if (overflow) /* Integer overflow */
994          {          {
995          while (IS_DIGIT(ptr[1]))          while (IS_DIGIT(ptr[1]))
996            ptr++;            ptr++;
997          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
998          break;          break;
999          }          }
1000        if (c < 10 || c <= bracount)        if (s < 10 || s <= bracount)
1001          {          {
1002          escape = ESC_REF + c;          escape = -s;
1003          break;          break;
1004          }          }
1005        ptr = oldptr;      /* Put the pointer back and fall through */        ptr = oldptr;      /* Put the pointer back and fall through */
# Line 1058  else Line 1062  else
1062      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1063        {        {
1064        const pcre_uchar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
       BOOL overflow;  
1065    
1066        c = 0;        c = 0;
1067        overflow = FALSE;        overflow = FALSE;
# Line 2904  Yield:        -1 when no more Line 2907  Yield:        -1 when no more
2907  */  */
2908    
2909  static int  static int
2910  get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,  get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr,
2911    unsigned int *odptr)    pcre_uint32 *odptr)
2912  {  {
2913  unsigned int c, othercase, next;  pcre_uint32 c, othercase, next;
2914  int co;  int co;
2915    
2916  /* Find the first character that has an other case. If it has multiple other  /* Find the first character that has an other case. If it has multiple other
# Line 2959  Returns:       TRUE if auto-possessifyin Line 2962  Returns:       TRUE if auto-possessifyin
2962  */  */
2963    
2964  static BOOL  static BOOL
2965  check_char_prop(int c, int ptype, int pdata, BOOL negated)  check_char_prop(pcre_uint32 c, int ptype, int pdata, BOOL negated)
2966  {  {
2967  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2968  const pcre_uint32 *p;  const pcre_uint32 *p;
# Line 3045  static BOOL Line 3048  static BOOL
3048  check_auto_possessive(const pcre_uchar *previous, BOOL utf,  check_auto_possessive(const pcre_uchar *previous, BOOL utf,
3049    const pcre_uchar *ptr, int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
3050  {  {
3051  pcre_int32 c = NOTACHAR; // FIXMEchpe pcre_uint32  pcre_uint32 c = NOTACHAR;
3052  pcre_int32 next;  pcre_uint32 next;
3053  int escape;  int escape;
3054  int op_code = *previous++;  int op_code = *previous++;
3055    
# Line 3144  if (escape == 0) Line 3147  if (escape == 0)
3147    case, which maps to the special PT_CLIST property. Check this first. */    case, which maps to the special PT_CLIST property. Check this first. */
3148    
3149  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3150    if (utf && (unsigned int)c != NOTACHAR && (options & PCRE_CASELESS) != 0)    if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)
3151      {      {
3152      int ocs = UCD_CASESET(next);      int ocs = UCD_CASESET(next);
3153      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);
# Line 3166  if (escape == 0) Line 3169  if (escape == 0)
3169  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3170      if (utf)      if (utf)
3171        {        {
3172        unsigned int othercase;        pcre_uint32 othercase;
3173        if (next < 128) othercase = cd->fcc[next]; else        if (next < 128) othercase = cd->fcc[next]; else
3174  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3175        othercase = UCD_OTHERCASE((unsigned int)next);        othercase = UCD_OTHERCASE(next);
3176  #else  #else
3177        othercase = NOTACHAR;        othercase = NOTACHAR;
3178  #endif  #endif
3179        return (unsigned int)c != othercase;        return c != othercase;
3180        }        }
3181      else      else
3182  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3183      return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Not UTF */      return (c != TABLE_GET(next, cd->fcc, next));  /* Not UTF */
3184    
3185      case OP_NOT:      case OP_NOT:
3186      return c == next;      return c == next;
# Line 3187  if (escape == 0) Line 3190  if (escape == 0)
3190  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3191      if (utf)      if (utf)
3192        {        {
3193        unsigned int othercase;        pcre_uint32 othercase;
3194        if (next < 128) othercase = cd->fcc[next]; else        if (next < 128) othercase = cd->fcc[next]; else
3195  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3196        othercase = UCD_OTHERCASE((unsigned int)next);        othercase = UCD_OTHERCASE(next);
3197  #else  #else
3198        othercase = NOTACHAR;        othercase = NOTACHAR;
3199  #endif  #endif
3200        return (unsigned int)c == othercase;        return c == othercase;
3201        }        }
3202      else      else
3203  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3204      return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Not UTF */      return (c == TABLE_GET(next, cd->fcc, next));  /* Not UTF */
3205    
3206      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
3207      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
# Line 3689  pcre_int32 req_caseopt, reqvary, tempreq Line 3692  pcre_int32 req_caseopt, reqvary, tempreq
3692  int options = *optionsptr;               /* May change dynamically */  int options = *optionsptr;               /* May change dynamically */
3693  int after_manual_callout = 0;  int after_manual_callout = 0;
3694  int length_prevgroup = 0;  int length_prevgroup = 0;
3695  register int c;  register pcre_uint32 c;
3696  int escape;  int escape;
3697  register pcre_uchar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3698  pcre_uchar *last_code = code;  pcre_uchar *last_code = code;
# Line 4459  for (;; ptr++) Line 4462  for (;; ptr++)
4462    
4463            /* \b is backspace; any other special means the '-' was literal. */            /* \b is backspace; any other special means the '-' was literal. */
4464    
4465            if (descape > 0)            if (descape != 0)
4466              {              {
4467              if (descape == ESC_b) d = CHAR_BS; else              if (descape == ESC_b) d = CHAR_BS; else
4468                {                {
# Line 6673  for (;; ptr++) Line 6676  for (;; ptr++)
6676      /* Handle metasequences introduced by \. For ones like \d, the ESC_ values      /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
6677      are arranged to be the negation of the corresponding OP_values in the      are arranged to be the negation of the corresponding OP_values in the
6678      default case when PCRE_UCP is not set. For the back references, the values      default case when PCRE_UCP is not set. For the back references, the values
6679      are ESC_REF plus the reference number. Only back references and those types      are the negative of the reference number. Only back references and those types
6680      that consume a character may be repeated. We can test for values between      that consume a character may be repeated. We can test for values between
6681      ESC_b and ESC_Z for the latter; this may have to change if any new ones are      ESC_b and ESC_Z for the latter; this may have to change if any new ones are
6682      ever created. */      ever created. */
# Line 6713  for (;; ptr++) Line 6716  for (;; ptr++)
6716        is a subroutine call by number (Oniguruma syntax). In fact, the value        is a subroutine call by number (Oniguruma syntax). In fact, the value
6717        ESC_g is returned only for these cases. So we don't need to check for <        ESC_g is returned only for these cases. So we don't need to check for <
6718        or ' if the value is ESC_g. For the Perl syntax \g{n} the value is        or ' if the value is ESC_g. For the Perl syntax \g{n} the value is
6719        ESC_REF+n, and for the Perl syntax \g{name} the result is ESC_k (as        -n, and for the Perl syntax \g{name} the result is ESC_k (as
6720        that is a synonym for a named back reference). */        that is a synonym for a named back reference). */
6721    
6722        if (escape == ESC_g)        if (escape == ESC_g)
# Line 6791  for (;; ptr++) Line 6794  for (;; ptr++)
6794        not set to cope with cases like (?=(\w+))\1: which would otherwise set        not set to cope with cases like (?=(\w+))\1: which would otherwise set
6795        ':' later. */        ':' later. */
6796    
6797        if (escape >= ESC_REF)        if (escape < 0)
6798          {          {
6799          open_capitem *oc;          open_capitem *oc;
6800          recno = escape - ESC_REF;          recno = -escape;
6801    
6802          HANDLE_REFERENCE:    /* Come here from named backref handling */          HANDLE_REFERENCE:    /* Come here from named backref handling */
6803          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;

Legend:
Removed from v.1060  
changed lines
  Added in v.1064

  ViewVC Help
Powered by ViewVC 1.1.5