/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1060 by chpe, Tue Oct 16 15:53:57 2012 UTC revision 1067 by chpe, Tue Oct 16 15:54:22 2012 UTC
# Line 750  return (*p == CHAR_RIGHT_CURLY_BRACKET); Line 750  return (*p == CHAR_RIGHT_CURLY_BRACKET);
750    
751  /* This function is called when a \ has been encountered. It either returns a  /* This function is called when a \ has been encountered. It either returns a
752  positive value for a simple escape such as \n, or 0 for a data character  positive value for a simple escape such as \n, or 0 for a data character
753  which will be placed in chptr. A backreference to group  which will be placed in chptr. A backreference to group n is returned as
754  n is returned as ESC_REF + n; ESC_REF is the highest ESC_xxx macro. When  negative n. When UTF-8 is enabled, a positive value greater than 255 may
755  UTF-8 is enabled, a positive value greater than 255 may be returned in chptr.  be returned in chptr.
756  On entry, ptr is pointing at the \. On exit, it is on the final character of the  On entry, ptr is pointing at the \. On exit, it is on the final character of the
757  escape sequence.  escape sequence.
758    
# Line 766  Arguments: Line 766  Arguments:
766    
767  Returns:         zero => a data character  Returns:         zero => a data character
768                   positive => a special escape sequence                   positive => a special escape sequence
769                     negative => a back reference
770                   on error, errorcodeptr is set                   on error, errorcodeptr is set
771  */  */
772    
773  static int  static int
774  check_escape(const pcre_uchar **ptrptr, int *chptr, int *errorcodeptr,  check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,
775    int bracount, int options, BOOL isclass)    int bracount, int options, BOOL isclass)
776  {  {
777  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
# Line 794  Otherwise further processing may be requ Line 795  Otherwise further processing may be requ
795  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
796  /* Not alphanumeric */  /* Not alphanumeric */
797  else if (c < CHAR_0 || c > CHAR_z) {}  else if (c < CHAR_0 || c > CHAR_z) {}
798  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = i; else escape = -i; }  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
799    
800  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
801  /* Not alphanumeric */  /* Not alphanumeric */
802  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
803  else if ((i = escapes[c - 0x48]) != 0)  { if (i > 0) c = i; else escape = -i; }  else if ((i = escapes[c - 0x48]) != 0)  { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
804  #endif  #endif
805    
806  /* Escapes that need further processing, or are illegal. */  /* Escapes that need further processing, or are illegal. */
# Line 807  else if ((i = escapes[c - 0x48]) != 0) Line 808  else if ((i = escapes[c - 0x48]) != 0)
808  else  else
809    {    {
810    const pcre_uchar *oldptr;    const pcre_uchar *oldptr;
811    BOOL braced, negated;    BOOL braced, negated, overflow;
812      int s;
813    
814    switch (c)    switch (c)
815      {      {
# Line 914  else Line 916  else
916      else negated = FALSE;      else negated = FALSE;
917    
918      /* The integer range is limited by the machine's int representation. */      /* The integer range is limited by the machine's int representation. */
919      c = 0;      s = 0;
920        overflow = FALSE;
921      while (IS_DIGIT(ptr[1]))      while (IS_DIGIT(ptr[1]))
922        {        {
923        if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */        if (s > INT_MAX / 10 - 1) /* Integer overflow */
924          {          {
925          c = -1;          overflow = TRUE;
926          break;          break;
927          }          }
928        c = c * 10 + *(++ptr) - CHAR_0;        s = s * 10 + (int)(*(++ptr) - CHAR_0);
929        }        }
930      if (((unsigned int)c) > INT_MAX) /* Integer overflow */      if (overflow) /* Integer overflow */
931        {        {
932        while (IS_DIGIT(ptr[1]))        while (IS_DIGIT(ptr[1]))
933          ptr++;          ptr++;
# Line 938  else Line 941  else
941        break;        break;
942        }        }
943    
944      if (c == 0)      if (s == 0)
945        {        {
946        *errorcodeptr = ERR58;        *errorcodeptr = ERR58;
947        break;        break;
# Line 946  else Line 949  else
949    
950      if (negated)      if (negated)
951        {        {
952        if (c > bracount)        if (s > bracount)
953          {          {
954          *errorcodeptr = ERR15;          *errorcodeptr = ERR15;
955          break;          break;
956          }          }
957        c = bracount - (c - 1);        s = bracount - (s - 1);
958        }        }
959    
960      escape = ESC_REF + c;      escape = -s;
961      break;      break;
962    
963      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 976  else Line 979  else
979        {        {
980        oldptr = ptr;        oldptr = ptr;
981        /* The integer range is limited by the machine's int representation. */        /* The integer range is limited by the machine's int representation. */
982        c -= CHAR_0;        s = (int)(c -CHAR_0);
983          overflow = FALSE;
984        while (IS_DIGIT(ptr[1]))        while (IS_DIGIT(ptr[1]))
985          {          {
986          if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */          if (s > INT_MAX / 10 - 1) /* Integer overflow */
987            {            {
988            c = -1;            overflow = TRUE;
989            break;            break;
990            }            }
991          c = c * 10 + *(++ptr) - CHAR_0;          s = s * 10 + (int)(*(++ptr) - CHAR_0);
992          }          }
993        if (((unsigned int)c) > INT_MAX) /* Integer overflow */        if (overflow) /* Integer overflow */
994          {          {
995          while (IS_DIGIT(ptr[1]))          while (IS_DIGIT(ptr[1]))
996            ptr++;            ptr++;
997          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
998          break;          break;
999          }          }
1000        if (c < 10 || c <= bracount)        if (s < 10 || s <= bracount)
1001          {          {
1002          escape = ESC_REF + c;          escape = -s;
1003          break;          break;
1004          }          }
1005        ptr = oldptr;      /* Put the pointer back and fall through */        ptr = oldptr;      /* Put the pointer back and fall through */
# Line 1058  else Line 1062  else
1062      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1063        {        {
1064        const pcre_uchar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
       BOOL overflow;  
1065    
1066        c = 0;        c = 0;
1067        overflow = FALSE;        overflow = FALSE;
# Line 1207  Returns:         type value from ucp_typ Line 1210  Returns:         type value from ucp_typ
1210  static int  static int
1211  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
1212  {  {
1213  int c, i, bot, top;  pcre_uchar c;
1214    int i, bot, top;
1215  const pcre_uchar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
1216  pcre_uchar name[32];  pcre_uchar name[32];
1217    
# Line 1254  top = PRIV(utt_size); Line 1258  top = PRIV(utt_size);
1258    
1259  while (bot < top)  while (bot < top)
1260    {    {
1261      int r;
1262    i = (bot + top) >> 1;    i = (bot + top) >> 1;
1263    c = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);    r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
1264    if (c == 0)    if (r == 0)
1265      {      {
1266      *dptr = PRIV(utt)[i].value;      *dptr = PRIV(utt)[i].value;
1267      return PRIV(utt)[i].type;      return PRIV(utt)[i].type;
1268      }      }
1269    if (c > 0) bot = i + 1; else top = i;    if (r > 0) bot = i + 1; else top = i;
1270    }    }
1271    
1272  *errorcodeptr = ERR47;  *errorcodeptr = ERR47;
# Line 1306  int max = -1; Line 1311  int max = -1;
1311  /* Read the minimum value and do a paranoid check: a negative value indicates  /* Read the minimum value and do a paranoid check: a negative value indicates
1312  an integer overflow. */  an integer overflow. */
1313    
1314  while (IS_DIGIT(*p)) min = min * 10 + *p++ - CHAR_0;  while (IS_DIGIT(*p)) min = min * 10 + (int)(*p++ - CHAR_0);
1315  if (min < 0 || min > 65535)  if (min < 0 || min > 65535)
1316    {    {
1317    *errorcodeptr = ERR5;    *errorcodeptr = ERR5;
# Line 1321  if (*p == CHAR_RIGHT_CURLY_BRACKET) max Line 1326  if (*p == CHAR_RIGHT_CURLY_BRACKET) max
1326    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
1327      {      {
1328      max = 0;      max = 0;
1329      while(IS_DIGIT(*p)) max = max * 10 + *p++ - CHAR_0;      while(IS_DIGIT(*p)) max = max * 10 + (int)(*p++ - CHAR_0);
1330      if (max < 0 || max > 65535)      if (max < 0 || max > 65535)
1331        {        {
1332        *errorcodeptr = ERR5;        *errorcodeptr = ERR5;
# Line 1452  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1457  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1457      if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&      if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1458          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1459        {        {
1460        int term;        pcre_uchar term;
1461        const pcre_uchar *thisname;        const pcre_uchar *thisname;
1462        *count += 1;        *count += 1;
1463        if (name == NULL && *count == lorn) return *count;        if (name == NULL && *count == lorn) return *count;
# Line 1460  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1465  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1465        if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;        if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1466        thisname = ptr;        thisname = ptr;
1467        while (*ptr != term) ptr++;        while (*ptr != term) ptr++;
1468        if (name != NULL && lorn == ptr - thisname &&        if (name != NULL && lorn == (int)(ptr - thisname) &&
1469            STRNCMP_UC_UC(name, thisname, lorn) == 0)            STRNCMP_UC_UC(name, thisname, (unsigned int)lorn) == 0)
1470          return *count;          return *count;
1471        term++;        term++;
1472        }        }
# Line 2904  Yield:        -1 when no more Line 2909  Yield:        -1 when no more
2909  */  */
2910    
2911  static int  static int
2912  get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,  get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr,
2913    unsigned int *odptr)    pcre_uint32 *odptr)
2914  {  {
2915  unsigned int c, othercase, next;  pcre_uint32 c, othercase, next;
2916  int co;  int co;
2917    
2918  /* Find the first character that has an other case. If it has multiple other  /* Find the first character that has an other case. If it has multiple other
# Line 2959  Returns:       TRUE if auto-possessifyin Line 2964  Returns:       TRUE if auto-possessifyin
2964  */  */
2965    
2966  static BOOL  static BOOL
2967  check_char_prop(int c, int ptype, int pdata, BOOL negated)  check_char_prop(pcre_uint32 c, int ptype, int pdata, BOOL negated)
2968  {  {
2969  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2970  const pcre_uint32 *p;  const pcre_uint32 *p;
# Line 3045  static BOOL Line 3050  static BOOL
3050  check_auto_possessive(const pcre_uchar *previous, BOOL utf,  check_auto_possessive(const pcre_uchar *previous, BOOL utf,
3051    const pcre_uchar *ptr, int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
3052  {  {
3053  pcre_int32 c = NOTACHAR; // FIXMEchpe pcre_uint32  pcre_uint32 c = NOTACHAR;
3054  pcre_int32 next;  pcre_uint32 next;
3055  int escape;  int escape;
3056  int op_code = *previous++;  int op_code = *previous++;
3057    
# Line 3144  if (escape == 0) Line 3149  if (escape == 0)
3149    case, which maps to the special PT_CLIST property. Check this first. */    case, which maps to the special PT_CLIST property. Check this first. */
3150    
3151  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3152    if (utf && (unsigned int)c != NOTACHAR && (options & PCRE_CASELESS) != 0)    if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)
3153      {      {
3154      int ocs = UCD_CASESET(next);      int ocs = UCD_CASESET(next);
3155      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);
# Line 3166  if (escape == 0) Line 3171  if (escape == 0)
3171  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3172      if (utf)      if (utf)
3173        {        {
3174        unsigned int othercase;        pcre_uint32 othercase;
3175        if (next < 128) othercase = cd->fcc[next]; else        if (next < 128) othercase = cd->fcc[next]; else
3176  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3177        othercase = UCD_OTHERCASE((unsigned int)next);        othercase = UCD_OTHERCASE(next);
3178  #else  #else
3179        othercase = NOTACHAR;        othercase = NOTACHAR;
3180  #endif  #endif
3181        return (unsigned int)c != othercase;        return c != othercase;
3182        }        }
3183      else      else
3184  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3185      return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Not UTF */      return (c != TABLE_GET(next, cd->fcc, next));  /* Not UTF */
3186    
3187      case OP_NOT:      case OP_NOT:
3188      return c == next;      return c == next;
# Line 3187  if (escape == 0) Line 3192  if (escape == 0)
3192  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3193      if (utf)      if (utf)
3194        {        {
3195        unsigned int othercase;        pcre_uint32 othercase;
3196        if (next < 128) othercase = cd->fcc[next]; else        if (next < 128) othercase = cd->fcc[next]; else
3197  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3198        othercase = UCD_OTHERCASE((unsigned int)next);        othercase = UCD_OTHERCASE(next);
3199  #else  #else
3200        othercase = NOTACHAR;        othercase = NOTACHAR;
3201  #endif  #endif
3202        return (unsigned int)c == othercase;        return c == othercase;
3203        }        }
3204      else      else
3205  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3206      return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Not UTF */      return (c == TABLE_GET(next, cd->fcc, next));  /* Not UTF */
3207    
3208      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
3209      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
# Line 3689  pcre_int32 req_caseopt, reqvary, tempreq Line 3694  pcre_int32 req_caseopt, reqvary, tempreq
3694  int options = *optionsptr;               /* May change dynamically */  int options = *optionsptr;               /* May change dynamically */
3695  int after_manual_callout = 0;  int after_manual_callout = 0;
3696  int length_prevgroup = 0;  int length_prevgroup = 0;
3697  register int c;  register pcre_uint32 c;
3698  int escape;  int escape;
3699  register pcre_uchar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3700  pcre_uchar *last_code = code;  pcre_uchar *last_code = code;
# Line 4459  for (;; ptr++) Line 4464  for (;; ptr++)
4464    
4465            /* \b is backspace; any other special means the '-' was literal. */            /* \b is backspace; any other special means the '-' was literal. */
4466    
4467            if (descape > 0)            if (descape != 0)
4468              {              {
4469              if (descape == ESC_b) d = CHAR_BS; else              if (descape == ESC_b) d = CHAR_BS; else
4470                {                {
# Line 6673  for (;; ptr++) Line 6678  for (;; ptr++)
6678      /* Handle metasequences introduced by \. For ones like \d, the ESC_ values      /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
6679      are arranged to be the negation of the corresponding OP_values in the      are arranged to be the negation of the corresponding OP_values in the
6680      default case when PCRE_UCP is not set. For the back references, the values      default case when PCRE_UCP is not set. For the back references, the values
6681      are ESC_REF plus the reference number. Only back references and those types      are negative the reference number. Only back references and those types
6682      that consume a character may be repeated. We can test for values between      that consume a character may be repeated. We can test for values between
6683      ESC_b and ESC_Z for the latter; this may have to change if any new ones are      ESC_b and ESC_Z for the latter; this may have to change if any new ones are
6684      ever created. */      ever created. */
# Line 6713  for (;; ptr++) Line 6718  for (;; ptr++)
6718        is a subroutine call by number (Oniguruma syntax). In fact, the value        is a subroutine call by number (Oniguruma syntax). In fact, the value
6719        ESC_g is returned only for these cases. So we don't need to check for <        ESC_g is returned only for these cases. So we don't need to check for <
6720        or ' if the value is ESC_g. For the Perl syntax \g{n} the value is        or ' if the value is ESC_g. For the Perl syntax \g{n} the value is
6721        ESC_REF+n, and for the Perl syntax \g{name} the result is ESC_k (as        -n, and for the Perl syntax \g{name} the result is ESC_k (as
6722        that is a synonym for a named back reference). */        that is a synonym for a named back reference). */
6723    
6724        if (escape == ESC_g)        if (escape == ESC_g)
# Line 6791  for (;; ptr++) Line 6796  for (;; ptr++)
6796        not set to cope with cases like (?=(\w+))\1: which would otherwise set        not set to cope with cases like (?=(\w+))\1: which would otherwise set
6797        ':' later. */        ':' later. */
6798    
6799        if (escape >= ESC_REF)        if (escape < 0)
6800          {          {
6801          open_capitem *oc;          open_capitem *oc;
6802          recno = escape - ESC_REF;          recno = -escape;
6803    
6804          HANDLE_REFERENCE:    /* Come here from named backref handling */          HANDLE_REFERENCE:    /* Come here from named backref handling */
6805          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;

Legend:
Removed from v.1060  
changed lines
  Added in v.1067

  ViewVC Help
Powered by ViewVC 1.1.5