/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1061 by chpe, Tue Oct 16 15:54:02 2012 UTC | revision 1072 by chpe, Tue Oct 16 15:54:40 2012 UTC
# Line 771  Returns:         zero => a data characte Line 771  Returns:         zero => a data characte
771  */  */
772    
773  static int  static int
774  check_escape(const pcre_uchar **ptrptr, int *chptr, int *errorcodeptr,  check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,
775    int bracount, int options, BOOL isclass)    int bracount, int options, BOOL isclass)
776  {  {
777  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
# Line 795  Otherwise further processing may be requ Line 795  Otherwise further processing may be requ
795  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
796  /* Not alphanumeric */  /* Not alphanumeric */
797  else if (c < CHAR_0 || c > CHAR_z) {}  else if (c < CHAR_0 || c > CHAR_z) {}
798  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = i; else escape = -i; }  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
799    
800  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
801  /* Not alphanumeric */  /* Not alphanumeric */
802  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
803  else if ((i = escapes[c - 0x48]) != 0)  { if (i > 0) c = i; else escape = -i; }  else if ((i = escapes[c - 0x48]) != 0)  { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
804  #endif  #endif
805    
806  /* Escapes that need further processing, or are illegal. */  /* Escapes that need further processing, or are illegal. */
# Line 808  else if ((i = escapes[c - 0x48]) != 0) Line 808  else if ((i = escapes[c - 0x48]) != 0)
808  else  else
809    {    {
810    const pcre_uchar *oldptr;    const pcre_uchar *oldptr;
811    BOOL braced, negated;    BOOL braced, negated, overflow;
812      int s;
813    
814    switch (c)    switch (c)
815      {      {
# Line 915  else Line 916  else
916      else negated = FALSE;      else negated = FALSE;
917    
918      /* The integer range is limited by the machine's int representation. */      /* The integer range is limited by the machine's int representation. */
919      c = 0;      s = 0;
920        overflow = FALSE;
921      while (IS_DIGIT(ptr[1]))      while (IS_DIGIT(ptr[1]))
922        {        {
923        if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */        if (s > INT_MAX / 10 - 1) /* Integer overflow */
924          {          {
925          c = -1;          overflow = TRUE;
926          break;          break;
927          }          }
928        c = c * 10 + *(++ptr) - CHAR_0;        s = s * 10 + (int)(*(++ptr) - CHAR_0);
929        }        }
930      if (((unsigned int)c) > INT_MAX) /* Integer overflow */      if (overflow) /* Integer overflow */
931        {        {
932        while (IS_DIGIT(ptr[1]))        while (IS_DIGIT(ptr[1]))
933          ptr++;          ptr++;
# Line 939  else Line 941  else
941        break;        break;
942        }        }
943    
944      if (c == 0)      if (s == 0)
945        {        {
946        *errorcodeptr = ERR58;        *errorcodeptr = ERR58;
947        break;        break;
# Line 947  else Line 949  else
949    
950      if (negated)      if (negated)
951        {        {
952        if (c > bracount)        if (s > bracount)
953          {          {
954          *errorcodeptr = ERR15;          *errorcodeptr = ERR15;
955          break;          break;
956          }          }
957        c = bracount - (c - 1);        s = bracount - (s - 1);
958        }        }
959    
960      escape = -c;      escape = -s;
961      break;      break;
962    
963      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 977  else Line 979  else
979        {        {
980        oldptr = ptr;        oldptr = ptr;
981        /* The integer range is limited by the machine's int representation. */        /* The integer range is limited by the machine's int representation. */
982        c -= CHAR_0;        s = (int)(c -CHAR_0);
983          overflow = FALSE;
984        while (IS_DIGIT(ptr[1]))        while (IS_DIGIT(ptr[1]))
985          {          {
986          if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */          if (s > INT_MAX / 10 - 1) /* Integer overflow */
987            {            {
988            c = -1;            overflow = TRUE;
989            break;            break;
990            }            }
991          c = c * 10 + *(++ptr) - CHAR_0;          s = s * 10 + (int)(*(++ptr) - CHAR_0);
992          }          }
993        if (((unsigned int)c) > INT_MAX) /* Integer overflow */        if (overflow) /* Integer overflow */
994          {          {
995          while (IS_DIGIT(ptr[1]))          while (IS_DIGIT(ptr[1]))
996            ptr++;            ptr++;
997          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
998          break;          break;
999          }          }
1000        if (c < 10 || c <= bracount)        if (s < 10 || s <= bracount)
1001          {          {
1002          escape = -c;          escape = -s;
1003          break;          break;
1004          }          }
1005        ptr = oldptr;      /* Put the pointer back and fall through */        ptr = oldptr;      /* Put the pointer back and fall through */
# Line 1059  else Line 1062  else
1062      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)      if (ptr[1] == CHAR_LEFT_CURLY_BRACKET)
1063        {        {
1064        const pcre_uchar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
       BOOL overflow;  
1065    
1066        c = 0;        c = 0;
1067        overflow = FALSE;        overflow = FALSE;
# Line 1208  Returns:         type value from ucp_typ Line 1210  Returns:         type value from ucp_typ
1210  static int  static int
1211  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
1212  {  {
1213  int c, i, bot, top;  pcre_uchar c;
1214    int i, bot, top;
1215  const pcre_uchar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
1216  pcre_uchar name[32];  pcre_uchar name[32];
1217    
# Line 1255  top = PRIV(utt_size); Line 1258  top = PRIV(utt_size);
1258    
1259  while (bot < top)  while (bot < top)
1260    {    {
1261      int r;
1262    i = (bot + top) >> 1;    i = (bot + top) >> 1;
1263    c = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);    r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
1264    if (c == 0)    if (r == 0)
1265      {      {
1266      *dptr = PRIV(utt)[i].value;      *dptr = PRIV(utt)[i].value;
1267      return PRIV(utt)[i].type;      return PRIV(utt)[i].type;
1268      }      }
1269    if (c > 0) bot = i + 1; else top = i;    if (r > 0) bot = i + 1; else top = i;
1270    }    }
1271    
1272  *errorcodeptr = ERR47;  *errorcodeptr = ERR47;
# Line 1307  int max = -1; Line 1311  int max = -1;
1311  /* Read the minimum value and do a paranoid check: a negative value indicates  /* Read the minimum value and do a paranoid check: a negative value indicates
1312  an integer overflow. */  an integer overflow. */
1313    
1314  while (IS_DIGIT(*p)) min = min * 10 + *p++ - CHAR_0;  while (IS_DIGIT(*p)) min = min * 10 + (int)(*p++ - CHAR_0);
1315  if (min < 0 || min > 65535)  if (min < 0 || min > 65535)
1316    {    {
1317    *errorcodeptr = ERR5;    *errorcodeptr = ERR5;
# Line 1322  if (*p == CHAR_RIGHT_CURLY_BRACKET) max Line 1326  if (*p == CHAR_RIGHT_CURLY_BRACKET) max
1326    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
1327      {      {
1328      max = 0;      max = 0;
1329      while(IS_DIGIT(*p)) max = max * 10 + *p++ - CHAR_0;      while(IS_DIGIT(*p)) max = max * 10 + (int)(*p++ - CHAR_0);
1330      if (max < 0 || max > 65535)      if (max < 0 || max > 65535)
1331        {        {
1332        *errorcodeptr = ERR5;        *errorcodeptr = ERR5;
# Line 1453  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1457  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1457      if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&      if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1458          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1459        {        {
1460        int term;        pcre_uchar term;
1461        const pcre_uchar *thisname;        const pcre_uchar *thisname;
1462        *count += 1;        *count += 1;
1463        if (name == NULL && *count == lorn) return *count;        if (name == NULL && *count == lorn) return *count;
# Line 1461  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1465  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1465        if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;        if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1466        thisname = ptr;        thisname = ptr;
1467        while (*ptr != term) ptr++;        while (*ptr != term) ptr++;
1468        if (name != NULL && lorn == ptr - thisname &&        if (name != NULL && lorn == (int)(ptr - thisname) &&
1469            STRNCMP_UC_UC(name, thisname, lorn) == 0)            STRNCMP_UC_UC(name, thisname, (unsigned int)lorn) == 0)
1470          return *count;          return *count;
1471        term++;        term++;
1472        }        }
# Line 1740  for (;;) Line 1744  for (;;)
1744    {    {
1745    int d;    int d;
1746    pcre_uchar *ce, *cs;    pcre_uchar *ce, *cs;
1747    register int op = *cc;    register pcre_uchar op = *cc;
1748    
1749    switch (op)    switch (op)
1750      {      {
# Line 1860  for (;;) Line 1864  for (;;)
1864      case OP_EXACTI:      case OP_EXACTI:
1865      case OP_NOTEXACT:      case OP_NOTEXACT:
1866      case OP_NOTEXACTI:      case OP_NOTEXACTI:
1867      branchlength += GET2(cc,1);      branchlength += (int)GET2(cc,1);
1868      cc += 2 + IMM2_SIZE;      cc += 2 + IMM2_SIZE;
1869  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1870      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
# Line 1928  for (;;) Line 1932  for (;;)
1932        case OP_CRRANGE:        case OP_CRRANGE:
1933        case OP_CRMINRANGE:        case OP_CRMINRANGE:
1934        if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;        if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
1935        branchlength += GET2(cc,1);        branchlength += (int)GET2(cc,1);
1936        cc += 1 + 2 * IMM2_SIZE;        cc += 1 + 2 * IMM2_SIZE;
1937        break;        break;
1938    
# Line 2055  PRIV(find_bracket)(const pcre_uchar *cod Line 2059  PRIV(find_bracket)(const pcre_uchar *cod
2059  {  {
2060  for (;;)  for (;;)
2061    {    {
2062    register int c = *code;    register pcre_uchar c = *code;
2063    
2064    if (c == OP_END) return NULL;    if (c == OP_END) return NULL;
2065    
# Line 2078  for (;;) Line 2082  for (;;)
2082    else if (c == OP_CBRA || c == OP_SCBRA ||    else if (c == OP_CBRA || c == OP_SCBRA ||
2083             c == OP_CBRAPOS || c == OP_SCBRAPOS)             c == OP_CBRAPOS || c == OP_SCBRAPOS)
2084      {      {
2085      int n = GET2(code, 1+LINK_SIZE);      int n = (int)GET2(code, 1+LINK_SIZE);
2086      if (n == number) return (pcre_uchar *)code;      if (n == number) return (pcre_uchar *)code;
2087      code += PRIV(OP_lengths)[c];      code += PRIV(OP_lengths)[c];
2088      }      }
# Line 2193  find_recurse(const pcre_uchar *code, BOO Line 2197  find_recurse(const pcre_uchar *code, BOO
2197  {  {
2198  for (;;)  for (;;)
2199    {    {
2200    register int c = *code;    register pcre_uchar c = *code;
2201    if (c == OP_END) return NULL;    if (c == OP_END) return NULL;
2202    if (c == OP_RECURSE) return code;    if (c == OP_RECURSE) return code;
2203    
# Line 2347  static BOOL Line 2351  static BOOL
2351  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2352    BOOL utf, compile_data *cd)    BOOL utf, compile_data *cd)
2353  {  {
2354  register int c;  register pcre_uchar c;
2355  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
2356       code < endcode;       code < endcode;
2357       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
# Line 2381  for (code = first_significant_code(code Line 2385  for (code = first_significant_code(code
2385      /* Test for forward reference */      /* Test for forward reference */
2386    
2387      for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)      for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
2388        if (GET(scode, 0) == code + 1 - cd->start_code) return TRUE;        if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
2389    
2390      /* Not a forward reference, test for completed backward reference */      /* Not a forward reference, test for completed backward reference */
2391    
# Line 2702  Returns:   TRUE or FALSE Line 2706  Returns:   TRUE or FALSE
2706  static BOOL  static BOOL
2707  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
2708  {  {
2709  int terminator;          /* Don't combine these lines; the Solaris cc */  pcre_uchar terminator;          /* Don't combine these lines; the Solaris cc */
2710  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
2711  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != 0; ptr++)
2712    {    {
# Line 2905  Yield:        -1 when no more Line 2909  Yield:        -1 when no more
2909  */  */
2910    
2911  static int  static int
2912  get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,  get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr,
2913    unsigned int *odptr)    pcre_uint32 *odptr)
2914  {  {
2915  unsigned int c, othercase, next;  pcre_uint32 c, othercase, next;
2916  int co;  int co;
2917    
2918  /* Find the first character that has an other case. If it has multiple other  /* Find the first character that has an other case. If it has multiple other
# Line 2960  Returns:       TRUE if auto-possessifyin Line 2964  Returns:       TRUE if auto-possessifyin
2964  */  */
2965    
2966  static BOOL  static BOOL
2967  check_char_prop(int c, int ptype, int pdata, BOOL negated)  check_char_prop(pcre_uint32 c, int ptype, int pdata, BOOL negated)
2968  {  {
2969  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2970  const pcre_uint32 *p;  const pcre_uint32 *p;
# Line 3046  static BOOL Line 3050  static BOOL
3050  check_auto_possessive(const pcre_uchar *previous, BOOL utf,  check_auto_possessive(const pcre_uchar *previous, BOOL utf,
3051    const pcre_uchar *ptr, int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
3052  {  {
3053  pcre_int32 c = NOTACHAR; // FIXMEchpe pcre_uint32  pcre_uint32 c = NOTACHAR;
3054  pcre_int32 next;  pcre_uint32 next;
3055  int escape;  int escape;
3056  int op_code = *previous++;  int op_code = *previous++;
3057    
# Line 3145  if (escape == 0) Line 3149  if (escape == 0)
3149    case, which maps to the special PT_CLIST property. Check this first. */    case, which maps to the special PT_CLIST property. Check this first. */
3150    
3151  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3152    if (utf && (unsigned int)c != NOTACHAR && (options & PCRE_CASELESS) != 0)    if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)
3153      {      {
3154      int ocs = UCD_CASESET(next);      int ocs = UCD_CASESET(next);
3155      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);
# Line 3167  if (escape == 0) Line 3171  if (escape == 0)
3171  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3172      if (utf)      if (utf)
3173        {        {
3174        unsigned int othercase;        pcre_uint32 othercase;
3175        if (next < 128) othercase = cd->fcc[next]; else        if (next < 128) othercase = cd->fcc[next]; else
3176  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3177        othercase = UCD_OTHERCASE((unsigned int)next);        othercase = UCD_OTHERCASE(next);
3178  #else  #else
3179        othercase = NOTACHAR;        othercase = NOTACHAR;
3180  #endif  #endif
3181        return (unsigned int)c != othercase;        return c != othercase;
3182        }        }
3183      else      else
3184  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3185      return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Not UTF */      return (c != TABLE_GET(next, cd->fcc, next));  /* Not UTF */
3186    
3187      case OP_NOT:      case OP_NOT:
3188      return c == next;      return c == next;
# Line 3188  if (escape == 0) Line 3192  if (escape == 0)
3192  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3193      if (utf)      if (utf)
3194        {        {
3195        unsigned int othercase;        pcre_uint32 othercase;
3196        if (next < 128) othercase = cd->fcc[next]; else        if (next < 128) othercase = cd->fcc[next]; else
3197  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3198        othercase = UCD_OTHERCASE((unsigned int)next);        othercase = UCD_OTHERCASE(next);
3199  #else  #else
3200        othercase = NOTACHAR;        othercase = NOTACHAR;
3201  #endif  #endif
3202        return (unsigned int)c == othercase;        return c == othercase;
3203        }        }
3204      else      else
3205  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3206      return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Not UTF */      return (c == TABLE_GET(next, cd->fcc, next));  /* Not UTF */
3207    
3208      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
3209      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
# Line 3690  pcre_int32 req_caseopt, reqvary, tempreq Line 3694  pcre_int32 req_caseopt, reqvary, tempreq
3694  int options = *optionsptr;               /* May change dynamically */  int options = *optionsptr;               /* May change dynamically */
3695  int after_manual_callout = 0;  int after_manual_callout = 0;
3696  int length_prevgroup = 0;  int length_prevgroup = 0;
3697  register int c;  register pcre_uint32 c;
3698  int escape;  int escape;
3699  register pcre_uchar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3700  pcre_uchar *last_code = code;  pcre_uchar *last_code = code;

Legend:
Removed from v.1061  
changed lines
  Added in v.1072

  ViewVC Help
Powered by ViewVC 1.1.5