/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

Left column: revision 1059 by chpe, Tue Oct 16 15:53:53 2012 UTC | Right column: revision 1074 by chpe, Tue Oct 16 15:54:47 2012 UTC
# Line 750  return (*p == CHAR_RIGHT_CURLY_BRACKET); Line 750  return (*p == CHAR_RIGHT_CURLY_BRACKET);
750    
751  /* This function is called when a \ has been encountered. It either returns a  /* This function is called when a \ has been encountered. It either returns a
752  positive value for a simple escape such as \n, or 0 for a data character  positive value for a simple escape such as \n, or 0 for a data character
753  which will be placed in chptr. A backreference to group  which will be placed in chptr. A backreference to group n is returned as
754  n is returned as ESC_REF + n; ESC_REF is the highest ESC_xxx macro. When  negative n. When UTF-8 is enabled, a positive value greater than 255 may
755  UTF-8 is enabled, a positive value greater than 255 may be returned in chptr.  be returned in chptr.
756  On entry,ptr is pointing at the \. On exit, it is on the final character of the  On entry,ptr is pointing at the \. On exit, it is on the final character of the
757  escape sequence.  escape sequence.
758    
# Line 766  Arguments: Line 766  Arguments:
766    
767  Returns:         zero => a data character  Returns:         zero => a data character
768                   positive => a special escape sequence                   positive => a special escape sequence
769                     negative => a back reference
770                   on error, errorcodeptr is set                   on error, errorcodeptr is set
771  */  */
772    
773  static int  static int
774  check_escape(const pcre_uchar **ptrptr, int *chptr, int *errorcodeptr,  check_escape(const pcre_uchar **ptrptr, pcre_uint32 *chptr, int *errorcodeptr,
775    int bracount, int options, BOOL isclass)    int bracount, int options, BOOL isclass)
776  {  {
777  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
778  BOOL utf = (options & PCRE_UTF8) != 0;  BOOL utf = (options & PCRE_UTF8) != 0;
779  const pcre_uchar *ptr = *ptrptr + 1;  const pcre_uchar *ptr = *ptrptr + 1;
780  pcre_int32 c;  pcre_uint32 c;
781  int escape = 0;  int escape = 0;
782  int i;  int i;
783    
# Line 794  Otherwise further processing may be requ Line 795  Otherwise further processing may be requ
795  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
796  /* Not alphanumeric */  /* Not alphanumeric */
797  else if (c < CHAR_0 || c > CHAR_z) {}  else if (c < CHAR_0 || c > CHAR_z) {}
798  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = i; else escape = -i; }  else if ((i = escapes[c - CHAR_0]) != 0) { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
799    
800  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
801  /* Not alphanumeric */  /* Not alphanumeric */
802  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}  else if (c < CHAR_a || (!MAX_255(c) || (ebcdic_chartab[c] & 0x0E) == 0)) {}
803  else if ((i = escapes[c - 0x48]) != 0)  { if (i > 0) c = i; else escape = -i; }  else if ((i = escapes[c - 0x48]) != 0)  { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
804  #endif  #endif
805    
806  /* Escapes that need further processing, or are illegal. */  /* Escapes that need further processing, or are illegal. */
# Line 807  else if ((i = escapes[c - 0x48]) != 0) Line 808  else if ((i = escapes[c - 0x48]) != 0)
808  else  else
809    {    {
810    const pcre_uchar *oldptr;    const pcre_uchar *oldptr;
811    BOOL braced, negated;    BOOL braced, negated, overflow;
812      int s;
813    
814    switch (c)    switch (c)
815      {      {
# Line 832  else Line 834  else
834          c = 0;          c = 0;
835          for (i = 0; i < 4; ++i)          for (i = 0; i < 4; ++i)
836            {            {
837            register int cc = *(++ptr);            register pcre_uint32 cc = *(++ptr);
838  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
839            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
840            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
# Line 914  else Line 916  else
916      else negated = FALSE;      else negated = FALSE;
917    
918      /* The integer range is limited by the machine's int representation. */      /* The integer range is limited by the machine's int representation. */
919      c = 0;      s = 0;
920        overflow = FALSE;
921      while (IS_DIGIT(ptr[1]))      while (IS_DIGIT(ptr[1]))
922        {        {
923        if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */        if (s > INT_MAX / 10 - 1) /* Integer overflow */
924          {          {
925          c = -1;          overflow = TRUE;
926          break;          break;
927          }          }
928        c = c * 10 + *(++ptr) - CHAR_0;        s = s * 10 + (int)(*(++ptr) - CHAR_0);
929        }        }
930      if (((unsigned int)c) > INT_MAX) /* Integer overflow */      if (overflow) /* Integer overflow */
931        {        {
932        while (IS_DIGIT(ptr[1]))        while (IS_DIGIT(ptr[1]))
933          ptr++;          ptr++;
# Line 938  else Line 941  else
941        break;        break;
942        }        }
943    
944      if (c == 0)      if (s == 0)
945        {        {
946        *errorcodeptr = ERR58;        *errorcodeptr = ERR58;
947        break;        break;
# Line 946  else Line 949  else
949    
950      if (negated)      if (negated)
951        {        {
952        if (c > bracount)        if (s > bracount)
953          {          {
954          *errorcodeptr = ERR15;          *errorcodeptr = ERR15;
955          break;          break;
956          }          }
957        c = bracount - (c - 1);        s = bracount - (s - 1);
958        }        }
959    
960      escape = ESC_REF + c;      escape = -s;
961      break;      break;
962    
963      /* The handling of escape sequences consisting of a string of digits      /* The handling of escape sequences consisting of a string of digits
# Line 976  else Line 979  else
979        {        {
980        oldptr = ptr;        oldptr = ptr;
981        /* The integer range is limited by the machine's int representation. */        /* The integer range is limited by the machine's int representation. */
982        c -= CHAR_0;        s = (int)(c -CHAR_0);
983          overflow = FALSE;
984        while (IS_DIGIT(ptr[1]))        while (IS_DIGIT(ptr[1]))
985          {          {
986          if (((unsigned int)c) > INT_MAX / 10) /* Integer overflow */          if (s > INT_MAX / 10 - 1) /* Integer overflow */
987            {            {
988            c = -1;            overflow = TRUE;
989            break;            break;
990            }            }
991          c = c * 10 + *(++ptr) - CHAR_0;          s = s * 10 + (int)(*(++ptr) - CHAR_0);
992          }          }
993        if (((unsigned int)c) > INT_MAX) /* Integer overflow */        if (overflow) /* Integer overflow */
994          {          {
995          while (IS_DIGIT(ptr[1]))          while (IS_DIGIT(ptr[1]))
996            ptr++;            ptr++;
997          *errorcodeptr = ERR61;          *errorcodeptr = ERR61;
998          break;          break;
999          }          }
1000        if (c < 10 || c <= bracount)        if (s < 10 || s <= bracount)
1001          {          {
1002          escape = ESC_REF + c;          escape = -s;
1003          break;          break;
1004          }          }
1005        ptr = oldptr;      /* Put the pointer back and fall through */        ptr = oldptr;      /* Put the pointer back and fall through */
# Line 1042  else Line 1046  else
1046          c = 0;          c = 0;
1047          for (i = 0; i < 2; ++i)          for (i = 0; i < 2; ++i)
1048            {            {
1049            register int cc = *(++ptr);            register pcre_uint32 cc = *(++ptr);
1050  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1051            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */            if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
1052            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));            c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
# Line 1060  else Line 1064  else
1064        const pcre_uchar *pt = ptr + 2;        const pcre_uchar *pt = ptr + 2;
1065    
1066        c = 0;        c = 0;
1067          overflow = FALSE;
1068        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)        while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0)
1069          {          {
1070          register int cc = *pt++;          register pcre_uint32 cc = *pt++;
1071          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */          if (c == 0 && cc == CHAR_0) continue;     /* Leading zeroes */
1072    
1073    #ifdef COMPILE_PCRE32
1074            if (c >= 0x10000000l) { overflow = TRUE; break; }
1075    #endif
1076    
1077  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1078          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */          if (cc >= CHAR_a) cc -= 32;               /* Convert to upper case */
1079          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));          c = (c << 4) + cc - ((cc < CHAR_A)? CHAR_0 : (CHAR_A - 10));
# Line 1074  else Line 1083  else
1083  #endif  #endif
1084    
1085  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
1086          if (c > (utf ? 0x10ffff : 0xff)) { c = -1; break; }          if (c > (utf ? 0x10ffff : 0xff)) { overflow = TRUE; break; }
1087  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16
1088          if (c > (utf ? 0x10ffff : 0xffff)) { c = -1; break; }          if (c > (utf ? 0x10ffff : 0xffff)) { overflow = TRUE; break; }
1089  #elif defined COMPILE_PCRE32  #elif defined COMPILE_PCRE32
1090          if (utf && c > 0x10ffff) { c = -1; break; }          if (utf && c > 0x10ffff) { overflow = TRUE; break; }
1091  #endif  #endif
1092          }          }
1093    
1094        if (c < 0)        if (overflow)
1095          {          {
1096          while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;          while (MAX_255(*pt) && (digitab[*pt] & ctype_xdigit) != 0) pt++;
1097          *errorcodeptr = ERR34;          *errorcodeptr = ERR34;
# Line 1104  else Line 1113  else
1113      c = 0;      c = 0;
1114      while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)      while (i++ < 2 && MAX_255(ptr[1]) && (digitab[ptr[1]] & ctype_xdigit) != 0)
1115        {        {
1116        int cc;                                  /* Some compilers don't like */        pcre_uint32 cc;                          /* Some compilers don't like */
1117        cc = *(++ptr);                           /* ++ in initializers */        cc = *(++ptr);                           /* ++ in initializers */
1118  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
1119        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */        if (cc >= CHAR_a) cc -= 32;              /* Convert to upper case */
# Line 1201  Returns:         type value from ucp_typ Line 1210  Returns:         type value from ucp_typ
1210  static int  static int
1211  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)
1212  {  {
1213  int c, i, bot, top;  pcre_uchar c;
1214    int i, bot, top;
1215  const pcre_uchar *ptr = *ptrptr;  const pcre_uchar *ptr = *ptrptr;
1216  pcre_uchar name[32];  pcre_uchar name[32];
1217    
# Line 1248  top = PRIV(utt_size); Line 1258  top = PRIV(utt_size);
1258    
1259  while (bot < top)  while (bot < top)
1260    {    {
1261      int r;
1262    i = (bot + top) >> 1;    i = (bot + top) >> 1;
1263    c = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);    r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
1264    if (c == 0)    if (r == 0)
1265      {      {
1266      *dptr = PRIV(utt)[i].value;      *dptr = PRIV(utt)[i].value;
1267      return PRIV(utt)[i].type;      return PRIV(utt)[i].type;
1268      }      }
1269    if (c > 0) bot = i + 1; else top = i;    if (r > 0) bot = i + 1; else top = i;
1270    }    }
1271    
1272  *errorcodeptr = ERR47;  *errorcodeptr = ERR47;
# Line 1300  int max = -1; Line 1311  int max = -1;
1311  /* Read the minimum value and do a paranoid check: a negative value indicates  /* Read the minimum value and do a paranoid check: a negative value indicates
1312  an integer overflow. */  an integer overflow. */
1313    
1314  while (IS_DIGIT(*p)) min = min * 10 + *p++ - CHAR_0;  while (IS_DIGIT(*p)) min = min * 10 + (int)(*p++ - CHAR_0);
1315  if (min < 0 || min > 65535)  if (min < 0 || min > 65535)
1316    {    {
1317    *errorcodeptr = ERR5;    *errorcodeptr = ERR5;
# Line 1315  if (*p == CHAR_RIGHT_CURLY_BRACKET) max Line 1326  if (*p == CHAR_RIGHT_CURLY_BRACKET) max
1326    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)    if (*(++p) != CHAR_RIGHT_CURLY_BRACKET)
1327      {      {
1328      max = 0;      max = 0;
1329      while(IS_DIGIT(*p)) max = max * 10 + *p++ - CHAR_0;      while(IS_DIGIT(*p)) max = max * 10 + (int)(*p++ - CHAR_0);
1330      if (max < 0 || max > 65535)      if (max < 0 || max > 65535)
1331        {        {
1332        *errorcodeptr = ERR5;        *errorcodeptr = ERR5;
# Line 1446  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1457  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1457      if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&      if ((*ptr == CHAR_LESS_THAN_SIGN && ptr[1] != CHAR_EXCLAMATION_MARK &&
1458          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)          ptr[1] != CHAR_EQUALS_SIGN) || *ptr == CHAR_APOSTROPHE)
1459        {        {
1460        int term;        pcre_uchar term;
1461        const pcre_uchar *thisname;        const pcre_uchar *thisname;
1462        *count += 1;        *count += 1;
1463        if (name == NULL && *count == lorn) return *count;        if (name == NULL && *count == lorn) return *count;
# Line 1454  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1465  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1465        if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;        if (term == CHAR_LESS_THAN_SIGN) term = CHAR_GREATER_THAN_SIGN;
1466        thisname = ptr;        thisname = ptr;
1467        while (*ptr != term) ptr++;        while (*ptr != term) ptr++;
1468        if (name != NULL && lorn == ptr - thisname &&        if (name != NULL && lorn == (int)(ptr - thisname) &&
1469            STRNCMP_UC_UC(name, thisname, lorn) == 0)            STRNCMP_UC_UC(name, thisname, (unsigned int)lorn) == 0)
1470          return *count;          return *count;
1471        term++;        term++;
1472        }        }
# Line 1733  for (;;) Line 1744  for (;;)
1744    {    {
1745    int d;    int d;
1746    pcre_uchar *ce, *cs;    pcre_uchar *ce, *cs;
1747    register int op = *cc;    register pcre_uchar op = *cc;
1748    
1749    switch (op)    switch (op)
1750      {      {
# Line 1853  for (;;) Line 1864  for (;;)
1864      case OP_EXACTI:      case OP_EXACTI:
1865      case OP_NOTEXACT:      case OP_NOTEXACT:
1866      case OP_NOTEXACTI:      case OP_NOTEXACTI:
1867      branchlength += GET2(cc,1);      branchlength += (int)GET2(cc,1);
1868      cc += 2 + IMM2_SIZE;      cc += 2 + IMM2_SIZE;
1869  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1870      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
# Line 1921  for (;;) Line 1932  for (;;)
1932        case OP_CRRANGE:        case OP_CRRANGE:
1933        case OP_CRMINRANGE:        case OP_CRMINRANGE:
1934        if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;        if (GET2(cc,1) != GET2(cc,1+IMM2_SIZE)) return -1;
1935        branchlength += GET2(cc,1);        branchlength += (int)GET2(cc,1);
1936        cc += 1 + 2 * IMM2_SIZE;        cc += 1 + 2 * IMM2_SIZE;
1937        break;        break;
1938    
# Line 2048  PRIV(find_bracket)(const pcre_uchar *cod Line 2059  PRIV(find_bracket)(const pcre_uchar *cod
2059  {  {
2060  for (;;)  for (;;)
2061    {    {
2062    register int c = *code;    register pcre_uchar c = *code;
2063    
2064    if (c == OP_END) return NULL;    if (c == OP_END) return NULL;
2065    
# Line 2071  for (;;) Line 2082  for (;;)
2082    else if (c == OP_CBRA || c == OP_SCBRA ||    else if (c == OP_CBRA || c == OP_SCBRA ||
2083             c == OP_CBRAPOS || c == OP_SCBRAPOS)             c == OP_CBRAPOS || c == OP_SCBRAPOS)
2084      {      {
2085      int n = GET2(code, 1+LINK_SIZE);      int n = (int)GET2(code, 1+LINK_SIZE);
2086      if (n == number) return (pcre_uchar *)code;      if (n == number) return (pcre_uchar *)code;
2087      code += PRIV(OP_lengths)[c];      code += PRIV(OP_lengths)[c];
2088      }      }
# Line 2186  find_recurse(const pcre_uchar *code, BOO Line 2197  find_recurse(const pcre_uchar *code, BOO
2197  {  {
2198  for (;;)  for (;;)
2199    {    {
2200    register int c = *code;    register pcre_uchar c = *code;
2201    if (c == OP_END) return NULL;    if (c == OP_END) return NULL;
2202    if (c == OP_RECURSE) return code;    if (c == OP_RECURSE) return code;
2203    
# Line 2340  static BOOL Line 2351  static BOOL
2351  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2352    BOOL utf, compile_data *cd)    BOOL utf, compile_data *cd)
2353  {  {
2354  register int c;  register pcre_uchar c;
2355  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
2356       code < endcode;       code < endcode;
2357       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
# Line 2374  for (code = first_significant_code(code Line 2385  for (code = first_significant_code(code
2385      /* Test for forward reference */      /* Test for forward reference */
2386    
2387      for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)      for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)
2388        if (GET(scode, 0) == code + 1 - cd->start_code) return TRUE;        if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
2389    
2390      /* Not a forward reference, test for completed backward reference */      /* Not a forward reference, test for completed backward reference */
2391    
# Line 2695  Returns:   TRUE or FALSE Line 2706  Returns:   TRUE or FALSE
2706  static BOOL  static BOOL
2707  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)  check_posix_syntax(const pcre_uchar *ptr, const pcre_uchar **endptr)
2708  {  {
2709  int terminator;          /* Don't combine these lines; the Solaris cc */  pcre_uchar terminator;          /* Don't combine these lines; the Solaris cc */
2710  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
2711  for (++ptr; *ptr != 0; ptr++)  for (++ptr; *ptr != 0; ptr++)
2712    {    {
# Line 2744  register int yield = 0; Line 2755  register int yield = 0;
2755  while (posix_name_lengths[yield] != 0)  while (posix_name_lengths[yield] != 0)
2756    {    {
2757    if (len == posix_name_lengths[yield] &&    if (len == posix_name_lengths[yield] &&
2758      STRNCMP_UC_C8(ptr, pn, len) == 0) return yield;      STRNCMP_UC_C8(ptr, pn, (unsigned int)len) == 0) return yield;
2759    pn += posix_name_lengths[yield] + 1;    pn += posix_name_lengths[yield] + 1;
2760    yield++;    yield++;
2761    }    }
# Line 2799  while ((ptr = (pcre_uchar *)find_recurse Line 2810  while ((ptr = (pcre_uchar *)find_recurse
2810    
2811    for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)    for (hc = save_hwm; hc < cd->hwm; hc += LINK_SIZE)
2812      {      {
2813      offset = GET(hc, 0);      offset = (int)GET(hc, 0);
2814      if (cd->start_code + offset == ptr + 1)      if (cd->start_code + offset == ptr + 1)
2815        {        {
2816        PUT(hc, 0, offset + adjust);        PUT(hc, 0, offset + adjust);
# Line 2812  while ((ptr = (pcre_uchar *)find_recurse Line 2823  while ((ptr = (pcre_uchar *)find_recurse
2823    
2824    if (hc >= cd->hwm)    if (hc >= cd->hwm)
2825      {      {
2826      offset = GET(ptr, 1);      offset = (int)GET(ptr, 1);
2827      if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);      if (cd->start_code + offset >= group) PUT(ptr, 1, offset + adjust);
2828      }      }
2829    
# Line 2898  Yield:        -1 when no more Line 2909  Yield:        -1 when no more
2909  */  */
2910    
2911  static int  static int
2912  get_othercase_range(unsigned int *cptr, unsigned int d, unsigned int *ocptr,  get_othercase_range(pcre_uint32 *cptr, pcre_uint32 d, pcre_uint32 *ocptr,
2913    unsigned int *odptr)    pcre_uint32 *odptr)
2914  {  {
2915  unsigned int c, othercase, next;  pcre_uint32 c, othercase, next;
2916  int co;  int co;
2917    
2918  /* Find the first character that has an other case. If it has multiple other  /* Find the first character that has an other case. If it has multiple other
# Line 2953  Returns:       TRUE if auto-possessifyin Line 2964  Returns:       TRUE if auto-possessifyin
2964  */  */
2965    
2966  static BOOL  static BOOL
2967  check_char_prop(int c, int ptype, int pdata, BOOL negated)  check_char_prop(pcre_uint32 c, int ptype, int pdata, BOOL negated)
2968  {  {
2969  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2970  const pcre_uint32 *p;  const pcre_uint32 *p;
# Line 3039  static BOOL Line 3050  static BOOL
3050  check_auto_possessive(const pcre_uchar *previous, BOOL utf,  check_auto_possessive(const pcre_uchar *previous, BOOL utf,
3051    const pcre_uchar *ptr, int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
3052  {  {
3053  pcre_int32 c = NOTACHAR; // FIXMEchpe pcre_uint32  pcre_uint32 c = NOTACHAR;
3054  pcre_int32 next;  pcre_uint32 next;
3055  int escape;  int escape;
3056  int op_code = *previous++;  int op_code = *previous++;
3057    
# Line 3138  if (escape == 0) Line 3149  if (escape == 0)
3149    case, which maps to the special PT_CLIST property. Check this first. */    case, which maps to the special PT_CLIST property. Check this first. */
3150    
3151  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3152    if (utf && (unsigned int)c != NOTACHAR && (options & PCRE_CASELESS) != 0)    if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)
3153      {      {
3154      int ocs = UCD_CASESET(next);      int ocs = UCD_CASESET(next);
3155      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);
# Line 3160  if (escape == 0) Line 3171  if (escape == 0)
3171  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3172      if (utf)      if (utf)
3173        {        {
3174        unsigned int othercase;        pcre_uint32 othercase;
3175        if (next < 128) othercase = cd->fcc[next]; else        if (next < 128) othercase = cd->fcc[next]; else
3176  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3177        othercase = UCD_OTHERCASE((unsigned int)next);        othercase = UCD_OTHERCASE(next);
3178  #else  #else
3179        othercase = NOTACHAR;        othercase = NOTACHAR;
3180  #endif  #endif
3181        return (unsigned int)c != othercase;        return c != othercase;
3182        }        }
3183      else      else
3184  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3185      return (c != TABLE_GET((unsigned int)next, cd->fcc, next));  /* Not UTF */      return (c != TABLE_GET(next, cd->fcc, next));  /* Not UTF */
3186    
3187      case OP_NOT:      case OP_NOT:
3188      return c == next;      return c == next;
# Line 3181  if (escape == 0) Line 3192  if (escape == 0)
3192  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3193      if (utf)      if (utf)
3194        {        {
3195        unsigned int othercase;        pcre_uint32 othercase;
3196        if (next < 128) othercase = cd->fcc[next]; else        if (next < 128) othercase = cd->fcc[next]; else
3197  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3198        othercase = UCD_OTHERCASE((unsigned int)next);        othercase = UCD_OTHERCASE(next);
3199  #else  #else
3200        othercase = NOTACHAR;        othercase = NOTACHAR;
3201  #endif  #endif
3202        return (unsigned int)c == othercase;        return c == othercase;
3203        }        }
3204      else      else
3205  #endif  /* SUPPORT_UTF */  #endif  /* SUPPORT_UTF */
3206      return (c == TABLE_GET((unsigned int)next, cd->fcc, next));  /* Not UTF */      return (c == TABLE_GET(next, cd->fcc, next));  /* Not UTF */
3207    
3208      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
3209      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
# Line 3683  pcre_int32 req_caseopt, reqvary, tempreq Line 3694  pcre_int32 req_caseopt, reqvary, tempreq
3694  int options = *optionsptr;               /* May change dynamically */  int options = *optionsptr;               /* May change dynamically */
3695  int after_manual_callout = 0;  int after_manual_callout = 0;
3696  int length_prevgroup = 0;  int length_prevgroup = 0;
3697  register int c;  register pcre_uint32 c;
3698  int escape;  int escape;
3699  register pcre_uchar *code = *codeptr;  register pcre_uchar *code = *codeptr;
3700  pcre_uchar *last_code = code;  pcre_uchar *last_code = code;
# Line 4453  for (;; ptr++) Line 4464  for (;; ptr++)
4464    
4465            /* \b is backspace; any other special means the '-' was literal. */            /* \b is backspace; any other special means the '-' was literal. */
4466    
4467            if (descape > 0)            if (descape != 0)
4468              {              {
4469              if (descape == ESC_b) d = CHAR_BS; else              if (descape == ESC_b) d = CHAR_BS; else
4470                {                {
# Line 6667  for (;; ptr++) Line 6678  for (;; ptr++)
6678      /* Handle metasequences introduced by \. For ones like \d, the ESC_ values      /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
6679      are arranged to be the negation of the corresponding OP_values in the      are arranged to be the negation of the corresponding OP_values in the
6680      default case when PCRE_UCP is not set. For the back references, the values      default case when PCRE_UCP is not set. For the back references, the values
6681      are ESC_REF plus the reference number. Only back references and those types      are negative the reference number. Only back references and those types
6682      that consume a character may be repeated. We can test for values between      that consume a character may be repeated. We can test for values between
6683      ESC_b and ESC_Z for the latter; this may have to change if any new ones are      ESC_b and ESC_Z for the latter; this may have to change if any new ones are
6684      ever created. */      ever created. */
# Line 6707  for (;; ptr++) Line 6718  for (;; ptr++)
6718        is a subroutine call by number (Oniguruma syntax). In fact, the value        is a subroutine call by number (Oniguruma syntax). In fact, the value
6719        ESC_g is returned only for these cases. So we don't need to check for <        ESC_g is returned only for these cases. So we don't need to check for <
6720        or ' if the value is ESC_g. For the Perl syntax \g{n} the value is        or ' if the value is ESC_g. For the Perl syntax \g{n} the value is
6721        ESC_REF+n, and for the Perl syntax \g{name} the result is ESC_k (as        -n, and for the Perl syntax \g{name} the result is ESC_k (as
6722        that is a synonym for a named back reference). */        that is a synonym for a named back reference). */
6723    
6724        if (escape == ESC_g)        if (escape == ESC_g)
# Line 6785  for (;; ptr++) Line 6796  for (;; ptr++)
6796        not set to cope with cases like (?=(\w+))\1: which would otherwise set        not set to cope with cases like (?=(\w+))\1: which would otherwise set
6797        ':' later. */        ':' later. */
6798    
6799        if (escape >= ESC_REF)        if (escape < 0)
6800          {          {
6801          open_capitem *oc;          open_capitem *oc;
6802          recno = escape - ESC_REF;          recno = -escape;
6803    
6804          HANDLE_REFERENCE:    /* Come here from named backref handling */          HANDLE_REFERENCE:    /* Come here from named backref handling */
6805          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;          if (firstchar == REQ_UNSET) firstchar = REQ_NONE;

Legend:
Removed from v.1059  
changed lines
  Added in v.1074

  ViewVC Help
Powered by ViewVC 1.1.5