/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

Left: revision 1078 by chpe, Tue Oct 16 15:55:00 2012 UTC | Right: revision 1147 by chpe, Sat Oct 20 20:17:03 2012 UTC
# Line 1203  escape sequence. Line 1203  escape sequence.
1203  Argument:  Argument:
1204    ptrptr         points to the pattern position pointer    ptrptr         points to the pattern position pointer
1205    negptr         points to a boolean that is set TRUE for negation else FALSE    negptr         points to a boolean that is set TRUE for negation else FALSE
1206    dptr           points to an int that is set to the detailed property value    ptypeptr       points to an unsigned int that is set to the type value
1207      pdataptr       points to an unsigned int that is set to the detailed property value
1208    errorcodeptr   points to the error code variable    errorcodeptr   points to the error code variable
1209    
1210  Returns:         type value from ucp_type_table, or -1 for an invalid type  Returns:         TRUE if the type value was found, or FALSE for an invalid type
1211  */  */
1212    
1213  static int  static BOOL
1214  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, int *dptr, int *errorcodeptr)  get_ucp(const pcre_uchar **ptrptr, BOOL *negptr, unsigned int *ptypeptr,
1215      unsigned int *pdataptr, int *errorcodeptr)
1216  {  {
1217  pcre_uchar c;  pcre_uchar c;
1218  int i, bot, top;  int i, bot, top;
# Line 1265  while (bot < top) Line 1267  while (bot < top)
1267    r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);    r = STRCMP_UC_C8(name, PRIV(utt_names) + PRIV(utt)[i].name_offset);
1268    if (r == 0)    if (r == 0)
1269      {      {
1270      *dptr = PRIV(utt)[i].value;      *ptypeptr = PRIV(utt)[i].type;
1271      return PRIV(utt)[i].type;      *pdataptr = PRIV(utt)[i].value;
1272        return TRUE;
1273      }      }
1274    if (r > 0) bot = i + 1; else top = i;    if (r > 0) bot = i + 1; else top = i;
1275    }    }
1276    
1277  *errorcodeptr = ERR47;  *errorcodeptr = ERR47;
1278  *ptrptr = ptr;  *ptrptr = ptr;
1279  return -1;  return FALSE;
1280    
1281  ERROR_RETURN:  ERROR_RETURN:
1282  *errorcodeptr = ERR46;  *errorcodeptr = ERR46;
1283  *ptrptr = ptr;  *ptrptr = ptr;
1284  return -1;  return FALSE;
1285  }  }
1286  #endif  #endif
1287    
# Line 1854  for (;;) Line 1857  for (;;)
1857      case OP_NOTI:      case OP_NOTI:
1858      branchlength++;      branchlength++;
1859      cc += 2;      cc += 2;
1860  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
1861      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1862  #endif  #endif
1863      break;      break;
# Line 1868  for (;;) Line 1871  for (;;)
1871      case OP_NOTEXACTI:      case OP_NOTEXACTI:
1872      branchlength += (int)GET2(cc,1);      branchlength += (int)GET2(cc,1);
1873      cc += 2 + IMM2_SIZE;      cc += 2 + IMM2_SIZE;
1874  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
1875      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);      if (utf && HAS_EXTRALEN(cc[-1])) cc += GET_EXTRALEN(cc[-1]);
1876  #endif  #endif
1877      break;      break;
# Line 1913  for (;;) Line 1916  for (;;)
1916    
1917  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32  #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1918      case OP_XCLASS:      case OP_XCLASS:
     cc += GET(cc, 1) - PRIV(OP_lengths)[OP_CLASS];  
     /* Fall through */  
1919  #endif  #endif
   
1920      case OP_CLASS:      case OP_CLASS:
1921      case OP_NCLASS:      case OP_NCLASS:
1922      cc += PRIV(OP_lengths)[OP_CLASS];  
1923        switch (op)
1924          {
1925    #if defined SUPPORT_UTF || defined COMPILE_PCRE16 || defined COMPILE_PCRE32
1926          case OP_XCLASS:
1927          cc += GET(cc, 1);
1928          break;
1929    #endif
1930    
1931          case OP_CLASS:
1932          case OP_NCLASS:
1933          cc += PRIV(OP_lengths)[OP_CLASS];
1934          break;
1935          }
1936    
1937      switch (*cc)      switch (*cc)
1938        {        {
# Line 2915  get_othercase_range(pcre_uint32 *cptr, p Line 2928  get_othercase_range(pcre_uint32 *cptr, p
2928    pcre_uint32 *odptr)    pcre_uint32 *odptr)
2929  {  {
2930  pcre_uint32 c, othercase, next;  pcre_uint32 c, othercase, next;
2931  int co;  unsigned int co;
2932    
2933  /* Find the first character that has an other case. If it has multiple other  /* Find the first character that has an other case. If it has multiple other
2934  cases, return its case offset value. */  cases, return its case offset value. */
# Line 2926  for (c = *cptr; c <= d; c++) Line 2939  for (c = *cptr; c <= d; c++)
2939      {      {
2940      *ocptr = c++;   /* Character that has the set */      *ocptr = c++;   /* Character that has the set */
2941      *cptr = c;      /* Rest of input range */      *cptr = c;      /* Rest of input range */
2942      return co;      return (int)co;
2943      }      }
2944    if ((othercase = UCD_OTHERCASE(c)) != c) break;    if ((othercase = UCD_OTHERCASE(c)) != c) break;
2945    }    }
# Line 2966  Returns:       TRUE if auto-possessifyin Line 2979  Returns:       TRUE if auto-possessifyin
2979  */  */
2980    
2981  static BOOL  static BOOL
2982  check_char_prop(pcre_uint32 c, int ptype, int pdata, BOOL negated)  check_char_prop(pcre_uint32 c, unsigned int ptype, unsigned int pdata, BOOL negated)
2983  {  {
2984  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2985  const pcre_uint32 *p;  const pcre_uint32 *p;
# Line 3133  if (*ptr == CHAR_ASTERISK || *ptr == CHA Line 3146  if (*ptr == CHAR_ASTERISK || *ptr == CHA
3146    
3147  if (op_code == OP_CHAR || op_code == OP_CHARI ||  if (op_code == OP_CHAR || op_code == OP_CHARI ||
3148      op_code == OP_NOT || op_code == OP_NOTI)      op_code == OP_NOT || op_code == OP_NOTI)
   //if (escape == 0) switch(op_code)  
3149    {    {
3150  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3151    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
# Line 3153  if (escape == 0) Line 3165  if (escape == 0)
3165  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3166    if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)    if (utf && c != NOTACHAR && (options & PCRE_CASELESS) != 0)
3167      {      {
3168      int ocs = UCD_CASESET(next);      unsigned int ocs = UCD_CASESET(next);
3169      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);      if (ocs > 0) return check_char_prop(c, PT_CLIST, ocs, op_code >= OP_NOT);
3170      }      }
3171  #endif  #endif
# Line 3253  if (escape == 0) Line 3265  if (escape == 0)
3265    
3266  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3267      case OP_PROP:      case OP_PROP:
3268      return check_char_prop(next, (int)previous[0], (int)previous[1], FALSE);      return check_char_prop(next, previous[0], previous[1], FALSE);
3269    
3270      case OP_NOTPROP:      case OP_NOTPROP:
3271      return check_char_prop(next, (int)previous[0], (int)previous[1], TRUE);      return check_char_prop(next, previous[0], previous[1], TRUE);
3272  #endif  #endif
3273    
3274      default:      default:
# Line 3339  switch(op_code) Line 3351  switch(op_code)
3351      case ESC_p:      case ESC_p:
3352      case ESC_P:      case ESC_P:
3353        {        {
3354        int ptype, pdata, errorcodeptr;        unsigned int ptype = 0, pdata = 0;
3355          int errorcodeptr;
3356        BOOL negated;        BOOL negated;
3357    
3358        ptr--;      /* Make ptr point at the p or P */        ptr--;      /* Make ptr point at the p or P */
3359        ptype = get_ucp(&ptr, &negated, &pdata, &errorcodeptr);        if (!get_ucp(&ptr, &negated, &ptype, &pdata, &errorcodeptr))
3360        if (ptype < 0) return FALSE;          return FALSE;
3361        ptr++;      /* Point past the final curly ket */        ptr++;      /* Point past the final curly ket */
3362    
3363        /* If the property item is optional, we have to give up. (When generated        /* If the property item is optional, we have to give up. (When generated
# Line 3725  dynamically as we process the pattern. * Line 3738  dynamically as we process the pattern. *
3738  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
3739  /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */  /* PCRE_UTF[16|32] have the same value as PCRE_UTF8. */
3740  BOOL utf = (options & PCRE_UTF8) != 0;  BOOL utf = (options & PCRE_UTF8) != 0;
3741    #ifndef COMPILE_PCRE32
3742  pcre_uchar utf_chars[6];  pcre_uchar utf_chars[6];
3743    #endif
3744  #else  #else
3745  BOOL utf = FALSE;  BOOL utf = FALSE;
3746  #endif  #endif
# Line 3790  for (;; ptr++) Line 3805  for (;; ptr++)
3805    pcre_uint32 subreqchar, subfirstchar;    pcre_uint32 subreqchar, subfirstchar;
3806    pcre_int32 subreqcharflags, subfirstcharflags;    pcre_int32 subreqcharflags, subfirstcharflags;
3807    int terminator;    int terminator;
3808    int mclength;    unsigned int mclength;
3809    int tempbracount;    unsigned int tempbracount;
3810    pcre_uint32 ec;    pcre_uint32 ec;
3811    pcre_uchar mcbuffer[8];    pcre_uchar mcbuffer[8];
3812    
# Line 4115  for (;; ptr++) Line 4130  for (;; ptr++)
4130        {        {
4131        const pcre_uchar *oldptr;        const pcre_uchar *oldptr;
4132    
4133  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
4134        if (utf && HAS_EXTRALEN(c))        if (utf && HAS_EXTRALEN(c))
4135          {                           /* Braces are required because the */          {                           /* Braces are required because the */
4136          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
# Line 4374  for (;; ptr++) Line 4389  for (;; ptr++)
4389              case ESC_P:              case ESC_P:
4390                {                {
4391                BOOL negated;                BOOL negated;
4392                int pdata;                unsigned int ptype = 0, pdata = 0;
4393                int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);                if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
4394                if (ptype < 0) goto FAILED;                  goto FAILED;
4395                *class_uchardata++ = ((escape == ESC_p) != negated)?                *class_uchardata++ = ((escape == ESC_p) != negated)?
4396                  XCL_PROP : XCL_NOTPROP;                  XCL_PROP : XCL_NOTPROP;
4397                *class_uchardata++ = ptype;                *class_uchardata++ = ptype;
# Line 5505  for (;; ptr++) Line 5520  for (;; ptr++)
5520        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)        else if (*tempcode == OP_EXACT || *tempcode == OP_NOTEXACT)
5521          {          {
5522          tempcode += PRIV(OP_lengths)[*tempcode];          tempcode += PRIV(OP_lengths)[*tempcode];
5523  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
5524          if (utf && HAS_EXTRALEN(tempcode[-1]))          if (utf && HAS_EXTRALEN(tempcode[-1]))
5525            tempcode += GET_EXTRALEN(tempcode[-1]);            tempcode += GET_EXTRALEN(tempcode[-1]);
5526  #endif  #endif
# Line 5817  for (;; ptr++) Line 5832  for (;; ptr++)
5832            }            }
5833          namelen = (int)(ptr - name);          namelen = (int)(ptr - name);
5834    
5835          if ((terminator > 0 && *ptr++ != terminator) ||          if ((terminator > 0 && *ptr++ != (pcre_uchar)terminator) ||
5836              *ptr++ != CHAR_RIGHT_PARENTHESIS)              *ptr++ != CHAR_RIGHT_PARENTHESIS)
5837            {            {
5838            ptr--;      /* Error offset */            ptr--;      /* Error offset */
# Line 6057  for (;; ptr++) Line 6072  for (;; ptr++)
6072    
6073            if (lengthptr != NULL)            if (lengthptr != NULL)
6074              {              {
6075              if (*ptr != terminator)              if (*ptr != (pcre_uchar)terminator)
6076                {                {
6077                *errorcodeptr = ERR42;                *errorcodeptr = ERR42;
6078                goto FAILED;                goto FAILED;
# Line 6199  for (;; ptr++) Line 6214  for (;; ptr++)
6214              *errorcodeptr = ERR62;              *errorcodeptr = ERR62;
6215              goto FAILED;              goto FAILED;
6216              }              }
6217            if (*ptr != terminator)            if (*ptr != (pcre_uchar)terminator)
6218              {              {
6219              *errorcodeptr = ERR42;              *errorcodeptr = ERR42;
6220              goto FAILED;              goto FAILED;
# Line 6305  for (;; ptr++) Line 6320  for (;; ptr++)
6320            while(IS_DIGIT(*ptr))            while(IS_DIGIT(*ptr))
6321              recno = recno * 10 + *ptr++ - CHAR_0;              recno = recno * 10 + *ptr++ - CHAR_0;
6322    
6323            if (*ptr != terminator)            if (*ptr != (pcre_uchar)terminator)
6324              {              {
6325              *errorcodeptr = ERR29;              *errorcodeptr = ERR29;
6326              goto FAILED;              goto FAILED;
# Line 6785  for (;; ptr++) Line 6800  for (;; ptr++)
6800          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)          if (ptr[1] != CHAR_PLUS && ptr[1] != CHAR_MINUS)
6801            {            {
6802            BOOL is_a_number = TRUE;            BOOL is_a_number = TRUE;
6803            for (p = ptr + 1; *p != 0 && *p != terminator; p++)            for (p = ptr + 1; *p != 0 && *p != (pcre_uchar)terminator; p++)
6804              {              {
6805              if (!MAX_255(*p)) { is_a_number = FALSE; break; }              if (!MAX_255(*p)) { is_a_number = FALSE; break; }
6806              if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;              if ((cd->ctypes[*p] & ctype_digit) == 0) is_a_number = FALSE;
6807              if ((cd->ctypes[*p] & ctype_word) == 0) break;              if ((cd->ctypes[*p] & ctype_word) == 0) break;
6808              }              }
6809            if (*p != terminator)            if (*p != (pcre_uchar)terminator)
6810              {              {
6811              *errorcodeptr = ERR57;              *errorcodeptr = ERR57;
6812              break;              break;
# Line 6809  for (;; ptr++) Line 6824  for (;; ptr++)
6824    
6825          p = ptr + 2;          p = ptr + 2;
6826          while (IS_DIGIT(*p)) p++;          while (IS_DIGIT(*p)) p++;
6827          if (*p != terminator)          if (*p != (pcre_uchar)terminator)
6828            {            {
6829            *errorcodeptr = ERR57;            *errorcodeptr = ERR57;
6830            break;            break;
# Line 6873  for (;; ptr++) Line 6888  for (;; ptr++)
6888        else if (escape == ESC_P || escape == ESC_p)        else if (escape == ESC_P || escape == ESC_p)
6889          {          {
6890          BOOL negated;          BOOL negated;
6891          int pdata;          unsigned int ptype = 0, pdata = 0;
6892          int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);          if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
6893          if (ptype < 0) goto FAILED;            goto FAILED;
6894          previous = code;          previous = code;
6895          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
6896          *code++ = ptype;          *code++ = ptype;
# Line 6949  for (;; ptr++) Line 6964  for (;; ptr++)
6964      mclength = 1;      mclength = 1;
6965      mcbuffer[0] = c;      mcbuffer[0] = c;
6966    
6967  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #ifdef SUPPORT_UTF
6968      if (utf && HAS_EXTRALEN(c))      if (utf && HAS_EXTRALEN(c))
6969        ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));        ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
6970  #endif  #endif
# Line 7102  pcre_int32 firstcharflags, reqcharflags; Line 7117  pcre_int32 firstcharflags, reqcharflags;
7117  pcre_uint32 branchfirstchar, branchreqchar;  pcre_uint32 branchfirstchar, branchreqchar;
7118  pcre_int32 branchfirstcharflags, branchreqcharflags;  pcre_int32 branchfirstcharflags, branchreqcharflags;
7119  int length;  int length;
7120  int orig_bracount;  unsigned int orig_bracount;
7121  int max_bracount;  unsigned int max_bracount;
7122  branch_chain bc;  branch_chain bc;
7123    
7124  bc.outer = bcptr;  bc.outer = bcptr;
# Line 7643  do { Line 7658  do {
7658               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;               *code == OP_CBRAPOS || *code == OP_SCBRAPOS)? IMM2_SIZE:0;
7659     const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,     const pcre_uchar *scode = first_significant_code(code + 1+LINK_SIZE + xl,
7660       TRUE);       TRUE);
7661     register int op = *scode;     register pcre_uchar op = *scode;
7662    
7663     switch(op)     switch(op)
7664       {       {

Legend:
Removed from v.1078  
changed lines
  Added in v.1147

  ViewVC Help
Powered by ViewVC 1.1.5