/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 518 by ph10, Tue May 18 15:47:01 2010 UTC | revision 530 by ph10, Tue Jun 1 13:42:06 2010 UTC
# Line 2316  auto_callout(uschar *code, const uschar Line 2316  auto_callout(uschar *code, const uschar
2316  {  {
2317  *code++ = OP_CALLOUT;  *code++ = OP_CALLOUT;
2318  *code++ = 255;  *code++ = 255;
2319  PUT(code, 0, ptr - cd->start_pattern);  /* Pattern offset */  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
2320  PUT(code, LINK_SIZE, 0);                /* Default length */  PUT(code, LINK_SIZE, 0);                       /* Default length */
2321  return code + 2*LINK_SIZE;  return code + 2*LINK_SIZE;
2322  }  }
2323    
# Line 2342  Returns:             nothing Line 2342  Returns:             nothing
2342  static void  static void
2343  complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)  complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)
2344  {  {
2345  int length = ptr - cd->start_pattern - GET(previous_callout, 2);  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
2346  PUT(previous_callout, 2 + LINK_SIZE, length);  PUT(previous_callout, 2 + LINK_SIZE, length);
2347  }  }
2348    
# Line 2544  if (next >= 0) switch(op_code) Line 2544  if (next >= 0) switch(op_code)
2544    else    else
2545  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2546    return (item == cd->fcc[next]);  /* Non-UTF-8 mode */    return (item == cd->fcc[next]);  /* Non-UTF-8 mode */
2547    
2548      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
2549      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
2550    
2551    case OP_DIGIT:    case OP_DIGIT:
2552    return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;    return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;
# Line 2586  if (next >= 0) switch(op_code) Line 2589  if (next >= 0) switch(op_code)
2589      case 0x202f:      case 0x202f:
2590      case 0x205f:      case 0x205f:
2591      case 0x3000:      case 0x3000:
2592      return op_code != OP_HSPACE;      return op_code == OP_NOT_HSPACE;
2593      default:      default:
2594      return op_code == OP_HSPACE;      return op_code != OP_NOT_HSPACE;
2595      }      }
2596    
2597      case OP_ANYNL:
2598    case OP_VSPACE:    case OP_VSPACE:
2599    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
2600    switch(next)    switch(next)
# Line 2602  if (next >= 0) switch(op_code) Line 2606  if (next >= 0) switch(op_code)
2606      case 0x85:      case 0x85:
2607      case 0x2028:      case 0x2028:
2608      case 0x2029:      case 0x2029:
2609      return op_code != OP_VSPACE;      return op_code == OP_NOT_VSPACE;
2610      default:      default:
2611      return op_code == OP_VSPACE;      return op_code != OP_NOT_VSPACE;
2612      }      }
2613    
2614    default:    default:
# Line 2612  if (next >= 0) switch(op_code) Line 2616  if (next >= 0) switch(op_code)
2616    }    }
2617    
2618    
2619  /* Handle the case when the next item is \d, \s, etc. */  /* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP
2620    is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are
2621    generated only when PCRE_UCP is *not* set, that is, when only ASCII
2622    characteristics are recognized. */
2623    
2624  switch(op_code)  switch(op_code)
2625    {    {
# Line 2691  switch(op_code) Line 2698  switch(op_code)
2698    
2699    case OP_DIGIT:    case OP_DIGIT:
2700    return next == -ESC_D || next == -ESC_s || next == -ESC_W ||    return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
2701           next == -ESC_h || next == -ESC_v;           next == -ESC_h || next == -ESC_v || next == -ESC_R;
2702    
2703    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
2704    return next == -ESC_d;    return next == -ESC_d;
2705    
2706    case OP_WHITESPACE:    case OP_WHITESPACE:
2707    return next == -ESC_S || next == -ESC_d || next == -ESC_w;    return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;
2708    
2709    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
2710    return next == -ESC_s || next == -ESC_h || next == -ESC_v;    return next == -ESC_s || next == -ESC_h || next == -ESC_v;
2711    
2712    case OP_HSPACE:    case OP_HSPACE:
2713    return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
2714             next == -ESC_w || next == -ESC_v || next == -ESC_R;
2715    
2716    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
2717    return next == -ESC_h;    return next == -ESC_h;
2718    
2719    /* Can't have \S in here because VT matches \S (Perl anomaly) */    /* Can't have \S in here because VT matches \S (Perl anomaly) */
2720      case OP_ANYNL:
2721    case OP_VSPACE:    case OP_VSPACE:
2722    return next == -ESC_V || next == -ESC_d || next == -ESC_w;    return next == -ESC_V || next == -ESC_d || next == -ESC_w;
2723    
2724    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
2725    return next == -ESC_v;    return next == -ESC_v || next == -ESC_R;
2726    
2727    case OP_WORDCHAR:    case OP_WORDCHAR:
2728    return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;    return next == -ESC_W || next == -ESC_s || next == -ESC_h ||
2729             next == -ESC_v || next == -ESC_R;
2730    
2731    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
2732    return next == -ESC_w || next == -ESC_d;    return next == -ESC_w || next == -ESC_d;
# Line 2891  for (;; ptr++) Line 2901  for (;; ptr++)
2901        goto FAILED;        goto FAILED;
2902        }        }
2903    
2904      *lengthptr += code - last_code;      *lengthptr += (int)(code - last_code);
2905      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
2906    
2907      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
# Line 3009  for (;; ptr++) Line 3019  for (;; ptr++)
3019          *errorcodeptr = ERR20;          *errorcodeptr = ERR20;
3020          goto FAILED;          goto FAILED;
3021          }          }
3022        *lengthptr += code - last_code;   /* To include callout length */        *lengthptr += (int)(code - last_code);   /* To include callout length */
3023        DPRINTF((">> end branch\n"));        DPRINTF((">> end branch\n"));
3024        }        }
3025      return TRUE;      return TRUE;
# Line 3214  for (;; ptr++) Line 3224  for (;; ptr++)
3224            ptr++;            ptr++;
3225            }            }
3226    
3227          posix_class = check_posix_name(ptr, tempptr - ptr);          posix_class = check_posix_name(ptr, (int)(tempptr - ptr));
3228          if (posix_class < 0)          if (posix_class < 0)
3229            {            {
3230            *errorcodeptr = ERR30;            *errorcodeptr = ERR30;
# Line 4244  for (;; ptr++) Line 4254  for (;; ptr++)
4254        {        {
4255        register int i;        register int i;
4256        int ketoffset = 0;        int ketoffset = 0;
4257        int len = code - previous;        int len = (int)(code - previous);
4258        uschar *bralink = NULL;        uschar *bralink = NULL;
4259    
4260        /* Repeating a DEFINE group is pointless */        /* Repeating a DEFINE group is pointless */
# Line 4265  for (;; ptr++) Line 4275  for (;; ptr++)
4275          {          {
4276          register uschar *ket = previous;          register uschar *ket = previous;
4277          do ket += GET(ket, 1); while (*ket != OP_KET);          do ket += GET(ket, 1); while (*ket != OP_KET);
4278          ketoffset = code - ket;          ketoffset = (int)(code - ket);
4279          }          }
4280    
4281        /* The case of a zero minimum is special because of the need to stick        /* The case of a zero minimum is special because of the need to stick
# Line 4333  for (;; ptr++) Line 4343  for (;; ptr++)
4343            /* We chain together the bracket offset fields that have to be            /* We chain together the bracket offset fields that have to be
4344            filled in later when the ends of the brackets are reached. */            filled in later when the ends of the brackets are reached. */
4345    
4346            offset = (bralink == NULL)? 0 : previous - bralink;            offset = (bralink == NULL)? 0 : (int)(previous - bralink);
4347            bralink = previous;            bralink = previous;
4348            PUTINC(previous, 0, offset);            PUTINC(previous, 0, offset);
4349            }            }
# Line 4442  for (;; ptr++) Line 4452  for (;; ptr++)
4452              {              {
4453              int offset;              int offset;
4454              *code++ = OP_BRA;              *code++ = OP_BRA;
4455              offset = (bralink == NULL)? 0 : code - bralink;              offset = (bralink == NULL)? 0 : (int)(code - bralink);
4456              bralink = code;              bralink = code;
4457              PUTINC(code, 0, offset);              PUTINC(code, 0, offset);
4458              }              }
# Line 4463  for (;; ptr++) Line 4473  for (;; ptr++)
4473          while (bralink != NULL)          while (bralink != NULL)
4474            {            {
4475            int oldlinkoffset;            int oldlinkoffset;
4476            int offset = code - bralink + 1;            int offset = (int)(code - bralink + 1);
4477            uschar *bra = code - offset;            uschar *bra = code - offset;
4478            oldlinkoffset = GET(bra, 1);            oldlinkoffset = GET(bra, 1);
4479            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
# Line 4551  for (;; ptr++) Line 4561  for (;; ptr++)
4561  #endif  #endif
4562          }          }
4563    
4564        len = code - tempcode;        len = (int)(code - tempcode);
4565        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4566          {          {
4567          case OP_STAR:  *tempcode = OP_POSSTAR; break;          case OP_STAR:  *tempcode = OP_POSSTAR; break;
# Line 4620  for (;; ptr++) Line 4630  for (;; ptr++)
4630        const uschar *arg = NULL;        const uschar *arg = NULL;
4631        previous = NULL;        previous = NULL;
4632        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
4633        namelen = ptr - name;        namelen = (int)(ptr - name);
4634    
4635        if (*ptr == CHAR_COLON)        if (*ptr == CHAR_COLON)
4636          {          {
# Line 4808  for (;; ptr++) Line 4818  for (;; ptr++)
4818                recno * 10 + *ptr - CHAR_0 : -1;                recno * 10 + *ptr - CHAR_0 : -1;
4819            ptr++;            ptr++;
4820            }            }
4821          namelen = ptr - name;          namelen = (int)(ptr - name);
4822    
4823          if ((terminator > 0 && *ptr++ != terminator) ||          if ((terminator > 0 && *ptr++ != terminator) ||
4824              *ptr++ != CHAR_RIGHT_PARENTHESIS)              *ptr++ != CHAR_RIGHT_PARENTHESIS)
# Line 5004  for (;; ptr++) Line 5014  for (;; ptr++)
5014              goto FAILED;              goto FAILED;
5015              }              }
5016            *code++ = n;            *code++ = n;
5017            PUT(code, 0, ptr - cd->start_pattern + 1);  /* Pattern offset */            PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */
5018            PUT(code, LINK_SIZE, 0);                    /* Default length */            PUT(code, LINK_SIZE, 0);                          /* Default length */
5019            code += 2 * LINK_SIZE;            code += 2 * LINK_SIZE;
5020            }            }
5021          previous = NULL;          previous = NULL;
# Line 5038  for (;; ptr++) Line 5048  for (;; ptr++)
5048            name = ++ptr;            name = ++ptr;
5049    
5050            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
5051            namelen = ptr - name;            namelen = (int)(ptr - name);
5052    
5053            /* In the pre-compile phase, just do a syntax check. */            /* In the pre-compile phase, just do a syntax check. */
5054    
# Line 5168  for (;; ptr++) Line 5178  for (;; ptr++)
5178          NAMED_REF_OR_RECURSE:          NAMED_REF_OR_RECURSE:
5179          name = ++ptr;          name = ++ptr;
5180          while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;          while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
5181          namelen = ptr - name;          namelen = (int)(ptr - name);
5182    
5183          /* In the pre-compile phase, do a syntax check and set a dummy          /* In the pre-compile phase, do a syntax check and set a dummy
5184          reference number. */          reference number. */
# Line 5337  for (;; ptr++) Line 5347  for (;; ptr++)
5347                of the group. */                of the group. */
5348    
5349                called = cd->start_code + recno;                called = cd->start_code + recno;
5350                PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);                PUTINC(cd->hwm, 0, (int)(code + 2 + LINK_SIZE - cd->start_code));
5351                }                }
5352    
5353              /* If not a forward reference, and the subpattern is still open,              /* If not a forward reference, and the subpattern is still open,
# Line 5361  for (;; ptr++) Line 5371  for (;; ptr++)
5371            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
5372    
5373            *code = OP_RECURSE;            *code = OP_RECURSE;
5374            PUT(code, 1, called - cd->start_code);            PUT(code, 1, (int)(called - cd->start_code));
5375            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
5376    
5377            *code = OP_KET;            *code = OP_KET;
# Line 6179  for (;;) Line 6189  for (;;)
6189      {      {
6190      if (lengthptr == NULL)      if (lengthptr == NULL)
6191        {        {
6192        int branch_length = code - last_branch;        int branch_length = (int)(code - last_branch);
6193        do        do
6194          {          {
6195          int prev_length = GET(last_branch, 1);          int prev_length = GET(last_branch, 1);
# Line 6193  for (;;) Line 6203  for (;;)
6203      /* Fill in the ket */      /* Fill in the ket */
6204    
6205      *code = OP_KET;      *code = OP_KET;
6206      PUT(code, 1, code - start_bracket);      PUT(code, 1, (int)(code - start_bracket));
6207      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
6208    
6209      /* If it was a capturing subpattern, check to see if it contained any      /* If it was a capturing subpattern, check to see if it contained any
# Line 6208  for (;;) Line 6218  for (;;)
6218            code - start_bracket);            code - start_bracket);
6219          *start_bracket = OP_ONCE;          *start_bracket = OP_ONCE;
6220          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6221          PUT(start_bracket, 1, code - start_bracket);          PUT(start_bracket, 1, (int)(code - start_bracket));
6222          *code = OP_KET;          *code = OP_KET;
6223          PUT(code, 1, code - start_bracket);          PUT(code, 1, (int)(code - start_bracket));
6224          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6225          length += 2 + 2*LINK_SIZE;          length += 2 + 2*LINK_SIZE;
6226          }          }
# Line 6265  for (;;) Line 6275  for (;;)
6275    else    else
6276      {      {
6277      *code = OP_ALT;      *code = OP_ALT;
6278      PUT(code, 1, code - last_branch);      PUT(code, 1, (int)(code - last_branch));
6279      bc.current_branch = last_branch = code;      bc.current_branch = last_branch = code;
6280      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
6281      }      }
# Line 6837  regex compiled on a system with 4-byte p Line 6847  regex compiled on a system with 4-byte p
6847  pointers. */  pointers. */
6848    
6849  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
6850  re->size = size;  re->size = (int)size;
6851  re->options = cd->external_options;  re->options = cd->external_options;
6852  re->flags = cd->external_flags;  re->flags = cd->external_flags;
6853  re->dummy1 = 0;  re->dummy1 = 0;
# Line 6908  while (errorcode == 0 && cd->hwm > cwork Line 6918  while (errorcode == 0 && cd->hwm > cwork
6918    recno = GET(codestart, offset);    recno = GET(codestart, offset);
6919    groupptr = _pcre_find_bracket(codestart, utf8, recno);    groupptr = _pcre_find_bracket(codestart, utf8, recno);
6920    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
6921      else PUT(((uschar *)codestart), offset, groupptr - codestart);      else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));
6922    }    }
6923    
6924  /* Give an error if there's back reference to a non-existent capturing  /* Give an error if there's back reference to a non-existent capturing
# Line 6963  if (errorcode != 0) Line 6973  if (errorcode != 0)
6973    {    {
6974    (pcre_free)(re);    (pcre_free)(re);
6975    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
6976    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = (int)(ptr - (const uschar *)pattern);
6977    PCRE_EARLY_ERROR_RETURN2:    PCRE_EARLY_ERROR_RETURN2:
6978    *errorptr = find_error_text(errorcode);    *errorptr = find_error_text(errorcode);
6979    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;

Legend:
Removed from v.518
Changed lines
Added in v.530

  ViewVC Help
Powered by ViewVC 1.1.5