/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 924 by zherczeg, Wed Feb 22 10:23:56 2012 UTC | revision 964 by ph10, Fri May 4 13:03:39 2012 UTC
# Line 489  static const char error_texts[] = Line 489  static const char error_texts[] =
489    "too many forward references\0"    "too many forward references\0"
490    "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"    "disallowed Unicode code point (>= 0xd800 && <= 0xdfff)\0"
491    "invalid UTF-16 string\0"    "invalid UTF-16 string\0"
492      /* 75 */
493      "name is too long in (*MARK), (*PRUNE), (*SKIP), or (*THEN)\0"
494    ;    ;
495    
496  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 3132  if (next >= 0) switch(op_code) Line 3134  if (next >= 0) switch(op_code)
3134    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
3135    
3136    case OP_DIGIT:    case OP_DIGIT:
3137    return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;    return next > 255 || (cd->ctypes[next] & ctype_digit) == 0;
3138    
3139    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
3140    return next <= 127 && (cd->ctypes[next] & ctype_digit) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_digit) != 0;
3141    
3142    case OP_WHITESPACE:    case OP_WHITESPACE:
3143    return next > 127 || (cd->ctypes[next] & ctype_space) == 0;    return next > 255 || (cd->ctypes[next] & ctype_space) == 0;
3144    
3145    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
3146    return next <= 127 && (cd->ctypes[next] & ctype_space) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_space) != 0;
3147    
3148    case OP_WORDCHAR:    case OP_WORDCHAR:
3149    return next > 127 || (cd->ctypes[next] & ctype_word) == 0;    return next > 255 || (cd->ctypes[next] & ctype_word) == 0;
3150    
3151    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
3152    return next <= 127 && (cd->ctypes[next] & ctype_word) != 0;    return next <= 255 && (cd->ctypes[next] & ctype_word) != 0;
3153    
3154    case OP_HSPACE:    case OP_HSPACE:
3155    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
# Line 3225  switch(op_code) Line 3227  switch(op_code)
3227    switch(-next)    switch(-next)
3228      {      {
3229      case ESC_d:      case ESC_d:
3230      return c > 127 || (cd->ctypes[c] & ctype_digit) == 0;      return c > 255 || (cd->ctypes[c] & ctype_digit) == 0;
3231    
3232      case ESC_D:      case ESC_D:
3233      return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_digit) != 0;
3234    
3235      case ESC_s:      case ESC_s:
3236      return c > 127 || (cd->ctypes[c] & ctype_space) == 0;      return c > 255 || (cd->ctypes[c] & ctype_space) == 0;
3237    
3238      case ESC_S:      case ESC_S:
3239      return c <= 127 && (cd->ctypes[c] & ctype_space) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_space) != 0;
3240    
3241      case ESC_w:      case ESC_w:
3242      return c > 127 || (cd->ctypes[c] & ctype_word) == 0;      return c > 255 || (cd->ctypes[c] & ctype_word) == 0;
3243    
3244      case ESC_W:      case ESC_W:
3245      return c <= 127 && (cd->ctypes[c] & ctype_word) != 0;      return c <= 255 && (cd->ctypes[c] & ctype_word) != 0;
3246    
3247      case ESC_h:      case ESC_h:
3248      case ESC_H:      case ESC_H:
# Line 3349  switch(op_code) Line 3351  switch(op_code)
3351    return next == -ESC_d;    return next == -ESC_d;
3352    
3353    case OP_WHITESPACE:    case OP_WHITESPACE:
3354    return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;    return next == -ESC_S || next == -ESC_d || next == -ESC_w;
3355    
3356    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
3357    return next == -ESC_s || next == -ESC_h || next == -ESC_v;    return next == -ESC_s || next == -ESC_h || next == -ESC_v || next == -ESC_R;
3358    
3359    case OP_HSPACE:    case OP_HSPACE:
3360    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
# Line 4516  for (;; ptr++) Line 4518  for (;; ptr++)
4518        LONE_SINGLE_CHARACTER:        LONE_SINGLE_CHARACTER:
4519    
4520        /* Only the value of 1 matters for class_single_char. */        /* Only the value of 1 matters for class_single_char. */
4521    
4522        if (class_single_char < 2) class_single_char++;        if (class_single_char < 2) class_single_char++;
4523    
4524        /* If class_charcount is 1, we saw precisely one character. As long as        /* If class_charcount is 1, we saw precisely one character. As long as
4525        there was no use of \p or \P, in other words, no use of any XCLASS features,        there was no use of \p or \P, in other words, no use of any XCLASS
4526        we can optimize.        features, we can optimize.
4527    
4528        The optimization throws away the bit map. We turn the item into a        The optimization throws away the bit map. We turn the item into a
4529        1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.        1-character OP_CHAR[I] if it's positive, or OP_NOT[I] if it's negative.
# Line 4533  for (;; ptr++) Line 4536  for (;; ptr++)
4536          ptr++;          ptr++;
4537          zeroreqchar = reqchar;          zeroreqchar = reqchar;
4538    
         /* The OP_NOT[I] opcodes work on single characters only. */  
   
4539          if (negate_class)          if (negate_class)
4540            {            {
4541            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;            if (firstchar == REQ_UNSET) firstchar = REQ_NONE;
# Line 4804  for (;; ptr++) Line 4805  for (;; ptr++)
4805      /* Now handle repetition for the different types of item. */      /* Now handle repetition for the different types of item. */
4806    
4807      /* If previous was a character or negated character match, abolish the item      /* If previous was a character or negated character match, abolish the item
4808      and generate a repeat item instead. If a char item has a minumum of more      and generate a repeat item instead. If a char item has a minimum of more
4809      than one, ensure  that it is set in reqchar - it might not be if a sequence      than one, ensure that it is set in reqchar - it might not be if a sequence
4810      such as x{3} is  the first thing in a branch because the x will have gone      such as x{3} is the first thing in a branch because the x will have gone
4811      into firstchar instead.  */      into firstchar instead.  */
4812    
4813      if (*previous == OP_CHAR || *previous == OP_CHARI      if (*previous == OP_CHAR || *previous == OP_CHARI
4814          || *previous == OP_NOT || *previous == OP_NOTI)          || *previous == OP_NOT || *previous == OP_NOTI)
4815        {        {
4816        switch (*previous) {        switch (*previous)
4817        default: /* Make compiler happy. */          {
4818        case OP_CHAR:  op_type = OP_STAR - OP_STAR; break;          default: /* Make compiler happy. */
4819        case OP_CHARI: op_type = OP_STARI - OP_STAR; break;          case OP_CHAR:  op_type = OP_STAR - OP_STAR; break;
4820        case OP_NOT:   op_type = OP_NOTSTAR - OP_STAR; break;          case OP_CHARI: op_type = OP_STARI - OP_STAR; break;
4821        case OP_NOTI:  op_type = OP_NOTSTARI - OP_STAR; break;          case OP_NOT:   op_type = OP_NOTSTAR - OP_STAR; break;
4822        }          case OP_NOTI:  op_type = OP_NOTSTARI - OP_STAR; break;
4823            }
4824    
4825        /* Deal with UTF characters that take up more than one character. It's        /* Deal with UTF characters that take up more than one character. It's
4826        easier to write this out separately than try to macrify it. Use c to        easier to write this out separately than try to macrify it. Use c to
# Line 5591  for (;; ptr++) Line 5593  for (;; ptr++)
5593        ptr++;        ptr++;
5594        while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;        while (MAX_255(*ptr) && (cd->ctypes[*ptr] & ctype_letter) != 0) ptr++;
5595        namelen = (int)(ptr - name);        namelen = (int)(ptr - name);
5596    
5597        /* It appears that Perl allows any characters whatsoever, other than        /* It appears that Perl allows any characters whatsoever, other than
5598        a closing parenthesis, to appear in arguments, so we no longer insist on        a closing parenthesis, to appear in arguments, so we no longer insist on
5599        letters, digits, and underscores. */        letters, digits, and underscores. */
# Line 5601  for (;; ptr++) Line 5603  for (;; ptr++)
5603          arg = ++ptr;          arg = ++ptr;
5604          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;          while (*ptr != 0 && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
5605          arglen = (int)(ptr - arg);          arglen = (int)(ptr - arg);
5606            if (arglen > (int)MAX_MARK)
5607              {
5608              *errorcodeptr = ERR75;
5609              goto FAILED;
5610              }
5611          }          }
5612    
5613        if (*ptr != CHAR_RIGHT_PARENTHESIS)        if (*ptr != CHAR_RIGHT_PARENTHESIS)
# Line 6852  for (;; ptr++) Line 6859  for (;; ptr++)
6859        /* For the rest (including \X when Unicode properties are supported), we        /* For the rest (including \X when Unicode properties are supported), we
6860        can obtain the OP value by negating the escape value in the default        can obtain the OP value by negating the escape value in the default
6861        situation when PCRE_UCP is not set. When it *is* set, we substitute        situation when PCRE_UCP is not set. When it *is* set, we substitute
6862        Unicode property tests. */        Unicode property tests. Note that \b and \B do a one-character
6863          lookbehind. */
6864    
6865        else        else
6866          {          {
6867            if ((-c == ESC_b || -c == ESC_B) && cd->max_lookbehind == 0)
6868              cd->max_lookbehind = 1;
6869  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
6870          if (-c >= ESC_DU && -c <= ESC_wu)          if (-c >= ESC_DU && -c <= ESC_wu)
6871            {            {
# Line 7163  for (;;) Line 7173  for (;;)
7173          *ptrptr = ptr;          *ptrptr = ptr;
7174          return FALSE;          return FALSE;
7175          }          }
7176        else { PUT(reverse_count, 0, fixed_length); }        else
7177            {
7178            if (fixed_length > cd->max_lookbehind)
7179              cd->max_lookbehind = fixed_length;
7180            PUT(reverse_count, 0, fixed_length);
7181            }
7182        }        }
7183      }      }
7184    
# Line 7833  cd->start_pattern = (const pcre_uchar *) Line 7848  cd->start_pattern = (const pcre_uchar *)
7848  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));  cd->end_pattern = (const pcre_uchar *)(pattern + STRLEN_UC((const pcre_uchar *)pattern));
7849  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7850  cd->assert_depth = 0;  cd->assert_depth = 0;
7851    cd->max_lookbehind = 0;
7852  cd->external_options = options;  cd->external_options = options;
7853  cd->external_flags = 0;  cd->external_flags = 0;
7854  cd->open_caps = NULL;  cd->open_caps = NULL;
# Line 7883  re->magic_number = MAGIC_NUMBER; Line 7899  re->magic_number = MAGIC_NUMBER;
7899  re->size = (int)size;  re->size = (int)size;
7900  re->options = cd->external_options;  re->options = cd->external_options;
7901  re->flags = cd->external_flags;  re->flags = cd->external_flags;
 re->dummy1 = 0;  
7902  re->first_char = 0;  re->first_char = 0;
7903  re->req_char = 0;  re->req_char = 0;
7904  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
# Line 7903  field; this time it's used for rememberi Line 7918  field; this time it's used for rememberi
7918  cd->final_bracount = cd->bracount;  /* Save for checking forward references */  cd->final_bracount = cd->bracount;  /* Save for checking forward references */
7919  cd->assert_depth = 0;  cd->assert_depth = 0;
7920  cd->bracount = 0;  cd->bracount = 0;
7921    cd->max_lookbehind = 0;
7922  cd->names_found = 0;  cd->names_found = 0;
7923  cd->name_table = (pcre_uchar *)re + re->name_table_offset;  cd->name_table = (pcre_uchar *)re + re->name_table_offset;
7924  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
# Line 7924  code = (pcre_uchar *)codestart; Line 7940  code = (pcre_uchar *)codestart;
7940    &firstchar, &reqchar, NULL, cd, NULL);    &firstchar, &reqchar, NULL, cd, NULL);
7941  re->top_bracket = cd->bracount;  re->top_bracket = cd->bracount;
7942  re->top_backref = cd->top_backref;  re->top_backref = cd->top_backref;
7943    re->max_lookbehind = cd->max_lookbehind;
7944  re->flags = cd->external_flags | PCRE_MODE;  re->flags = cd->external_flags | PCRE_MODE;
7945    
7946  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */  if (cd->had_accept) reqchar = REQ_NONE;   /* Must disable after (*ACCEPT) */
# Line 8011  if (cd->check_lookbehind) Line 8028  if (cd->check_lookbehind)
8028                      (fixed_length == -4)? ERR70 : ERR25;                      (fixed_length == -4)? ERR70 : ERR25;
8029          break;          break;
8030          }          }
8031          if (fixed_length > cd->max_lookbehind) cd->max_lookbehind = fixed_length;
8032        PUT(cc, 1, fixed_length);        PUT(cc, 1, fixed_length);
8033        }        }
8034      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;

Legend:
Removed from v.924  
changed lines
  Added in v.964

  ViewVC Help
Powered by ViewVC 1.1.5