/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1538 by ph10, Sun Mar 29 11:22:24 2015 UTC revision 1608 by ph10, Fri Nov 20 17:34:16 2015 UTC
# Line 174  static const short int escapes[] = { Line 174  static const short int escapes[] = {
174       -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,       -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
175       CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,       CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
176       CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,       CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
177       CHAR_GRAVE_ACCENT,       7,       CHAR_GRAVE_ACCENT,       ESC_a,
178       -ESC_b,                  0,       -ESC_b,                  0,
179       -ESC_d,                  ESC_e,       -ESC_d,                  ESC_e,
180       ESC_f,                   0,       ESC_f,                   0,
# Line 202  static const short int escapes[] = { Line 202  static const short int escapes[] = {
202  /*  68 */     0,     0,    '|',     ',',    '%',   '_',    '>',    '?',  /*  68 */     0,     0,    '|',     ',',    '%',   '_',    '>',    '?',
203  /*  70 */     0,     0,      0,       0,      0,     0,      0,      0,  /*  70 */     0,     0,      0,       0,      0,     0,      0,      0,
204  /*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',  /*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',
205  /*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,  /*  80 */     0, ESC_a, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
206  /*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,  /*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
207  /*  90 */     0,     0, -ESC_k,     'l',      0, ESC_n,      0, -ESC_p,  /*  90 */     0,     0, -ESC_k,       0,      0, ESC_n,      0, -ESC_p,
208  /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,  /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
209  /*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,  /*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,
210  /*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,  /*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,
# Line 219  static const short int escapes[] = { Line 219  static const short int escapes[] = {
219  /*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,  /*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,
220  /*  F8 */     0,     0,      0,       0,      0,     0,      0,      0  /*  F8 */     0,     0,      0,       0,      0,     0,      0,      0
221  };  };
222    
223    /* We also need a table of characters that may follow \c in an EBCDIC
224    environment for characters 0-31. */
225    
226    static unsigned char ebcdic_escape_c[] = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
227    
228  #endif  #endif
229    
230    
# Line 458  static const char error_texts[] = Line 464  static const char error_texts[] =
464    "range out of order in character class\0"    "range out of order in character class\0"
465    "nothing to repeat\0"    "nothing to repeat\0"
466    /* 10 */    /* 10 */
467    "operand of unlimited repeat could match the empty string\0"  /** DEAD **/    "internal error: invalid forward reference offset\0"
468    "internal error: unexpected repeat\0"    "internal error: unexpected repeat\0"
469    "unrecognized character after (? or (?-\0"    "unrecognized character after (? or (?-\0"
470    "POSIX named classes are supported only within a class\0"    "POSIX named classes are supported only within a class\0"
# Line 527  static const char error_texts[] = Line 533  static const char error_texts[] =
533    "different names for subpatterns of the same number are not allowed\0"    "different names for subpatterns of the same number are not allowed\0"
534    "(*MARK) must have an argument\0"    "(*MARK) must have an argument\0"
535    "this version of PCRE is not compiled with Unicode property support\0"    "this version of PCRE is not compiled with Unicode property support\0"
536    #ifndef EBCDIC
537    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
538    #else
539      "\\c must be followed by a letter or one of [\\]^_?\0"
540    #endif
541    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
542    /* 70 */    /* 70 */
543    "internal error: unknown opcode in find_fixedlength()\0"    "internal error: unknown opcode in find_fixedlength()\0"
# Line 1425  else Line 1435  else
1435      c ^= 0x40;      c ^= 0x40;
1436  #else             /* EBCDIC coding */  #else             /* EBCDIC coding */
1437      if (c >= CHAR_a && c <= CHAR_z) c += 64;      if (c >= CHAR_a && c <= CHAR_z) c += 64;
1438      c ^= 0xC0;      if (c == CHAR_QUESTION_MARK)
1439          c = ('\\' == 188 && '`' == 74)? 0x5f : 0xff;
1440        else
1441          {
1442          for (i = 0; i < 32; i++)
1443            {
1444            if (c == ebcdic_escape_c[i]) break;
1445            }
1446          if (i < 32) c = i; else *errorcodeptr = ERR68;
1447          }
1448  #endif  #endif
1449      break;      break;
1450    
# Line 1704  Arguments: Line 1723  Arguments:
1723    utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode    utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode
1724    atend    TRUE if called when the pattern is complete    atend    TRUE if called when the pattern is complete
1725    cd       the "compile data" structure    cd       the "compile data" structure
1726      recurses    chain of recurse_check to catch mutual recursion
1727    
1728  Returns:   the fixed length,  Returns:   the fixed length,
1729               or -1 if there is no fixed length,               or -1 if there is no fixed length,
# Line 1713  Returns:   the fixed length, Line 1733  Returns:   the fixed length,
1733  */  */
1734    
1735  static int  static int
1736  find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd)  find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd,
1737      recurse_check *recurses)
1738  {  {
1739  int length = -1;  int length = -1;
1740    recurse_check this_recurse;
1741  register int branchlength = 0;  register int branchlength = 0;
1742  register pcre_uchar *cc = code + 1 + LINK_SIZE;  register pcre_uchar *cc = code + 1 + LINK_SIZE;
1743    
# Line 1741  for (;;) Line 1762  for (;;)
1762      case OP_ONCE:      case OP_ONCE:
1763      case OP_ONCE_NC:      case OP_ONCE_NC:
1764      case OP_COND:      case OP_COND:
1765      d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd,
1766          recurses);
1767      if (d < 0) return d;      if (d < 0) return d;
1768      branchlength += d;      branchlength += d;
1769      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 1775  for (;;) Line 1797  for (;;)
1797      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1798      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
1799      if (cc > cs && cc < ce) return -1;                    /* Recursion */      if (cc > cs && cc < ce) return -1;                    /* Recursion */
1800      d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd);      else   /* Check for mutual recursion */
1801          {
1802          recurse_check *r = recurses;
1803          for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
1804          if (r != NULL) return -1;   /* Mutual recursion */
1805          }
1806        this_recurse.prev = recurses;
1807        this_recurse.group = cs;
1808        d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd, &this_recurse);
1809      if (d < 0) return d;      if (d < 0) return d;
1810      branchlength += d;      branchlength += d;
1811      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
# Line 1788  for (;;) Line 1818  for (;;)
1818      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1819      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1820      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
1821      cc += PRIV(OP_lengths)[*cc];      cc += 1 + LINK_SIZE;
1822      break;      break;
1823    
1824      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
# Line 2362  Arguments: Line 2392  Arguments:
2392  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
2393  */  */
2394    
 typedef struct recurse_check {  
   struct recurse_check *prev;  
   const pcre_uchar *group;  
 } recurse_check;  
   
2395  static BOOL  static BOOL
2396  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2397    BOOL utf, compile_data *cd, recurse_check *recurses)    BOOL utf, compile_data *cd, recurse_check *recurses)
# Line 2481  for (code = first_significant_code(code Line 2506  for (code = first_significant_code(code
2506    if (c == OP_BRA  || c == OP_BRAPOS ||    if (c == OP_BRA  || c == OP_BRAPOS ||
2507        c == OP_CBRA || c == OP_CBRAPOS ||        c == OP_CBRA || c == OP_CBRAPOS ||
2508        c == OP_ONCE || c == OP_ONCE_NC ||        c == OP_ONCE || c == OP_ONCE_NC ||
2509        c == OP_COND)        c == OP_COND || c == OP_SCOND)
2510      {      {
2511      BOOL empty_branch;      BOOL empty_branch;
2512      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 2497  for (code = first_significant_code(code Line 2522  for (code = first_significant_code(code
2522        empty_branch = FALSE;        empty_branch = FALSE;
2523        do        do
2524          {          {
2525          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,
2526            recurses)) empty_branch = TRUE;            recurses)) empty_branch = TRUE;
2527          code += GET(code, 1);          code += GET(code, 1);
2528          }          }
# Line 3658  for (;;) Line 3683  for (;;)
3683        get_chr_property_list(code, utf, cd->fcc, list) : NULL;        get_chr_property_list(code, utf, cd->fcc, list) : NULL;
3684      list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;      list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
3685    
3686      rec_limit = 10000;      rec_limit = 1000;
3687      if (end != NULL && compare_opcodes(end, utf, cd, list, end, &rec_limit))      if (end != NULL && compare_opcodes(end, utf, cd, list, end, &rec_limit))
3688        {        {
3689        switch(c)        switch(c)
# Line 3715  for (;;) Line 3740  for (;;)
3740    
3741        list[1] = (c & 1) == 0;        list[1] = (c & 1) == 0;
3742    
3743        rec_limit = 10000;        rec_limit = 1000;
3744        if (compare_opcodes(end, utf, cd, list, end, &rec_limit))        if (compare_opcodes(end, utf, cd, list, end, &rec_limit))
3745          {          {
3746          switch (c)          switch (c)
# Line 3880  didn't consider this to be a POSIX class Line 3905  didn't consider this to be a POSIX class
3905  The problem in trying to be exactly like Perl is in the handling of escapes. We  The problem in trying to be exactly like Perl is in the handling of escapes. We
3906  have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX  have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
3907  class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code  class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
3908  below handles the special case of \], but does not try to do any other escape  below handles the special cases \\ and \], but does not try to do any other
3909  processing. This makes it different from Perl for cases such as [:l\ower:]  escape processing. This makes it different from Perl for cases such as
3910  where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize  [:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does
3911  "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,  not recognize "l\ower". This is a lesser evil than not diagnosing bad classes
3912  I think.  when Perl does, I think.
3913    
3914  A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.  A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
3915  It seems that the appearance of a nested POSIX class supersedes an apparent  It seems that the appearance of a nested POSIX class supersedes an apparent
# Line 3911  pcre_uchar terminator;          /* Don't Line 3936  pcre_uchar terminator;          /* Don't
3936  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
3937  for (++ptr; *ptr != CHAR_NULL; ptr++)  for (++ptr; *ptr != CHAR_NULL; ptr++)
3938    {    {
3939    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)    if (*ptr == CHAR_BACKSLASH &&
3940          (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET ||
3941           ptr[1] == CHAR_BACKSLASH))
3942      ptr++;      ptr++;
3943    else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;    else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) ||
3944    else              *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
3945      else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
3946      {      {
3947      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)      *endptr = ptr;
3948        {      return TRUE;
       *endptr = ptr;  
       return TRUE;  
       }  
     if (*ptr == CHAR_LEFT_SQUARE_BRACKET &&  
          (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||  
           ptr[1] == CHAR_EQUALS_SIGN) &&  
         check_posix_syntax(ptr, endptr))  
       return FALSE;  
3949      }      }
3950    }    }
3951  return FALSE;  return FALSE;
# Line 3979  have their offsets adjusted. That one of Line 3999  have their offsets adjusted. That one of
3999  is called, the partially compiled regex must be temporarily terminated with  is called, the partially compiled regex must be temporarily terminated with
4000  OP_END.  OP_END.
4001    
4002  This function has been extended with the possibility of forward references for  This function has been extended to cope with forward references for recursions
4003  recursions and subroutine calls. It must also check the list of such references  and subroutine calls. It must check the list of such references for the
4004  for the group we are dealing with. If it finds that one of the recursions in  group we are dealing with. If it finds that one of the recursions in the
4005  the current group is on this list, it adjusts the offset in the list, not the  current group is on this list, it does not adjust the value in the reference
4006  value in the reference (which is a group number).  (which is a group number). After the group has been scanned, all the offsets in
4007    the forward reference list for the group are adjusted.
4008    
4009  Arguments:  Arguments:
4010    group      points to the start of the group    group      points to the start of the group
# Line 3999  static void Line 4020  static void
4020  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
4021    size_t save_hwm_offset)    size_t save_hwm_offset)
4022  {  {
4023    int offset;
4024    pcre_uchar *hc;
4025  pcre_uchar *ptr = group;  pcre_uchar *ptr = group;
4026    
4027  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
4028    {    {
   int offset;  
   pcre_uchar *hc;  
   
   /* See if this recursion is on the forward reference list. If so, adjust the  
   reference. */  
   
4029    for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;    for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
4030         hc += LINK_SIZE)         hc += LINK_SIZE)
4031      {      {
4032      offset = (int)GET(hc, 0);      offset = (int)GET(hc, 0);
4033      if (cd->start_code + offset == ptr + 1)      if (cd->start_code + offset == ptr + 1) break;
       {  
       PUT(hc, 0, offset + adjust);  
       break;  
       }  
4034      }      }
4035    
4036    /* Otherwise, adjust the recursion offset if it's after the start of this    /* If we have not found this recursion on the forward reference list, adjust
4037    group. */    the recursion's offset if it's after the start of this group. */
4038    
4039    if (hc >= cd->hwm)    if (hc >= cd->hwm)
4040      {      {
# Line 4031  while ((ptr = (pcre_uchar *)find_recurse Line 4044  while ((ptr = (pcre_uchar *)find_recurse
4044    
4045    ptr += 1 + LINK_SIZE;    ptr += 1 + LINK_SIZE;
4046    }    }
4047    
4048    /* Now adjust all forward reference offsets for the group. */
4049    
4050    for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
4051         hc += LINK_SIZE)
4052      {
4053      offset = (int)GET(hc, 0);
4054      PUT(hc, 0, offset + adjust);
4055      }
4056  }  }
4057    
4058    
# Line 4459  const pcre_uchar *tempptr; Line 4481  const pcre_uchar *tempptr;
4481  const pcre_uchar *nestptr = NULL;  const pcre_uchar *nestptr = NULL;
4482  pcre_uchar *previous = NULL;  pcre_uchar *previous = NULL;
4483  pcre_uchar *previous_callout = NULL;  pcre_uchar *previous_callout = NULL;
4484  size_t save_hwm_offset = 0;  size_t item_hwm_offset = 0;
4485  pcre_uint8 classbits[32];  pcre_uint8 classbits[32];
4486    
4487  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
# Line 4617  for (;; ptr++) Line 4639  for (;; ptr++)
4639    /* In the real compile phase, just check the workspace used by the forward    /* In the real compile phase, just check the workspace used by the forward
4640    reference list. */    reference list. */
4641    
4642    else if (cd->hwm > cd->start_workspace + cd->workspace_size -    else if (cd->hwm > cd->start_workspace + cd->workspace_size)
            WORK_SIZE_SAFETY_MARGIN)  
4643      {      {
4644      *errorcodeptr = ERR52;      *errorcodeptr = ERR52;
4645      goto FAILED;      goto FAILED;
# Line 4761  for (;; ptr++) Line 4782  for (;; ptr++)
4782      zeroreqchar = reqchar;      zeroreqchar = reqchar;
4783      zeroreqcharflags = reqcharflags;      zeroreqcharflags = reqcharflags;
4784      previous = code;      previous = code;
4785        item_hwm_offset = cd->hwm - cd->start_workspace;
4786      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
4787      break;      break;
4788    
# Line 4812  for (;; ptr++) Line 4834  for (;; ptr++)
4834      /* Handle a real character class. */      /* Handle a real character class. */
4835    
4836      previous = code;      previous = code;
4837        item_hwm_offset = cd->hwm - cd->start_workspace;
4838    
4839      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
4840      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
# Line 4917  for (;; ptr++) Line 4940  for (;; ptr++)
4940        (which is on the stack). We have to remember that there was XCLASS data,        (which is on the stack). We have to remember that there was XCLASS data,
4941        however. */        however. */
4942    
4943          if (class_uchardata > class_uchardata_base) xclass = TRUE;
4944    
4945        if (lengthptr != NULL && class_uchardata > class_uchardata_base)        if (lengthptr != NULL && class_uchardata > class_uchardata_base)
4946          {          {
         xclass = TRUE;  
4947          *lengthptr += (int)(class_uchardata - class_uchardata_base);          *lengthptr += (int)(class_uchardata - class_uchardata_base);
4948          class_uchardata = class_uchardata_base;          class_uchardata = class_uchardata_base;
4949          }          }
# Line 5022  for (;; ptr++) Line 5046  for (;; ptr++)
5046              ptr = tempptr + 1;              ptr = tempptr + 1;
5047              continue;              continue;
5048    
5049              /* For all other POSIX classes, no special action is taken in UCP              /* For the other POSIX classes (ascii, xdigit) we are going to fall
5050              mode. Fall through to the non_UCP case. */              through to the non-UCP case and build a bit map for characters with
5051                code points less than 256. If we are in a negated POSIX class
5052                within a non-negated overall class, characters with code points
5053                greater than 255 must all match. In the special case where we have
5054                not yet generated any xclass data, and this is the final item in
5055                the overall class, we need do nothing: later on, the opcode
5056                OP_NCLASS will be used to indicate that characters greater than 255
5057                are acceptable. If we have already seen an xclass item or one may
5058                follow (we have to assume that it might if this is not the end of
5059                the class), explicitly match all wide codepoints. */
5060    
5061              default:              default:
5062                if (!negate_class && local_negate &&
5063                    (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
5064                  {
5065                  *class_uchardata++ = XCL_RANGE;
5066                  class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
5067                  class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
5068                  }
5069              break;              break;
5070              }              }
5071            }            }
# Line 5189  for (;; ptr++) Line 5229  for (;; ptr++)
5229                cd, PRIV(vspace_list));                cd, PRIV(vspace_list));
5230              continue;              continue;
5231    
 #ifdef SUPPORT_UCP  
5232              case ESC_p:              case ESC_p:
5233              case ESC_P:              case ESC_P:
5234    #ifdef SUPPORT_UCP
5235                {                {
5236                BOOL negated;                BOOL negated;
5237                unsigned int ptype = 0, pdata = 0;                unsigned int ptype = 0, pdata = 0;
# Line 5205  for (;; ptr++) Line 5245  for (;; ptr++)
5245                class_has_8bitchar--;                /* Undo! */                class_has_8bitchar--;                /* Undo! */
5246                continue;                continue;
5247                }                }
5248    #else
5249                *errorcodeptr = ERR45;
5250                goto FAILED;
5251  #endif  #endif
5252              /* Unrecognized escapes are faulted if PCRE is running in its              /* Unrecognized escapes are faulted if PCRE is running in its
5253              strict mode. By default, for compatibility with Perl, they are              strict mode. By default, for compatibility with Perl, they are
# Line 5361  for (;; ptr++) Line 5404  for (;; ptr++)
5404        CLASS_SINGLE_CHARACTER:        CLASS_SINGLE_CHARACTER:
5405        if (class_one_char < 2) class_one_char++;        if (class_one_char < 2) class_one_char++;
5406    
5407        /* If class_one_char is 1, we have the first single character in the        /* If xclass_has_prop is false and class_one_char is 1, we have the first
5408        class, and there have been no prior ranges, or XCLASS items generated by        single character in the class, and there have been no prior ranges, or
5409        escapes. If this is the final character in the class, we can optimize by        XCLASS items generated by escapes. If this is the final character in the
5410        turning the item into a 1-character OP_CHAR[I] if it's positive, or        class, we can optimize by turning the item into a 1-character OP_CHAR[I]
5411        OP_NOT[I] if it's negative. In the positive case, it can cause firstchar        if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
5412        to be set. Otherwise, there can be no first char if this item is first,        can cause firstchar to be set. Otherwise, there can be no first char if
5413        whatever repeat count may follow. In the case of reqchar, save the        this item is first, whatever repeat count may follow. In the case of
5414        previous value for reinstating. */        reqchar, save the previous value for reinstating. */
5415    
5416        if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)        if (!inescq &&
5417    #ifdef SUPPORT_UCP
5418              !xclass_has_prop &&
5419    #endif
5420              class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
5421          {          {
5422          ptr++;          ptr++;
5423          zeroreqchar = reqchar;          zeroreqchar = reqchar;
# Line 5486  for (;; ptr++) Line 5533  for (;; ptr++)
5533      actual compiled code. */      actual compiled code. */
5534    
5535  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
5536      if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0))      if (xclass && (xclass_has_prop || !should_flip_negation ||
5537            (options & PCRE_UCP) != 0))
5538  #elif !defined COMPILE_PCRE8  #elif !defined COMPILE_PCRE8
5539      if (xclass && !should_flip_negation)      if (xclass && (xclass_has_prop || !should_flip_negation))
5540  #endif  #endif
5541  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5542        {        {
# Line 5518  for (;; ptr++) Line 5566  for (;; ptr++)
5566        PUT(previous, 1, (int)(code - previous));        PUT(previous, 1, (int)(code - previous));
5567        break;   /* End of class handling */        break;   /* End of class handling */
5568        }        }
 #endif  
5569    
5570      /* Even though any XCLASS list is now discarded, we must allow for      /* Even though any XCLASS list is now discarded, we must allow for
5571      its memory. */      its memory. */
5572    
5573      if (lengthptr != NULL)      if (lengthptr != NULL)
5574        *lengthptr += (int)(class_uchardata - class_uchardata_base);        *lengthptr += (int)(class_uchardata - class_uchardata_base);
5575    #endif
5576    
5577      /* If there are no characters > 255, or they are all to be included or      /* If there are no characters > 255, or they are all to be included or
5578      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
# Line 5924  for (;; ptr++) Line 5972  for (;; ptr++)
5972        {        {
5973        register int i;        register int i;
5974        int len = (int)(code - previous);        int len = (int)(code - previous);
5975          size_t base_hwm_offset = item_hwm_offset;
5976        pcre_uchar *bralink = NULL;        pcre_uchar *bralink = NULL;
5977        pcre_uchar *brazeroptr = NULL;        pcre_uchar *brazeroptr = NULL;
5978    
# Line 5978  for (;; ptr++) Line 6027  for (;; ptr++)
6027          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
6028            {            {
6029            *code = OP_END;            *code = OP_END;
6030            adjust_recurse(previous, 1, utf, cd, save_hwm_offset);            adjust_recurse(previous, 1, utf, cd, item_hwm_offset);
6031            memmove(previous + 1, previous, IN_UCHARS(len));            memmove(previous + 1, previous, IN_UCHARS(len));
6032            code++;            code++;
6033            if (repeat_max == 0)            if (repeat_max == 0)
# Line 6002  for (;; ptr++) Line 6051  for (;; ptr++)
6051            {            {
6052            int offset;            int offset;
6053            *code = OP_END;            *code = OP_END;
6054            adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset);            adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, item_hwm_offset);
6055            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
6056            code += 2 + LINK_SIZE;            code += 2 + LINK_SIZE;
6057            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
# Line 6070  for (;; ptr++) Line 6119  for (;; ptr++)
6119    
6120                while (cd->hwm > cd->start_workspace + cd->workspace_size -                while (cd->hwm > cd->start_workspace + cd->workspace_size -
6121                       WORK_SIZE_SAFETY_MARGIN -                       WORK_SIZE_SAFETY_MARGIN -
6122                       (this_hwm_offset - save_hwm_offset))                       (this_hwm_offset - base_hwm_offset))
6123                  {                  {
6124                  *errorcodeptr = expand_workspace(cd);                  *errorcodeptr = expand_workspace(cd);
6125                  if (*errorcodeptr != 0) goto FAILED;                  if (*errorcodeptr != 0) goto FAILED;
6126                  }                  }
6127    
6128                for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset;                for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset;
6129                     hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;                     hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
6130                     hc += LINK_SIZE)                     hc += LINK_SIZE)
6131                  {                  {
6132                  PUT(cd->hwm, 0, GET(hc, 0) + len);                  PUT(cd->hwm, 0, GET(hc, 0) + len);
6133                  cd->hwm += LINK_SIZE;                  cd->hwm += LINK_SIZE;
6134                  }                  }
6135                save_hwm_offset = this_hwm_offset;                base_hwm_offset = this_hwm_offset;
6136                code += len;                code += len;
6137                }                }
6138              }              }
# Line 6151  for (;; ptr++) Line 6200  for (;; ptr++)
6200    
6201            while (cd->hwm > cd->start_workspace + cd->workspace_size -            while (cd->hwm > cd->start_workspace + cd->workspace_size -
6202                   WORK_SIZE_SAFETY_MARGIN -                   WORK_SIZE_SAFETY_MARGIN -
6203                   (this_hwm_offset - save_hwm_offset))                   (this_hwm_offset - base_hwm_offset))
6204              {              {
6205              *errorcodeptr = expand_workspace(cd);              *errorcodeptr = expand_workspace(cd);
6206              if (*errorcodeptr != 0) goto FAILED;              if (*errorcodeptr != 0) goto FAILED;
6207              }              }
6208    
6209            for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset;            for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset;
6210                 hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;                 hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
6211                 hc += LINK_SIZE)                 hc += LINK_SIZE)
6212              {              {
6213              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
6214              cd->hwm += LINK_SIZE;              cd->hwm += LINK_SIZE;
6215              }              }
6216            save_hwm_offset = this_hwm_offset;            base_hwm_offset = this_hwm_offset;
6217            code += len;            code += len;
6218            }            }
6219    
# Line 6247  for (;; ptr++) Line 6296  for (;; ptr++)
6296              while (*scode == OP_ALT);              while (*scode == OP_ALT);
6297              }              }
6298    
6299              /* A conditional group with only one branch has an implicit empty
6300              alternative branch. */
6301    
6302              if (*bracode == OP_COND && bracode[GET(bracode,1)] != OP_ALT)
6303                *bracode = OP_SCOND;
6304    
6305            /* Handle possessive quantifiers. */            /* Handle possessive quantifiers. */
6306    
6307            if (possessive_quantifier)            if (possessive_quantifier)
# Line 6260  for (;; ptr++) Line 6315  for (;; ptr++)
6315                {                {
6316                int nlen = (int)(code - bracode);                int nlen = (int)(code - bracode);
6317                *code = OP_END;                *code = OP_END;
6318                adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);                adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6319                memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));                memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
6320                code += 1 + LINK_SIZE;                code += 1 + LINK_SIZE;
6321                nlen += 1 + LINK_SIZE;                nlen += 1 + LINK_SIZE;
6322                *bracode = OP_BRAPOS;                *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
6323                *code++ = OP_KETRPOS;                *code++ = OP_KETRPOS;
6324                PUTINC(code, 0, nlen);                PUTINC(code, 0, nlen);
6325                PUT(bracode, 1, nlen);                PUT(bracode, 1, nlen);
# Line 6394  for (;; ptr++) Line 6449  for (;; ptr++)
6449          else          else
6450            {            {
6451            *code = OP_END;            *code = OP_END;
6452            adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);            adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6453            memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));            memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6454            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
6455            len += 1 + LINK_SIZE;            len += 1 + LINK_SIZE;
# Line 6443  for (;; ptr++) Line 6498  for (;; ptr++)
6498    
6499          default:          default:
6500          *code = OP_END;          *code = OP_END;
6501          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6502          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6503          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6504          len += 1 + LINK_SIZE;          len += 1 + LINK_SIZE;
# Line 6476  for (;; ptr++) Line 6531  for (;; ptr++)
6531    
6532      /* First deal with comments. Putting this code right at the start ensures      /* First deal with comments. Putting this code right at the start ensures
6533      that comments have no bad side effects. */      that comments have no bad side effects. */
6534    
6535      if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)      if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
6536        {        {
6537        ptr += 2;        ptr += 2;
6538        while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;        while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
6539        if (*ptr == CHAR_NULL)        if (*ptr == CHAR_NULL)
6540          {          {
6541          *errorcodeptr = ERR18;          *errorcodeptr = ERR18;
6542          goto FAILED;          goto FAILED;
6543          }          }
6544        continue;        continue;
6545        }        }
6546    
6547      /* Now deal with various "verbs" that can be introduced by '*'. */      /* Now deal with various "verbs" that can be introduced by '*'. */
6548    
# Line 6579  for (;; ptr++) Line 6634  for (;; ptr++)
6634                goto FAILED;                goto FAILED;
6635                }                }
6636              setverb = *code++ = verbs[i].op_arg;              setverb = *code++ = verbs[i].op_arg;
6637              *code++ = arglen;              if (lengthptr != NULL)    /* In pass 1 just add in the length */
6638              memcpy(code, arg, IN_UCHARS(arglen));                {                       /* to avoid potential workspace */
6639              code += arglen;                *lengthptr += arglen;   /* overflow. */
6640                  *code++ = 0;
6641                  }
6642                else
6643                  {
6644                  *code++ = arglen;
6645                  memcpy(code, arg, IN_UCHARS(arglen));
6646                  code += arglen;
6647                  }
6648              *code++ = 0;              *code++ = 0;
6649              }              }
6650    
# Line 6616  for (;; ptr++) Line 6679  for (;; ptr++)
6679      newoptions = options;      newoptions = options;
6680      skipbytes = 0;      skipbytes = 0;
6681      bravalue = OP_CBRA;      bravalue = OP_CBRA;
6682      save_hwm_offset = cd->hwm - cd->start_workspace;      item_hwm_offset = cd->hwm - cd->start_workspace;
6683      reset_bracount = FALSE;      reset_bracount = FALSE;
6684    
6685      /* Deal with the extended parentheses; all are introduced by '?', and the      /* Deal with the extended parentheses; all are introduced by '?', and the
# Line 6634  for (;; ptr++) Line 6697  for (;; ptr++)
6697          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
6698          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
6699          reset_bracount = TRUE;          reset_bracount = TRUE;
6700            cd->dupgroups = TRUE;     /* Record (?| encountered */
6701          /* Fall through */          /* Fall through */
6702    
6703          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
# Line 6679  for (;; ptr++) Line 6743  for (;; ptr++)
6743          if (tempptr[1] == CHAR_QUESTION_MARK &&          if (tempptr[1] == CHAR_QUESTION_MARK &&
6744                (tempptr[2] == CHAR_EQUALS_SIGN ||                (tempptr[2] == CHAR_EQUALS_SIGN ||
6745                 tempptr[2] == CHAR_EXCLAMATION_MARK ||                 tempptr[2] == CHAR_EXCLAMATION_MARK ||
6746                 tempptr[2] == CHAR_LESS_THAN_SIGN))                   (tempptr[2] == CHAR_LESS_THAN_SIGN &&
6747                       (tempptr[3] == CHAR_EQUALS_SIGN ||
6748                        tempptr[3] == CHAR_EXCLAMATION_MARK))))
6749            {            {
6750            cd->iscondassert = TRUE;            cd->iscondassert = TRUE;
6751            break;            break;
# Line 6732  for (;; ptr++) Line 6798  for (;; ptr++)
6798            {            {
6799            while (IS_DIGIT(*ptr))            while (IS_DIGIT(*ptr))
6800              {              {
6801                if (recno > INT_MAX / 10 - 1)  /* Integer overflow */
6802                  {
6803                  while (IS_DIGIT(*ptr)) ptr++;
6804                  *errorcodeptr = ERR61;
6805                  goto FAILED;
6806                  }
6807              recno = recno * 10 + (int)(*ptr - CHAR_0);              recno = recno * 10 + (int)(*ptr - CHAR_0);
6808              ptr++;              ptr++;
6809              }              }
# Line 6760  for (;; ptr++) Line 6832  for (;; ptr++)
6832              ptr++;              ptr++;
6833              }              }
6834            namelen = (int)(ptr - name);            namelen = (int)(ptr - name);
6835            if (lengthptr != NULL) *lengthptr += IMM2_SIZE;            if (lengthptr != NULL) skipbytes += IMM2_SIZE;
6836            }            }
6837    
6838          /* Check the terminator */          /* Check the terminator */
# Line 6866  for (;; ptr++) Line 6938  for (;; ptr++)
6938                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
6939                goto FAILED;                goto FAILED;
6940                }                }
6941                if (recno > INT_MAX / 10 - 1)   /* Integer overflow */
6942                  {
6943                  *errorcodeptr = ERR61;
6944                  goto FAILED;
6945                  }
6946              recno = recno * 10 + name[i] - CHAR_0;              recno = recno * 10 + name[i] - CHAR_0;
6947              }              }
6948            if (recno == 0) recno = RREF_ANY;            if (recno == 0) recno = RREF_ANY;
# Line 7142  for (;; ptr++) Line 7219  for (;; ptr++)
7219          if (lengthptr != NULL)          if (lengthptr != NULL)
7220            {            {
7221            named_group *ng;            named_group *ng;
7222              recno = 0;
7223    
7224            if (namelen == 0)            if (namelen == 0)
7225              {              {
# Line 7159  for (;; ptr++) Line 7237  for (;; ptr++)
7237              goto FAILED;              goto FAILED;
7238              }              }
7239    
           /* The name table does not exist in the first pass; instead we must  
           scan the list of names encountered so far in order to get the  
           number. If the name is not found, set the value to 0 for a forward  
           reference. */  
   
           ng = cd->named_groups;  
           for (i = 0; i < cd->names_found; i++, ng++)  
             {  
             if (namelen == ng->length &&  
                 STRNCMP_UC_UC(name, ng->name, namelen) == 0)  
               break;  
             }  
           recno = (i < cd->names_found)? ng->number : 0;  
   
7240            /* Count named back references. */            /* Count named back references. */
7241    
7242            if (!is_recurse) cd->namedrefcount++;            if (!is_recurse) cd->namedrefcount++;
# Line 7182  for (;; ptr++) Line 7246  for (;; ptr++)
7246            16-bit data item. */            16-bit data item. */
7247    
7248            *lengthptr += IMM2_SIZE;            *lengthptr += IMM2_SIZE;
7249    
7250              /* If this is a forward reference and we are within a (?|...) group,
7251              the reference may end up as the number of a group which we are
7252              currently inside, that is, it could be a recursive reference. In the
7253              real compile this will be picked up and the reference wrapped with
7254              OP_ONCE to make it atomic, so we must allow space in case this occurs. */
7255    
7256              /* In fact, this can happen for a non-forward reference because
7257              another group with the same number might be created later. This
7258              issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
7259              only mode, we finesse the bug by allowing more memory always. */
7260    
7261              *lengthptr += 2 + 2*LINK_SIZE;
7262    
7263              /* It is even worse than that. The current reference may be to an
7264              existing named group with a different number (so apparently not
7265              recursive) but which later on is also attached to a group with the
7266              current number. This can only happen if (?| has been previously
7267              encountered. In that case, we allow yet more memory, just in case.
7268              (Again, this is fixed "properly" in PCRE2.) */
7269    
7270              if (cd->dupgroups) *lengthptr += 4 + 4*LINK_SIZE;
7271    
7272              /* Otherwise, check for recursion here. The name table does not exist
7273              in the first pass; instead we must scan the list of names encountered
7274              so far in order to get the number. If the name is not found, leave
7275              the value of recno as 0 for a forward reference. */
7276    
7277              else
7278                {
7279                ng = cd->named_groups;
7280                for (i = 0; i < cd->names_found; i++, ng++)
7281                  {
7282                  if (namelen == ng->length &&
7283                      STRNCMP_UC_UC(name, ng->name, namelen) == 0)
7284                    {
7285                    open_capitem *oc;
7286                    recno = ng->number;
7287                    if (is_recurse) break;
7288                    for (oc = cd->open_caps; oc != NULL; oc = oc->next)
7289                      {
7290                      if (oc->number == recno)
7291                        {
7292                        oc->flag = TRUE;
7293                        break;
7294                        }
7295                      }
7296                    }
7297                  }
7298                }
7299            }            }
7300    
7301          /* In the real compile, search the name table. We check the name          /* In the real compile, search the name table. We check the name
# Line 7228  for (;; ptr++) Line 7342  for (;; ptr++)
7342            for (i++; i < cd->names_found; i++)            for (i++; i < cd->names_found; i++)
7343              {              {
7344              if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;              if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
   
   
7345              count++;              count++;
7346              cslot += cd->name_entry_size;              cslot += cd->name_entry_size;
7347              }              }
# Line 7238  for (;; ptr++) Line 7350  for (;; ptr++)
7350              {              {
7351              if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;              if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
7352              previous = code;              previous = code;
7353                item_hwm_offset = cd->hwm - cd->start_workspace;
7354              *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;              *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
7355              PUT2INC(code, 0, index);              PUT2INC(code, 0, index);
7356              PUT2INC(code, 0, count);              PUT2INC(code, 0, count);
# Line 7275  for (;; ptr++) Line 7388  for (;; ptr++)
7388    
7389    
7390          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
7391          case CHAR_R:              /* Recursion */          case CHAR_R:              /* Recursion, same as (?0) */
7392          ptr++;                    /* Same as (?0)      */          recno = 0;
7393          /* Fall through */          if (*(++ptr) != CHAR_RIGHT_PARENTHESIS)
7394              {
7395              *errorcodeptr = ERR29;
7396              goto FAILED;
7397              }
7398            goto HANDLE_RECURSION;
7399    
7400    
7401          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
# Line 7314  for (;; ptr++) Line 7432  for (;; ptr++)
7432    
7433            recno = 0;            recno = 0;
7434            while(IS_DIGIT(*ptr))            while(IS_DIGIT(*ptr))
7435                {
7436                if (recno > INT_MAX / 10 - 1) /* Integer overflow */
7437                  {
7438                  while (IS_DIGIT(*ptr)) ptr++;
7439                  *errorcodeptr = ERR61;
7440                  goto FAILED;
7441                  }
7442              recno = recno * 10 + *ptr++ - CHAR_0;              recno = recno * 10 + *ptr++ - CHAR_0;
7443                }
7444    
7445            if (*ptr != (pcre_uchar)terminator)            if (*ptr != (pcre_uchar)terminator)
7446              {              {
# Line 7351  for (;; ptr++) Line 7477  for (;; ptr++)
7477            HANDLE_RECURSION:            HANDLE_RECURSION:
7478    
7479            previous = code;            previous = code;
7480              item_hwm_offset = cd->hwm - cd->start_workspace;
7481            called = cd->start_code;            called = cd->start_code;
7482    
7483            /* When we are actually compiling, find the bracket that is being            /* When we are actually compiling, find the bracket that is being
# Line 7552  for (;; ptr++) Line 7679  for (;; ptr++)
7679        previous = NULL;        previous = NULL;
7680        cd->iscondassert = FALSE;        cd->iscondassert = FALSE;
7681        }        }
7682      else previous = code;      else
7683          {
7684          previous = code;
7685          item_hwm_offset = cd->hwm - cd->start_workspace;
7686          }
7687    
7688      *code = bravalue;      *code = bravalue;
7689      tempcode = code;      tempcode = code;
# Line 7800  for (;; ptr++) Line 7931  for (;; ptr++)
7931          const pcre_uchar *p;          const pcre_uchar *p;
7932          pcre_uint32 cf;          pcre_uint32 cf;
7933    
7934          save_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */          item_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */
7935          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
7936            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
7937    
# Line 7829  for (;; ptr++) Line 7960  for (;; ptr++)
7960          if (*p != (pcre_uchar)terminator)          if (*p != (pcre_uchar)terminator)
7961            {            {
7962            *errorcodeptr = ERR57;            *errorcodeptr = ERR57;
7963            break;            goto FAILED;
7964            }            }
7965          ptr++;          ptr++;
7966          goto HANDLE_NUMERICAL_RECURSION;          goto HANDLE_NUMERICAL_RECURSION;
# Line 7844  for (;; ptr++) Line 7975  for (;; ptr++)
7975            ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))            ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
7976            {            {
7977            *errorcodeptr = ERR69;            *errorcodeptr = ERR69;
7978            break;            goto FAILED;
7979            }            }
7980          is_recurse = FALSE;          is_recurse = FALSE;
7981          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
# Line 7868  for (;; ptr++) Line 7999  for (;; ptr++)
7999          HANDLE_REFERENCE:          HANDLE_REFERENCE:
8000          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
8001          previous = code;          previous = code;
8002            item_hwm_offset = cd->hwm - cd->start_workspace;
8003          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
8004          PUT2INC(code, 0, recno);          PUT2INC(code, 0, recno);
8005          cd->backref_map |= (recno < 32)? (1 << recno) : 1;          cd->backref_map |= (recno < 32)? (1 << recno) : 1;
# Line 7897  for (;; ptr++) Line 8029  for (;; ptr++)
8029          if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))          if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
8030            goto FAILED;            goto FAILED;
8031          previous = code;          previous = code;
8032            item_hwm_offset = cd->hwm - cd->start_workspace;
8033          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
8034          *code++ = ptype;          *code++ = ptype;
8035          *code++ = pdata;          *code++ = pdata;
# Line 7937  for (;; ptr++) Line 8070  for (;; ptr++)
8070    
8071            {            {
8072            previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;            previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
8073              item_hwm_offset = cd->hwm - cd->start_workspace;
8074            *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;            *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
8075            }            }
8076          }          }
# Line 7980  for (;; ptr++) Line 8114  for (;; ptr++)
8114    
8115      ONE_CHAR:      ONE_CHAR:
8116      previous = code;      previous = code;
8117        item_hwm_offset = cd->hwm - cd->start_workspace;
8118    
8119      /* For caseless UTF-8 mode when UCP support is available, check whether      /* For caseless UTF-8 mode when UCP support is available, check whether
8120      this character has more than one other case. If so, generate a special      this character has more than one other case. If so, generate a special
# Line 8288  for (;;) Line 8423  for (;;)
8423        int fixed_length;        int fixed_length;
8424        *code = OP_END;        *code = OP_END;
8425        fixed_length = find_fixedlength(last_branch,  (options & PCRE_UTF8) != 0,        fixed_length = find_fixedlength(last_branch,  (options & PCRE_UTF8) != 0,
8426          FALSE, cd);          FALSE, cd, NULL);
8427        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
8428        if (fixed_length == -3)        if (fixed_length == -3)
8429          {          {
# Line 9155  cd->names_found = 0; Line 9290  cd->names_found = 0;
9290  cd->name_entry_size = 0;  cd->name_entry_size = 0;
9291  cd->name_table = NULL;  cd->name_table = NULL;
9292  cd->dupnames = FALSE;  cd->dupnames = FALSE;
9293    cd->dupgroups = FALSE;
9294  cd->namedrefcount = 0;  cd->namedrefcount = 0;
9295  cd->start_code = cworkspace;  cd->start_code = cworkspace;
9296  cd->hwm = cworkspace;  cd->hwm = cworkspace;
# Line 9327  if (cd->hwm > cd->start_workspace) Line 9463  if (cd->hwm > cd->start_workspace)
9463      int offset, recno;      int offset, recno;
9464      cd->hwm -= LINK_SIZE;      cd->hwm -= LINK_SIZE;
9465      offset = GET(cd->hwm, 0);      offset = GET(cd->hwm, 0);
9466    
9467        /* Check that the hwm handling hasn't gone wrong. This whole area is
9468        rewritten in PCRE2 because there are some obscure cases. */
9469    
9470        if (offset == 0 || codestart[offset-1] != OP_RECURSE)
9471          {
9472          errorcode = ERR10;
9473          break;
9474          }
9475    
9476      recno = GET(codestart, offset);      recno = GET(codestart, offset);
9477      if (recno != prev_recno)      if (recno != prev_recno)
9478        {        {
# Line 9357  used in this code because at least one c Line 9503  used in this code because at least one c
9503  "const" attribute if the cast (pcre_uchar *)codestart is used directly in the  "const" attribute if the cast (pcre_uchar *)codestart is used directly in the
9504  function call. */  function call. */
9505    
9506  if ((options & PCRE_NO_AUTO_POSSESS) == 0)  if (errorcode == 0 && (options & PCRE_NO_AUTO_POSSESS) == 0)
9507    {    {
9508    pcre_uchar *temp = (pcre_uchar *)codestart;    pcre_uchar *temp = (pcre_uchar *)codestart;
9509    auto_possessify(temp, utf, cd);    auto_possessify(temp, utf, cd);
# Line 9371  OP_RECURSE that are not fixed length get Line 9517  OP_RECURSE that are not fixed length get
9517  exceptional ones forgo this. We scan the pattern to check that they are fixed  exceptional ones forgo this. We scan the pattern to check that they are fixed
9518  length, and set their lengths. */  length, and set their lengths. */
9519    
9520  if (cd->check_lookbehind)  if (errorcode == 0 && cd->check_lookbehind)
9521    {    {
9522    pcre_uchar *cc = (pcre_uchar *)codestart;    pcre_uchar *cc = (pcre_uchar *)codestart;
9523    
# Line 9391  if (cd->check_lookbehind) Line 9537  if (cd->check_lookbehind)
9537        int end_op = *be;        int end_op = *be;
9538        *be = OP_END;        *be = OP_END;
9539        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
9540          cd);          cd, NULL);
9541        *be = end_op;        *be = end_op;
9542        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
9543        if (fixed_length < 0)        if (fixed_length < 0)
# Line 9584  return (pcre32 *)re; Line 9730  return (pcre32 *)re;
9730  }  }
9731    
9732  /* End of pcre_compile.c */  /* End of pcre_compile.c */
   

Legend:
Removed from v.1538  
changed lines
  Added in v.1608

  ViewVC Help
Powered by ViewVC 1.1.5