/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

Left: revision 1538 by ph10, Sun Mar 29 11:22:24 2015 UTC | Right: revision 1616 by ph10, Mon Nov 30 17:44:45 2015 UTC
# Line 174  static const short int escapes[] = { Line 174  static const short int escapes[] = {
174       -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,       -ESC_Z,                  CHAR_LEFT_SQUARE_BRACKET,
175       CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,       CHAR_BACKSLASH,          CHAR_RIGHT_SQUARE_BRACKET,
176       CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,       CHAR_CIRCUMFLEX_ACCENT,  CHAR_UNDERSCORE,
177       CHAR_GRAVE_ACCENT,       7,       CHAR_GRAVE_ACCENT,       ESC_a,
178       -ESC_b,                  0,       -ESC_b,                  0,
179       -ESC_d,                  ESC_e,       -ESC_d,                  ESC_e,
180       ESC_f,                   0,       ESC_f,                   0,
# Line 202  static const short int escapes[] = { Line 202  static const short int escapes[] = {
202  /*  68 */     0,     0,    '|',     ',',    '%',   '_',    '>',    '?',  /*  68 */     0,     0,    '|',     ',',    '%',   '_',    '>',    '?',
203  /*  70 */     0,     0,      0,       0,      0,     0,      0,      0,  /*  70 */     0,     0,      0,       0,      0,     0,      0,      0,
204  /*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',  /*  78 */     0,   '`',    ':',     '#',    '@',  '\'',    '=',    '"',
205  /*  80 */     0,     7, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,  /*  80 */     0, ESC_a, -ESC_b,       0, -ESC_d, ESC_e,  ESC_f,      0,
206  /*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,  /*  88 */-ESC_h,     0,      0,     '{',      0,     0,      0,      0,
207  /*  90 */     0,     0, -ESC_k,     'l',      0, ESC_n,      0, -ESC_p,  /*  90 */     0,     0, -ESC_k,       0,      0, ESC_n,      0, -ESC_p,
208  /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,  /*  98 */     0, ESC_r,      0,     '}',      0,     0,      0,      0,
209  /*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,  /*  A0 */     0,   '~', -ESC_s, ESC_tee,      0,-ESC_v, -ESC_w,      0,
210  /*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,  /*  A8 */     0,-ESC_z,      0,       0,      0,   '[',      0,      0,
# Line 219  static const short int escapes[] = { Line 219  static const short int escapes[] = {
219  /*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,  /*  F0 */     0,     0,      0,       0,      0,     0,      0,      0,
220  /*  F8 */     0,     0,      0,       0,      0,     0,      0,      0  /*  F8 */     0,     0,      0,       0,      0,     0,      0,      0
221  };  };
222    
223    /* We also need a table of characters that may follow \c in an EBCDIC
224    environment for characters 0-31. */
225    
226    static unsigned char ebcdic_escape_c[] = "@ABCDEFGHIJKLMNOPQRSTUVWXYZ[\\]^_";
227    
228  #endif  #endif
229    
230    
# Line 458  static const char error_texts[] = Line 464  static const char error_texts[] =
464    "range out of order in character class\0"    "range out of order in character class\0"
465    "nothing to repeat\0"    "nothing to repeat\0"
466    /* 10 */    /* 10 */
467    "operand of unlimited repeat could match the empty string\0"  /** DEAD **/    "internal error: invalid forward reference offset\0"
468    "internal error: unexpected repeat\0"    "internal error: unexpected repeat\0"
469    "unrecognized character after (? or (?-\0"    "unrecognized character after (? or (?-\0"
470    "POSIX named classes are supported only within a class\0"    "POSIX named classes are supported only within a class\0"
# Line 527  static const char error_texts[] = Line 533  static const char error_texts[] =
533    "different names for subpatterns of the same number are not allowed\0"    "different names for subpatterns of the same number are not allowed\0"
534    "(*MARK) must have an argument\0"    "(*MARK) must have an argument\0"
535    "this version of PCRE is not compiled with Unicode property support\0"    "this version of PCRE is not compiled with Unicode property support\0"
536    #ifndef EBCDIC
537    "\\c must be followed by an ASCII character\0"    "\\c must be followed by an ASCII character\0"
538    #else
539      "\\c must be followed by a letter or one of [\\]^_?\0"
540    #endif
541    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
542    /* 70 */    /* 70 */
543    "internal error: unknown opcode in find_fixedlength()\0"    "internal error: unknown opcode in find_fixedlength()\0"
# Line 1425  else Line 1435  else
1435      c ^= 0x40;      c ^= 0x40;
1436  #else             /* EBCDIC coding */  #else             /* EBCDIC coding */
1437      if (c >= CHAR_a && c <= CHAR_z) c += 64;      if (c >= CHAR_a && c <= CHAR_z) c += 64;
1438      c ^= 0xC0;      if (c == CHAR_QUESTION_MARK)
1439          c = ('\\' == 188 && '`' == 74)? 0x5f : 0xff;
1440        else
1441          {
1442          for (i = 0; i < 32; i++)
1443            {
1444            if (c == ebcdic_escape_c[i]) break;
1445            }
1446          if (i < 32) c = i; else *errorcodeptr = ERR68;
1447          }
1448  #endif  #endif
1449      break;      break;
1450    
# Line 1704  Arguments: Line 1723  Arguments:
1723    utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode    utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode
1724    atend    TRUE if called when the pattern is complete    atend    TRUE if called when the pattern is complete
1725    cd       the "compile data" structure    cd       the "compile data" structure
1726      recurses    chain of recurse_check to catch mutual recursion
1727    
1728  Returns:   the fixed length,  Returns:   the fixed length,
1729               or -1 if there is no fixed length,               or -1 if there is no fixed length,
# Line 1713  Returns:   the fixed length, Line 1733  Returns:   the fixed length,
1733  */  */
1734    
1735  static int  static int
1736  find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd)  find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd,
1737      recurse_check *recurses)
1738  {  {
1739  int length = -1;  int length = -1;
1740    recurse_check this_recurse;
1741  register int branchlength = 0;  register int branchlength = 0;
1742  register pcre_uchar *cc = code + 1 + LINK_SIZE;  register pcre_uchar *cc = code + 1 + LINK_SIZE;
1743    
# Line 1741  for (;;) Line 1762  for (;;)
1762      case OP_ONCE:      case OP_ONCE:
1763      case OP_ONCE_NC:      case OP_ONCE_NC:
1764      case OP_COND:      case OP_COND:
1765      d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd,
1766          recurses);
1767      if (d < 0) return d;      if (d < 0) return d;
1768      branchlength += d;      branchlength += d;
1769      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 1775  for (;;) Line 1797  for (;;)
1797      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1798      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
1799      if (cc > cs && cc < ce) return -1;                    /* Recursion */      if (cc > cs && cc < ce) return -1;                    /* Recursion */
1800      d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd);      else   /* Check for mutual recursion */
1801          {
1802          recurse_check *r = recurses;
1803          for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
1804          if (r != NULL) return -1;   /* Mutual recursion */
1805          }
1806        this_recurse.prev = recurses;
1807        this_recurse.group = cs;
1808        d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd, &this_recurse);
1809      if (d < 0) return d;      if (d < 0) return d;
1810      branchlength += d;      branchlength += d;
1811      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
# Line 1788  for (;;) Line 1818  for (;;)
1818      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1819      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1820      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
1821      cc += PRIV(OP_lengths)[*cc];      cc += 1 + LINK_SIZE;
1822      break;      break;
1823    
1824      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
# Line 2362  Arguments: Line 2392  Arguments:
2392  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
2393  */  */
2394    
 typedef struct recurse_check {  
   struct recurse_check *prev;  
   const pcre_uchar *group;  
 } recurse_check;  
   
2395  static BOOL  static BOOL
2396  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2397    BOOL utf, compile_data *cd, recurse_check *recurses)    BOOL utf, compile_data *cd, recurse_check *recurses)
# Line 2481  for (code = first_significant_code(code Line 2506  for (code = first_significant_code(code
2506    if (c == OP_BRA  || c == OP_BRAPOS ||    if (c == OP_BRA  || c == OP_BRAPOS ||
2507        c == OP_CBRA || c == OP_CBRAPOS ||        c == OP_CBRA || c == OP_CBRAPOS ||
2508        c == OP_ONCE || c == OP_ONCE_NC ||        c == OP_ONCE || c == OP_ONCE_NC ||
2509        c == OP_COND)        c == OP_COND || c == OP_SCOND)
2510      {      {
2511      BOOL empty_branch;      BOOL empty_branch;
2512      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */      if (GET(code, 1) == 0) return TRUE;    /* Hit unclosed bracket */
# Line 2497  for (code = first_significant_code(code Line 2522  for (code = first_significant_code(code
2522        empty_branch = FALSE;        empty_branch = FALSE;
2523        do        do
2524          {          {
2525          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,
2526            recurses)) empty_branch = TRUE;            recurses)) empty_branch = TRUE;
2527          code += GET(code, 1);          code += GET(code, 1);
2528          }          }
# Line 3658  for (;;) Line 3683  for (;;)
3683        get_chr_property_list(code, utf, cd->fcc, list) : NULL;        get_chr_property_list(code, utf, cd->fcc, list) : NULL;
3684      list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;      list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
3685    
3686      rec_limit = 10000;      rec_limit = 1000;
3687      if (end != NULL && compare_opcodes(end, utf, cd, list, end, &rec_limit))      if (end != NULL && compare_opcodes(end, utf, cd, list, end, &rec_limit))
3688        {        {
3689        switch(c)        switch(c)
# Line 3715  for (;;) Line 3740  for (;;)
3740    
3741        list[1] = (c & 1) == 0;        list[1] = (c & 1) == 0;
3742    
3743        rec_limit = 10000;        rec_limit = 1000;
3744        if (compare_opcodes(end, utf, cd, list, end, &rec_limit))        if (compare_opcodes(end, utf, cd, list, end, &rec_limit))
3745          {          {
3746          switch (c)          switch (c)
# Line 3880  didn't consider this to be a POSIX class Line 3905  didn't consider this to be a POSIX class
3905  The problem in trying to be exactly like Perl is in the handling of escapes. We  The problem in trying to be exactly like Perl is in the handling of escapes. We
3906  have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX  have to be sure that [abc[:x\]pqr] is *not* treated as containing a POSIX
3907  class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code  class, but [abc[:x\]pqr:]] is (so that an error can be generated). The code
3908  below handles the special case of \], but does not try to do any other escape  below handles the special cases \\ and \], but does not try to do any other
3909  processing. This makes it different from Perl for cases such as [:l\ower:]  escape processing. This makes it different from Perl for cases such as
3910  where Perl recognizes it as the POSIX class "lower" but PCRE does not recognize  [:l\ower:] where Perl recognizes it as the POSIX class "lower" but PCRE does
3911  "l\ower". This is a lesser evil than not diagnosing bad classes when Perl does,  not recognize "l\ower". This is a lesser evil than not diagnosing bad classes
3912  I think.  when Perl does, I think.
3913    
3914  A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.  A user pointed out that PCRE was rejecting [:a[:digit:]] whereas Perl was not.
3915  It seems that the appearance of a nested POSIX class supersedes an apparent  It seems that the appearance of a nested POSIX class supersedes an apparent
# Line 3911  pcre_uchar terminator;          /* Don't Line 3936  pcre_uchar terminator;          /* Don't
3936  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */  terminator = *(++ptr);   /* compiler warns about "non-constant" initializer. */
3937  for (++ptr; *ptr != CHAR_NULL; ptr++)  for (++ptr; *ptr != CHAR_NULL; ptr++)
3938    {    {
3939    if (*ptr == CHAR_BACKSLASH && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)    if (*ptr == CHAR_BACKSLASH &&
3940          (ptr[1] == CHAR_RIGHT_SQUARE_BRACKET ||
3941           ptr[1] == CHAR_BACKSLASH))
3942      ptr++;      ptr++;
3943    else if (*ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;    else if ((*ptr == CHAR_LEFT_SQUARE_BRACKET && ptr[1] == terminator) ||
3944    else              *ptr == CHAR_RIGHT_SQUARE_BRACKET) return FALSE;
3945      else if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
3946      {      {
3947      if (*ptr == terminator && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)      *endptr = ptr;
3948        {      return TRUE;
       *endptr = ptr;  
       return TRUE;  
       }  
     if (*ptr == CHAR_LEFT_SQUARE_BRACKET &&  
          (ptr[1] == CHAR_COLON || ptr[1] == CHAR_DOT ||  
           ptr[1] == CHAR_EQUALS_SIGN) &&  
         check_posix_syntax(ptr, endptr))  
       return FALSE;  
3949      }      }
3950    }    }
3951  return FALSE;  return FALSE;
# Line 3979  have their offsets adjusted. That one of Line 3999  have their offsets adjusted. That one of
3999  is called, the partially compiled regex must be temporarily terminated with  is called, the partially compiled regex must be temporarily terminated with
4000  OP_END.  OP_END.
4001    
4002  This function has been extended with the possibility of forward references for  This function has been extended to cope with forward references for recursions
4003  recursions and subroutine calls. It must also check the list of such references  and subroutine calls. It must check the list of such references for the
4004  for the group we are dealing with. If it finds that one of the recursions in  group we are dealing with. If it finds that one of the recursions in the
4005  the current group is on this list, it adjusts the offset in the list, not the  current group is on this list, it does not adjust the value in the reference
4006  value in the reference (which is a group number).  (which is a group number). After the group has been scanned, all the offsets in
4007    the forward reference list for the group are adjusted.
4008    
4009  Arguments:  Arguments:
4010    group      points to the start of the group    group      points to the start of the group
# Line 3999  static void Line 4020  static void
4020  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
4021    size_t save_hwm_offset)    size_t save_hwm_offset)
4022  {  {
4023    int offset;
4024    pcre_uchar *hc;
4025  pcre_uchar *ptr = group;  pcre_uchar *ptr = group;
4026    
4027  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
4028    {    {
   int offset;  
   pcre_uchar *hc;  
   
   /* See if this recursion is on the forward reference list. If so, adjust the  
   reference. */  
   
4029    for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;    for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
4030         hc += LINK_SIZE)         hc += LINK_SIZE)
4031      {      {
4032      offset = (int)GET(hc, 0);      offset = (int)GET(hc, 0);
4033      if (cd->start_code + offset == ptr + 1)      if (cd->start_code + offset == ptr + 1) break;
       {  
       PUT(hc, 0, offset + adjust);  
       break;  
       }  
4034      }      }
4035    
4036    /* Otherwise, adjust the recursion offset if it's after the start of this    /* If we have not found this recursion on the forward reference list, adjust
4037    group. */    the recursion's offset if it's after the start of this group. */
4038    
4039    if (hc >= cd->hwm)    if (hc >= cd->hwm)
4040      {      {
# Line 4031  while ((ptr = (pcre_uchar *)find_recurse Line 4044  while ((ptr = (pcre_uchar *)find_recurse
4044    
4045    ptr += 1 + LINK_SIZE;    ptr += 1 + LINK_SIZE;
4046    }    }
4047    
4048    /* Now adjust all forward reference offsets for the group. */
4049    
4050    for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
4051         hc += LINK_SIZE)
4052      {
4053      offset = (int)GET(hc, 0);
4054      PUT(hc, 0, offset + adjust);
4055      }
4056  }  }
4057    
4058    
# Line 4459  const pcre_uchar *tempptr; Line 4481  const pcre_uchar *tempptr;
4481  const pcre_uchar *nestptr = NULL;  const pcre_uchar *nestptr = NULL;
4482  pcre_uchar *previous = NULL;  pcre_uchar *previous = NULL;
4483  pcre_uchar *previous_callout = NULL;  pcre_uchar *previous_callout = NULL;
4484  size_t save_hwm_offset = 0;  size_t item_hwm_offset = 0;
4485  pcre_uint8 classbits[32];  pcre_uint8 classbits[32];
4486    
4487  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
# Line 4617  for (;; ptr++) Line 4639  for (;; ptr++)
4639    /* In the real compile phase, just check the workspace used by the forward    /* In the real compile phase, just check the workspace used by the forward
4640    reference list. */    reference list. */
4641    
4642    else if (cd->hwm > cd->start_workspace + cd->workspace_size -    else if (cd->hwm > cd->start_workspace + cd->workspace_size)
            WORK_SIZE_SAFETY_MARGIN)  
4643      {      {
4644      *errorcodeptr = ERR52;      *errorcodeptr = ERR52;
4645      goto FAILED;      goto FAILED;
4646      }      }
4647    
4648    /* If in \Q...\E, check for the end; if not, we have a literal */    /* If in \Q...\E, check for the end; if not, we have a literal. Otherwise an
4649      isolated \E is ignored. */
4650    
4651    if (inescq && c != CHAR_NULL)    if (c != CHAR_NULL)
4652      {      {
4653      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)      if (c == CHAR_BACKSLASH && ptr[1] == CHAR_E)
4654        {        {
# Line 4634  for (;; ptr++) Line 4656  for (;; ptr++)
4656        ptr++;        ptr++;
4657        continue;        continue;
4658        }        }
4659      else      else if (inescq)
4660        {        {
4661        if (previous_callout != NULL)        if (previous_callout != NULL)
4662          {          {
# Line 4649  for (;; ptr++) Line 4671  for (;; ptr++)
4671          }          }
4672        goto NORMAL_CHAR;        goto NORMAL_CHAR;
4673        }        }
4674      /* Control does not reach here. */  
4675        /* Check for the start of a \Q...\E sequence. We must do this here rather
4676        than later in case it is immediately followed by \E, which turns it into a
4677        "do nothing" sequence. */
4678    
4679        if (c == CHAR_BACKSLASH && ptr[1] == CHAR_Q)
4680          {
4681          inescq = TRUE;
4682          ptr++;
4683          continue;
4684          }
4685      }      }
4686    
4687    /* In extended mode, skip white space and comments. We need a loop in order    /* In extended mode, skip white space and comments. */
   to check for more white space and more comments after a comment. */  
4688    
4689    if ((options & PCRE_EXTENDED) != 0)    if ((options & PCRE_EXTENDED) != 0)
4690      {      {
4691      for (;;)      const pcre_uchar *wscptr = ptr;
4692        while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);
4693        if (c == CHAR_NUMBER_SIGN)
4694        {        {
       while (MAX_255(c) && (cd->ctypes[c] & ctype_space) != 0) c = *(++ptr);  
       if (c != CHAR_NUMBER_SIGN) break;  
4695        ptr++;        ptr++;
4696        while (*ptr != CHAR_NULL)        while (*ptr != CHAR_NULL)
4697          {          {
# Line 4674  for (;; ptr++) Line 4705  for (;; ptr++)
4705          if (utf) FORWARDCHAR(ptr);          if (utf) FORWARDCHAR(ptr);
4706  #endif  #endif
4707          }          }
4708        c = *ptr;     /* Either NULL or the char after a newline */        }
4709    
4710        /* If we skipped any characters, restart the loop. Otherwise, we didn't see
4711        a comment. */
4712    
4713        if (ptr > wscptr)
4714          {
4715          ptr--;
4716          continue;
4717        }        }
4718      }      }
4719    
4720      /* Skip over (?# comments. We need to do this here because we want to know if
4721      the next thing is a quantifier, and these comments may come between an item
4722      and its quantifier. */
4723    
4724      if (c == CHAR_LEFT_PARENTHESIS && ptr[1] == CHAR_QUESTION_MARK &&
4725          ptr[2] == CHAR_NUMBER_SIGN)
4726        {
4727        ptr += 3;
4728        while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
4729        if (*ptr == CHAR_NULL)
4730          {
4731          *errorcodeptr = ERR18;
4732          goto FAILED;
4733          }
4734        continue;
4735        }
4736    
4737    /* See if the next thing is a quantifier. */    /* See if the next thing is a quantifier. */
4738    
4739    is_quantifier =    is_quantifier =
# Line 4761  for (;; ptr++) Line 4817  for (;; ptr++)
4817      zeroreqchar = reqchar;      zeroreqchar = reqchar;
4818      zeroreqcharflags = reqcharflags;      zeroreqcharflags = reqcharflags;
4819      previous = code;      previous = code;
4820        item_hwm_offset = cd->hwm - cd->start_workspace;
4821      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
4822      break;      break;
4823    
# Line 4812  for (;; ptr++) Line 4869  for (;; ptr++)
4869      /* Handle a real character class. */      /* Handle a real character class. */
4870    
4871      previous = code;      previous = code;
4872        item_hwm_offset = cd->hwm - cd->start_workspace;
4873    
4874      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
4875      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
# Line 4917  for (;; ptr++) Line 4975  for (;; ptr++)
4975        (which is on the stack). We have to remember that there was XCLASS data,        (which is on the stack). We have to remember that there was XCLASS data,
4976        however. */        however. */
4977    
4978          if (class_uchardata > class_uchardata_base) xclass = TRUE;
4979    
4980        if (lengthptr != NULL && class_uchardata > class_uchardata_base)        if (lengthptr != NULL && class_uchardata > class_uchardata_base)
4981          {          {
         xclass = TRUE;  
4982          *lengthptr += (int)(class_uchardata - class_uchardata_base);          *lengthptr += (int)(class_uchardata - class_uchardata_base);
4983          class_uchardata = class_uchardata_base;          class_uchardata = class_uchardata_base;
4984          }          }
# Line 5022  for (;; ptr++) Line 5081  for (;; ptr++)
5081              ptr = tempptr + 1;              ptr = tempptr + 1;
5082              continue;              continue;
5083    
5084              /* For all other POSIX classes, no special action is taken in UCP              /* For the other POSIX classes (ascii, cntrl, xdigit) we are going
5085              mode. Fall through to the non_UCP case. */              to fall through to the non-UCP case and build a bit map for
5086                characters with code points less than 256. If we are in a negated
5087                POSIX class, characters with code points greater than 255 must
5088                either all match or all not match. In the special case where we
5089                have not yet generated any xclass data, and this is the final item
5090                in the overall class, we need do nothing: later on, the opcode
5091                OP_NCLASS will be used to indicate that characters greater than 255
5092                are acceptable. If we have already seen an xclass item or one may
5093                follow (we have to assume that it might if this is not the end of
5094                the class), explicitly list all wide codepoints, which will then
5095                either not match or match, depending on whether the class is or is
5096                not negated. */
5097    
5098              default:              default:
5099                if (local_negate &&
5100                    (xclass || tempptr[2] != CHAR_RIGHT_SQUARE_BRACKET))
5101                  {
5102                  *class_uchardata++ = XCL_RANGE;
5103                  class_uchardata += PRIV(ord2utf)(0x100, class_uchardata);
5104                  class_uchardata += PRIV(ord2utf)(0x10ffff, class_uchardata);
5105                  }
5106              break;              break;
5107              }              }
5108            }            }
# Line 5189  for (;; ptr++) Line 5266  for (;; ptr++)
5266                cd, PRIV(vspace_list));                cd, PRIV(vspace_list));
5267              continue;              continue;
5268    
 #ifdef SUPPORT_UCP  
5269              case ESC_p:              case ESC_p:
5270              case ESC_P:              case ESC_P:
5271    #ifdef SUPPORT_UCP
5272                {                {
5273                BOOL negated;                BOOL negated;
5274                unsigned int ptype = 0, pdata = 0;                unsigned int ptype = 0, pdata = 0;
# Line 5205  for (;; ptr++) Line 5282  for (;; ptr++)
5282                class_has_8bitchar--;                /* Undo! */                class_has_8bitchar--;                /* Undo! */
5283                continue;                continue;
5284                }                }
5285    #else
5286                *errorcodeptr = ERR45;
5287                goto FAILED;
5288  #endif  #endif
5289              /* Unrecognized escapes are faulted if PCRE is running in its              /* Unrecognized escapes are faulted if PCRE is running in its
5290              strict mode. By default, for compatibility with Perl, they are              strict mode. By default, for compatibility with Perl, they are
# Line 5361  for (;; ptr++) Line 5441  for (;; ptr++)
5441        CLASS_SINGLE_CHARACTER:        CLASS_SINGLE_CHARACTER:
5442        if (class_one_char < 2) class_one_char++;        if (class_one_char < 2) class_one_char++;
5443    
5444        /* If class_one_char is 1, we have the first single character in the        /* If xclass_has_prop is false and class_one_char is 1, we have the first
5445        class, and there have been no prior ranges, or XCLASS items generated by        single character in the class, and there have been no prior ranges, or
5446        escapes. If this is the final character in the class, we can optimize by        XCLASS items generated by escapes. If this is the final character in the
5447        turning the item into a 1-character OP_CHAR[I] if it's positive, or        class, we can optimize by turning the item into a 1-character OP_CHAR[I]
5448        OP_NOT[I] if it's negative. In the positive case, it can cause firstchar        if it's positive, or OP_NOT[I] if it's negative. In the positive case, it
5449        to be set. Otherwise, there can be no first char if this item is first,        can cause firstchar to be set. Otherwise, there can be no first char if
5450        whatever repeat count may follow. In the case of reqchar, save the        this item is first, whatever repeat count may follow. In the case of
5451        previous value for reinstating. */        reqchar, save the previous value for reinstating. */
5452    
5453        if (!inescq && class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)        if (!inescq &&
5454    #ifdef SUPPORT_UCP
5455              !xclass_has_prop &&
5456    #endif
5457              class_one_char == 1 && ptr[1] == CHAR_RIGHT_SQUARE_BRACKET)
5458          {          {
5459          ptr++;          ptr++;
5460          zeroreqchar = reqchar;          zeroreqchar = reqchar;
# Line 5486  for (;; ptr++) Line 5570  for (;; ptr++)
5570      actual compiled code. */      actual compiled code. */
5571    
5572  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
5573      if (xclass && (!should_flip_negation || (options & PCRE_UCP) != 0))      if (xclass && (xclass_has_prop || !should_flip_negation ||
5574            (options & PCRE_UCP) != 0))
5575  #elif !defined COMPILE_PCRE8  #elif !defined COMPILE_PCRE8
5576      if (xclass && !should_flip_negation)      if (xclass && (xclass_has_prop || !should_flip_negation))
5577  #endif  #endif
5578  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
5579        {        {
# Line 5518  for (;; ptr++) Line 5603  for (;; ptr++)
5603        PUT(previous, 1, (int)(code - previous));        PUT(previous, 1, (int)(code - previous));
5604        break;   /* End of class handling */        break;   /* End of class handling */
5605        }        }
 #endif  
5606    
5607      /* Even though any XCLASS list is now discarded, we must allow for      /* Even though any XCLASS list is now discarded, we must allow for
5608      its memory. */      its memory. */
5609    
5610      if (lengthptr != NULL)      if (lengthptr != NULL)
5611        *lengthptr += (int)(class_uchardata - class_uchardata_base);        *lengthptr += (int)(class_uchardata - class_uchardata_base);
5612    #endif
5613    
5614      /* If there are no characters > 255, or they are all to be included or      /* If there are no characters > 255, or they are all to be included or
5615      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
# Line 5924  for (;; ptr++) Line 6009  for (;; ptr++)
6009        {        {
6010        register int i;        register int i;
6011        int len = (int)(code - previous);        int len = (int)(code - previous);
6012          size_t base_hwm_offset = item_hwm_offset;
6013        pcre_uchar *bralink = NULL;        pcre_uchar *bralink = NULL;
6014        pcre_uchar *brazeroptr = NULL;        pcre_uchar *brazeroptr = NULL;
6015    
# Line 5978  for (;; ptr++) Line 6064  for (;; ptr++)
6064          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
6065            {            {
6066            *code = OP_END;            *code = OP_END;
6067            adjust_recurse(previous, 1, utf, cd, save_hwm_offset);            adjust_recurse(previous, 1, utf, cd, item_hwm_offset);
6068            memmove(previous + 1, previous, IN_UCHARS(len));            memmove(previous + 1, previous, IN_UCHARS(len));
6069            code++;            code++;
6070            if (repeat_max == 0)            if (repeat_max == 0)
# Line 6002  for (;; ptr++) Line 6088  for (;; ptr++)
6088            {            {
6089            int offset;            int offset;
6090            *code = OP_END;            *code = OP_END;
6091            adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset);            adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, item_hwm_offset);
6092            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
6093            code += 2 + LINK_SIZE;            code += 2 + LINK_SIZE;
6094            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
# Line 6070  for (;; ptr++) Line 6156  for (;; ptr++)
6156    
6157                while (cd->hwm > cd->start_workspace + cd->workspace_size -                while (cd->hwm > cd->start_workspace + cd->workspace_size -
6158                       WORK_SIZE_SAFETY_MARGIN -                       WORK_SIZE_SAFETY_MARGIN -
6159                       (this_hwm_offset - save_hwm_offset))                       (this_hwm_offset - base_hwm_offset))
6160                  {                  {
6161                  *errorcodeptr = expand_workspace(cd);                  *errorcodeptr = expand_workspace(cd);
6162                  if (*errorcodeptr != 0) goto FAILED;                  if (*errorcodeptr != 0) goto FAILED;
6163                  }                  }
6164    
6165                for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset;                for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset;
6166                     hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;                     hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
6167                     hc += LINK_SIZE)                     hc += LINK_SIZE)
6168                  {                  {
6169                  PUT(cd->hwm, 0, GET(hc, 0) + len);                  PUT(cd->hwm, 0, GET(hc, 0) + len);
6170                  cd->hwm += LINK_SIZE;                  cd->hwm += LINK_SIZE;
6171                  }                  }
6172                save_hwm_offset = this_hwm_offset;                base_hwm_offset = this_hwm_offset;
6173                code += len;                code += len;
6174                }                }
6175              }              }
# Line 6151  for (;; ptr++) Line 6237  for (;; ptr++)
6237    
6238            while (cd->hwm > cd->start_workspace + cd->workspace_size -            while (cd->hwm > cd->start_workspace + cd->workspace_size -
6239                   WORK_SIZE_SAFETY_MARGIN -                   WORK_SIZE_SAFETY_MARGIN -
6240                   (this_hwm_offset - save_hwm_offset))                   (this_hwm_offset - base_hwm_offset))
6241              {              {
6242              *errorcodeptr = expand_workspace(cd);              *errorcodeptr = expand_workspace(cd);
6243              if (*errorcodeptr != 0) goto FAILED;              if (*errorcodeptr != 0) goto FAILED;
6244              }              }
6245    
6246            for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset;            for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset;
6247                 hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;                 hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
6248                 hc += LINK_SIZE)                 hc += LINK_SIZE)
6249              {              {
6250              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
6251              cd->hwm += LINK_SIZE;              cd->hwm += LINK_SIZE;
6252              }              }
6253            save_hwm_offset = this_hwm_offset;            base_hwm_offset = this_hwm_offset;
6254            code += len;            code += len;
6255            }            }
6256    
# Line 6247  for (;; ptr++) Line 6333  for (;; ptr++)
6333              while (*scode == OP_ALT);              while (*scode == OP_ALT);
6334              }              }
6335    
6336              /* A conditional group with only one branch has an implicit empty
6337              alternative branch. */
6338    
6339              if (*bracode == OP_COND && bracode[GET(bracode,1)] != OP_ALT)
6340                *bracode = OP_SCOND;
6341    
6342            /* Handle possessive quantifiers. */            /* Handle possessive quantifiers. */
6343    
6344            if (possessive_quantifier)            if (possessive_quantifier)
# Line 6260  for (;; ptr++) Line 6352  for (;; ptr++)
6352                {                {
6353                int nlen = (int)(code - bracode);                int nlen = (int)(code - bracode);
6354                *code = OP_END;                *code = OP_END;
6355                adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);                adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6356                memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));                memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
6357                code += 1 + LINK_SIZE;                code += 1 + LINK_SIZE;
6358                nlen += 1 + LINK_SIZE;                nlen += 1 + LINK_SIZE;
6359                *bracode = OP_BRAPOS;                *bracode = (*bracode == OP_COND)? OP_BRAPOS : OP_SBRAPOS;
6360                *code++ = OP_KETRPOS;                *code++ = OP_KETRPOS;
6361                PUTINC(code, 0, nlen);                PUTINC(code, 0, nlen);
6362                PUT(bracode, 1, nlen);                PUT(bracode, 1, nlen);
# Line 6394  for (;; ptr++) Line 6486  for (;; ptr++)
6486          else          else
6487            {            {
6488            *code = OP_END;            *code = OP_END;
6489            adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);            adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6490            memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));            memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6491            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
6492            len += 1 + LINK_SIZE;            len += 1 + LINK_SIZE;
# Line 6443  for (;; ptr++) Line 6535  for (;; ptr++)
6535    
6536          default:          default:
6537          *code = OP_END;          *code = OP_END;
6538          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6539          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6540          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6541          len += 1 + LINK_SIZE;          len += 1 + LINK_SIZE;
# Line 6474  for (;; ptr++) Line 6566  for (;; ptr++)
6566      case CHAR_LEFT_PARENTHESIS:      case CHAR_LEFT_PARENTHESIS:
6567      ptr++;      ptr++;
6568    
     /* First deal with comments. Putting this code right at the start ensures  
     that comments have no bad side effects. */  
   
     if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)  
       {  
       ptr += 2;  
       while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;  
       if (*ptr == CHAR_NULL)  
         {  
         *errorcodeptr = ERR18;  
         goto FAILED;  
         }  
       continue;  
       }  
   
6569      /* Now deal with various "verbs" that can be introduced by '*'. */      /* Now deal with various "verbs" that can be introduced by '*'. */
6570    
6571      if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'      if (ptr[0] == CHAR_ASTERISK && (ptr[1] == ':'
# Line 6579  for (;; ptr++) Line 6656  for (;; ptr++)
6656                goto FAILED;                goto FAILED;
6657                }                }
6658              setverb = *code++ = verbs[i].op_arg;              setverb = *code++ = verbs[i].op_arg;
6659              *code++ = arglen;              if (lengthptr != NULL)    /* In pass 1 just add in the length */
6660              memcpy(code, arg, IN_UCHARS(arglen));                {                       /* to avoid potential workspace */
6661              code += arglen;                *lengthptr += arglen;   /* overflow. */
6662                  *code++ = 0;
6663                  }
6664                else
6665                  {
6666                  *code++ = arglen;
6667                  memcpy(code, arg, IN_UCHARS(arglen));
6668                  code += arglen;
6669                  }
6670              *code++ = 0;              *code++ = 0;
6671              }              }
6672    
# Line 6616  for (;; ptr++) Line 6701  for (;; ptr++)
6701      newoptions = options;      newoptions = options;
6702      skipbytes = 0;      skipbytes = 0;
6703      bravalue = OP_CBRA;      bravalue = OP_CBRA;
6704      save_hwm_offset = cd->hwm - cd->start_workspace;      item_hwm_offset = cd->hwm - cd->start_workspace;
6705      reset_bracount = FALSE;      reset_bracount = FALSE;
6706    
6707      /* Deal with the extended parentheses; all are introduced by '?', and the      /* Deal with the extended parentheses; all are introduced by '?', and the
# Line 6634  for (;; ptr++) Line 6719  for (;; ptr++)
6719          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
6720          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */          case CHAR_VERTICAL_LINE:  /* Reset capture count for each branch */
6721          reset_bracount = TRUE;          reset_bracount = TRUE;
6722            cd->dupgroups = TRUE;     /* Record (?| encountered */
6723          /* Fall through */          /* Fall through */
6724    
6725          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
# Line 6679  for (;; ptr++) Line 6765  for (;; ptr++)
6765          if (tempptr[1] == CHAR_QUESTION_MARK &&          if (tempptr[1] == CHAR_QUESTION_MARK &&
6766                (tempptr[2] == CHAR_EQUALS_SIGN ||                (tempptr[2] == CHAR_EQUALS_SIGN ||
6767                 tempptr[2] == CHAR_EXCLAMATION_MARK ||                 tempptr[2] == CHAR_EXCLAMATION_MARK ||
6768                 tempptr[2] == CHAR_LESS_THAN_SIGN))                   (tempptr[2] == CHAR_LESS_THAN_SIGN &&
6769                       (tempptr[3] == CHAR_EQUALS_SIGN ||
6770                        tempptr[3] == CHAR_EXCLAMATION_MARK))))
6771            {            {
6772            cd->iscondassert = TRUE;            cd->iscondassert = TRUE;
6773            break;            break;
# Line 6732  for (;; ptr++) Line 6820  for (;; ptr++)
6820            {            {
6821            while (IS_DIGIT(*ptr))            while (IS_DIGIT(*ptr))
6822              {              {
6823                if (recno > INT_MAX / 10 - 1)  /* Integer overflow */
6824                  {
6825                  while (IS_DIGIT(*ptr)) ptr++;
6826                  *errorcodeptr = ERR61;
6827                  goto FAILED;
6828                  }
6829              recno = recno * 10 + (int)(*ptr - CHAR_0);              recno = recno * 10 + (int)(*ptr - CHAR_0);
6830              ptr++;              ptr++;
6831              }              }
# Line 6760  for (;; ptr++) Line 6854  for (;; ptr++)
6854              ptr++;              ptr++;
6855              }              }
6856            namelen = (int)(ptr - name);            namelen = (int)(ptr - name);
6857            if (lengthptr != NULL) *lengthptr += IMM2_SIZE;            if (lengthptr != NULL) skipbytes += IMM2_SIZE;
6858            }            }
6859    
6860          /* Check the terminator */          /* Check the terminator */
# Line 6866  for (;; ptr++) Line 6960  for (;; ptr++)
6960                *errorcodeptr = ERR15;                *errorcodeptr = ERR15;
6961                goto FAILED;                goto FAILED;
6962                }                }
6963                if (recno > INT_MAX / 10 - 1)   /* Integer overflow */
6964                  {
6965                  *errorcodeptr = ERR61;
6966                  goto FAILED;
6967                  }
6968              recno = recno * 10 + name[i] - CHAR_0;              recno = recno * 10 + name[i] - CHAR_0;
6969              }              }
6970            if (recno == 0) recno = RREF_ANY;            if (recno == 0) recno = RREF_ANY;
# Line 7142  for (;; ptr++) Line 7241  for (;; ptr++)
7241          if (lengthptr != NULL)          if (lengthptr != NULL)
7242            {            {
7243            named_group *ng;            named_group *ng;
7244              recno = 0;
7245    
7246            if (namelen == 0)            if (namelen == 0)
7247              {              {
# Line 7159  for (;; ptr++) Line 7259  for (;; ptr++)
7259              goto FAILED;              goto FAILED;
7260              }              }
7261    
           /* The name table does not exist in the first pass; instead we must  
           scan the list of names encountered so far in order to get the  
           number. If the name is not found, set the value to 0 for a forward  
           reference. */  
   
           ng = cd->named_groups;  
           for (i = 0; i < cd->names_found; i++, ng++)  
             {  
             if (namelen == ng->length &&  
                 STRNCMP_UC_UC(name, ng->name, namelen) == 0)  
               break;  
             }  
           recno = (i < cd->names_found)? ng->number : 0;  
   
7262            /* Count named back references. */            /* Count named back references. */
7263    
7264            if (!is_recurse) cd->namedrefcount++;            if (!is_recurse) cd->namedrefcount++;
# Line 7182  for (;; ptr++) Line 7268  for (;; ptr++)
7268            16-bit data item. */            16-bit data item. */
7269    
7270            *lengthptr += IMM2_SIZE;            *lengthptr += IMM2_SIZE;
7271    
7272              /* If this is a forward reference and we are within a (?|...) group,
7273              the reference may end up as the number of a group which we are
7274              currently inside, that is, it could be a recursive reference. In the
7275              real compile this will be picked up and the reference wrapped with
7276              OP_ONCE to make it atomic, so we must allow space in case this occurs. */
7277    
7278              /* In fact, this can happen for a non-forward reference because
7279              another group with the same number might be created later. This
7280              issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
7281              only mode, we finesse the bug by allowing more memory always. */
7282    
7283              *lengthptr += 2 + 2*LINK_SIZE;
7284    
7285              /* It is even worse than that. The current reference may be to an
7286              existing named group with a different number (so apparently not
7287              recursive) but which later on is also attached to a group with the
7288              current number. This can only happen if (?| has been previously
7289              encountered. In that case, we allow yet more memory, just in case.
7290              (Again, this is fixed "properly" in PCRE2.) */
7291    
7292              if (cd->dupgroups) *lengthptr += 4 + 4*LINK_SIZE;
7293    
7294              /* Otherwise, check for recursion here. The name table does not exist
7295              in the first pass; instead we must scan the list of names encountered
7296              so far in order to get the number. If the name is not found, leave
7297              the value of recno as 0 for a forward reference. */
7298    
7299              else
7300                {
7301                ng = cd->named_groups;
7302                for (i = 0; i < cd->names_found; i++, ng++)
7303                  {
7304                  if (namelen == ng->length &&
7305                      STRNCMP_UC_UC(name, ng->name, namelen) == 0)
7306                    {
7307                    open_capitem *oc;
7308                    recno = ng->number;
7309                    if (is_recurse) break;
7310                    for (oc = cd->open_caps; oc != NULL; oc = oc->next)
7311                      {
7312                      if (oc->number == recno)
7313                        {
7314                        oc->flag = TRUE;
7315                        break;
7316                        }
7317                      }
7318                    }
7319                  }
7320                }
7321            }            }
7322    
7323          /* In the real compile, search the name table. We check the name          /* In the real compile, search the name table. We check the name
# Line 7228  for (;; ptr++) Line 7364  for (;; ptr++)
7364            for (i++; i < cd->names_found; i++)            for (i++; i < cd->names_found; i++)
7365              {              {
7366              if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;              if (STRCMP_UC_UC(slot + IMM2_SIZE, cslot + IMM2_SIZE) != 0) break;
   
   
7367              count++;              count++;
7368              cslot += cd->name_entry_size;              cslot += cd->name_entry_size;
7369              }              }
# Line 7238  for (;; ptr++) Line 7372  for (;; ptr++)
7372              {              {
7373              if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;              if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
7374              previous = code;              previous = code;
7375                item_hwm_offset = cd->hwm - cd->start_workspace;
7376              *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;              *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
7377              PUT2INC(code, 0, index);              PUT2INC(code, 0, index);
7378              PUT2INC(code, 0, count);              PUT2INC(code, 0, count);
# Line 7275  for (;; ptr++) Line 7410  for (;; ptr++)
7410    
7411    
7412          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
7413          case CHAR_R:              /* Recursion */          case CHAR_R:              /* Recursion, same as (?0) */
7414          ptr++;                    /* Same as (?0)      */          recno = 0;
7415          /* Fall through */          if (*(++ptr) != CHAR_RIGHT_PARENTHESIS)
7416              {
7417              *errorcodeptr = ERR29;
7418              goto FAILED;
7419              }
7420            goto HANDLE_RECURSION;
7421    
7422    
7423          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
# Line 7314  for (;; ptr++) Line 7454  for (;; ptr++)
7454    
7455            recno = 0;            recno = 0;
7456            while(IS_DIGIT(*ptr))            while(IS_DIGIT(*ptr))
7457                {
7458                if (recno > INT_MAX / 10 - 1) /* Integer overflow */
7459                  {
7460                  while (IS_DIGIT(*ptr)) ptr++;
7461                  *errorcodeptr = ERR61;
7462                  goto FAILED;
7463                  }
7464              recno = recno * 10 + *ptr++ - CHAR_0;              recno = recno * 10 + *ptr++ - CHAR_0;
7465                }
7466    
7467            if (*ptr != (pcre_uchar)terminator)            if (*ptr != (pcre_uchar)terminator)
7468              {              {
# Line 7351  for (;; ptr++) Line 7499  for (;; ptr++)
7499            HANDLE_RECURSION:            HANDLE_RECURSION:
7500    
7501            previous = code;            previous = code;
7502              item_hwm_offset = cd->hwm - cd->start_workspace;
7503            called = cd->start_code;            called = cd->start_code;
7504    
7505            /* When we are actually compiling, find the bracket that is being            /* When we are actually compiling, find the bracket that is being
# Line 7552  for (;; ptr++) Line 7701  for (;; ptr++)
7701        previous = NULL;        previous = NULL;
7702        cd->iscondassert = FALSE;        cd->iscondassert = FALSE;
7703        }        }
7704      else previous = code;      else
7705          {
7706          previous = code;
7707          item_hwm_offset = cd->hwm - cd->start_workspace;
7708          }
7709    
7710      *code = bravalue;      *code = bravalue;
7711      tempcode = code;      tempcode = code;
# Line 7765  for (;; ptr++) Line 7918  for (;; ptr++)
7918        c = ec;        c = ec;
7919      else      else
7920        {        {
       if (escape == ESC_Q)            /* Handle start of quoted string */  
         {  
         if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)  
           ptr += 2;               /* avoid empty string */  
             else inescq = TRUE;  
         continue;  
         }  
   
       if (escape == ESC_E) continue;  /* Perl ignores an orphan \E */  
   
7921        /* For metasequences that actually match a character, we disable the        /* For metasequences that actually match a character, we disable the
7922        setting of a first character if it hasn't already been set. */        setting of a first character if it hasn't already been set. */
7923    
# Line 7800  for (;; ptr++) Line 7943  for (;; ptr++)
7943          const pcre_uchar *p;          const pcre_uchar *p;
7944          pcre_uint32 cf;          pcre_uint32 cf;
7945    
7946          save_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */          item_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */
7947          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
7948            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
7949    
# Line 7829  for (;; ptr++) Line 7972  for (;; ptr++)
7972          if (*p != (pcre_uchar)terminator)          if (*p != (pcre_uchar)terminator)
7973            {            {
7974            *errorcodeptr = ERR57;            *errorcodeptr = ERR57;
7975            break;            goto FAILED;
7976            }            }
7977          ptr++;          ptr++;
7978          goto HANDLE_NUMERICAL_RECURSION;          goto HANDLE_NUMERICAL_RECURSION;
# Line 7844  for (;; ptr++) Line 7987  for (;; ptr++)
7987            ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))            ptr[1] != CHAR_APOSTROPHE && ptr[1] != CHAR_LEFT_CURLY_BRACKET))
7988            {            {
7989            *errorcodeptr = ERR69;            *errorcodeptr = ERR69;
7990            break;            goto FAILED;
7991            }            }
7992          is_recurse = FALSE;          is_recurse = FALSE;
7993          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
# Line 7868  for (;; ptr++) Line 8011  for (;; ptr++)
8011          HANDLE_REFERENCE:          HANDLE_REFERENCE:
8012          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
8013          previous = code;          previous = code;
8014            item_hwm_offset = cd->hwm - cd->start_workspace;
8015          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
8016          PUT2INC(code, 0, recno);          PUT2INC(code, 0, recno);
8017          cd->backref_map |= (recno < 32)? (1 << recno) : 1;          cd->backref_map |= (recno < 32)? (1 << recno) : 1;
# Line 7897  for (;; ptr++) Line 8041  for (;; ptr++)
8041          if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))          if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
8042            goto FAILED;            goto FAILED;
8043          previous = code;          previous = code;
8044            item_hwm_offset = cd->hwm - cd->start_workspace;
8045          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
8046          *code++ = ptype;          *code++ = ptype;
8047          *code++ = pdata;          *code++ = pdata;
# Line 7937  for (;; ptr++) Line 8082  for (;; ptr++)
8082    
8083            {            {
8084            previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;            previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
8085              item_hwm_offset = cd->hwm - cd->start_workspace;
8086            *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;            *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
8087            }            }
8088          }          }
# Line 7980  for (;; ptr++) Line 8126  for (;; ptr++)
8126    
8127      ONE_CHAR:      ONE_CHAR:
8128      previous = code;      previous = code;
8129        item_hwm_offset = cd->hwm - cd->start_workspace;
8130    
8131      /* For caseless UTF-8 mode when UCP support is available, check whether      /* For caseless UTF-8 mode when UCP support is available, check whether
8132      this character has more than one other case. If so, generate a special      this character has more than one other case. If so, generate a special
# Line 8288  for (;;) Line 8435  for (;;)
8435        int fixed_length;        int fixed_length;
8436        *code = OP_END;        *code = OP_END;
8437        fixed_length = find_fixedlength(last_branch,  (options & PCRE_UTF8) != 0,        fixed_length = find_fixedlength(last_branch,  (options & PCRE_UTF8) != 0,
8438          FALSE, cd);          FALSE, cd, NULL);
8439        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
8440        if (fixed_length == -3)        if (fixed_length == -3)
8441          {          {
# Line 9155  cd->names_found = 0; Line 9302  cd->names_found = 0;
9302  cd->name_entry_size = 0;  cd->name_entry_size = 0;
9303  cd->name_table = NULL;  cd->name_table = NULL;
9304  cd->dupnames = FALSE;  cd->dupnames = FALSE;
9305    cd->dupgroups = FALSE;
9306  cd->namedrefcount = 0;  cd->namedrefcount = 0;
9307  cd->start_code = cworkspace;  cd->start_code = cworkspace;
9308  cd->hwm = cworkspace;  cd->hwm = cworkspace;
# Line 9327  if (cd->hwm > cd->start_workspace) Line 9475  if (cd->hwm > cd->start_workspace)
9475      int offset, recno;      int offset, recno;
9476      cd->hwm -= LINK_SIZE;      cd->hwm -= LINK_SIZE;
9477      offset = GET(cd->hwm, 0);      offset = GET(cd->hwm, 0);
9478    
9479        /* Check that the hwm handling hasn't gone wrong. This whole area is
9480        rewritten in PCRE2 because there are some obscure cases. */
9481    
9482        if (offset == 0 || codestart[offset-1] != OP_RECURSE)
9483          {
9484          errorcode = ERR10;
9485          break;
9486          }
9487    
9488      recno = GET(codestart, offset);      recno = GET(codestart, offset);
9489      if (recno != prev_recno)      if (recno != prev_recno)
9490        {        {
# Line 9357  used in this code because at least one c Line 9515  used in this code because at least one c
9515  "const" attribute if the cast (pcre_uchar *)codestart is used directly in the  "const" attribute if the cast (pcre_uchar *)codestart is used directly in the
9516  function call. */  function call. */
9517    
9518  if ((options & PCRE_NO_AUTO_POSSESS) == 0)  if (errorcode == 0 && (options & PCRE_NO_AUTO_POSSESS) == 0)
9519    {    {
9520    pcre_uchar *temp = (pcre_uchar *)codestart;    pcre_uchar *temp = (pcre_uchar *)codestart;
9521    auto_possessify(temp, utf, cd);    auto_possessify(temp, utf, cd);
# Line 9371  OP_RECURSE that are not fixed length get Line 9529  OP_RECURSE that are not fixed length get
9529  exceptional ones forgo this. We scan the pattern to check that they are fixed  exceptional ones forgo this. We scan the pattern to check that they are fixed
9530  length, and set their lengths. */  length, and set their lengths. */
9531    
9532  if (cd->check_lookbehind)  if (errorcode == 0 && cd->check_lookbehind)
9533    {    {
9534    pcre_uchar *cc = (pcre_uchar *)codestart;    pcre_uchar *cc = (pcre_uchar *)codestart;
9535    
# Line 9391  if (cd->check_lookbehind) Line 9549  if (cd->check_lookbehind)
9549        int end_op = *be;        int end_op = *be;
9550        *be = OP_END;        *be = OP_END;
9551        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
9552          cd);          cd, NULL);
9553        *be = end_op;        *be = end_op;
9554        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
9555        if (fixed_length < 0)        if (fixed_length < 0)
# Line 9584  return (pcre32 *)re; Line 9742  return (pcre32 *)re;
9742  }  }
9743    
9744  /* End of pcre_compile.c */  /* End of pcre_compile.c */
   

Legend:
Removed from v.1538  
changed lines
  Added in v.1616

  ViewVC Help
Powered by ViewVC 1.1.5