/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1538 by ph10, Sun Mar 29 11:22:24 2015 UTC | revision 1563 by ph10, Mon Jun 8 17:55:54 2015 UTC
# Line 1704  Arguments: Line 1704  Arguments:
1704    utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode    utf      TRUE in UTF-8 / UTF-16 / UTF-32 mode
1705    atend    TRUE if called when the pattern is complete    atend    TRUE if called when the pattern is complete
1706    cd       the "compile data" structure    cd       the "compile data" structure
1707      recurses    chain of recurse_check to catch mutual recursion
1708    
1709  Returns:   the fixed length,  Returns:   the fixed length,
1710               or -1 if there is no fixed length,               or -1 if there is no fixed length,
# Line 1713  Returns:   the fixed length, Line 1714  Returns:   the fixed length,
1714  */  */
1715    
1716  static int  static int
1717  find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd)  find_fixedlength(pcre_uchar *code, BOOL utf, BOOL atend, compile_data *cd,
1718      recurse_check *recurses)
1719  {  {
1720  int length = -1;  int length = -1;
1721    recurse_check this_recurse;
1722  register int branchlength = 0;  register int branchlength = 0;
1723  register pcre_uchar *cc = code + 1 + LINK_SIZE;  register pcre_uchar *cc = code + 1 + LINK_SIZE;
1724    
# Line 1741  for (;;) Line 1743  for (;;)
1743      case OP_ONCE:      case OP_ONCE:
1744      case OP_ONCE_NC:      case OP_ONCE_NC:
1745      case OP_COND:      case OP_COND:
1746      d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd);      d = find_fixedlength(cc + ((op == OP_CBRA)? IMM2_SIZE : 0), utf, atend, cd,
1747          recurses);
1748      if (d < 0) return d;      if (d < 0) return d;
1749      branchlength += d;      branchlength += d;
1750      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
# Line 1775  for (;;) Line 1778  for (;;)
1778      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */      cs = ce = (pcre_uchar *)cd->start_code + GET(cc, 1);  /* Start subpattern */
1779      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */      do ce += GET(ce, 1); while (*ce == OP_ALT);           /* End subpattern */
1780      if (cc > cs && cc < ce) return -1;                    /* Recursion */      if (cc > cs && cc < ce) return -1;                    /* Recursion */
1781      d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd);      else   /* Check for mutual recursion */
1782          {
1783          recurse_check *r = recurses;
1784          for (r = recurses; r != NULL; r = r->prev) if (r->group == cs) break;
1785          if (r != NULL) return -1;   /* Mutual recursion */
1786          }
1787        this_recurse.prev = recurses;
1788        this_recurse.group = cs;
1789        d = find_fixedlength(cs + IMM2_SIZE, utf, atend, cd, &this_recurse);
1790      if (d < 0) return d;      if (d < 0) return d;
1791      branchlength += d;      branchlength += d;
1792      cc += 1 + LINK_SIZE;      cc += 1 + LINK_SIZE;
# Line 1788  for (;;) Line 1799  for (;;)
1799      case OP_ASSERTBACK:      case OP_ASSERTBACK:
1800      case OP_ASSERTBACK_NOT:      case OP_ASSERTBACK_NOT:
1801      do cc += GET(cc, 1); while (*cc == OP_ALT);      do cc += GET(cc, 1); while (*cc == OP_ALT);
1802      cc += PRIV(OP_lengths)[*cc];      cc += 1 + LINK_SIZE;
1803      break;      break;
1804    
1805      /* Skip over things that don't match chars */      /* Skip over things that don't match chars */
# Line 2362  Arguments: Line 2373  Arguments:
2373  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
2374  */  */
2375    
 /* One link in a chain of groups that are currently being scanned. The chain
 is searched for a matching group before following a recursion, so that mutual
 recursion between groups can be detected. */

 typedef struct recurse_check {  
   struct recurse_check *prev;    /* Previous link in the chain; NULL ends it */
   const pcre_uchar *group;       /* Start of the group this link records */
 } recurse_check;  
   
2376  static BOOL  static BOOL
2377  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2378    BOOL utf, compile_data *cd, recurse_check *recurses)    BOOL utf, compile_data *cd, recurse_check *recurses)
# Line 2497  for (code = first_significant_code(code Line 2503  for (code = first_significant_code(code
2503        empty_branch = FALSE;        empty_branch = FALSE;
2504        do        do
2505          {          {
2506          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd,
2507            recurses)) empty_branch = TRUE;            recurses)) empty_branch = TRUE;
2508          code += GET(code, 1);          code += GET(code, 1);
2509          }          }
# Line 3658  for (;;) Line 3664  for (;;)
3664        get_chr_property_list(code, utf, cd->fcc, list) : NULL;        get_chr_property_list(code, utf, cd->fcc, list) : NULL;
3665      list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;      list[1] = c == OP_STAR || c == OP_PLUS || c == OP_QUERY || c == OP_UPTO;
3666    
3667      rec_limit = 10000;      rec_limit = 1000;
3668      if (end != NULL && compare_opcodes(end, utf, cd, list, end, &rec_limit))      if (end != NULL && compare_opcodes(end, utf, cd, list, end, &rec_limit))
3669        {        {
3670        switch(c)        switch(c)
# Line 3715  for (;;) Line 3721  for (;;)
3721    
3722        list[1] = (c & 1) == 0;        list[1] = (c & 1) == 0;
3723    
3724        rec_limit = 10000;        rec_limit = 1000;
3725        if (compare_opcodes(end, utf, cd, list, end, &rec_limit))        if (compare_opcodes(end, utf, cd, list, end, &rec_limit))
3726          {          {
3727          switch (c)          switch (c)
# Line 3979  have their offsets adjusted. That one of Line 3985  have their offsets adjusted. That one of
3985  is called, the partially compiled regex must be temporarily terminated with  is called, the partially compiled regex must be temporarily terminated with
3986  OP_END.  OP_END.
3987    
3988  This function has been extended with the possibility of forward references for  This function has been extended to cope with forward references for recursions
3989  recursions and subroutine calls. It must also check the list of such references  and subroutine calls. It must check the list of such references for the
3990  for the group we are dealing with. If it finds that one of the recursions in  group we are dealing with. If it finds that one of the recursions in the
3991  the current group is on this list, it adjusts the offset in the list, not the  current group is on this list, it does not adjust the value in the reference
3992  value in the reference (which is a group number).  (which is a group number). After the group has been scanned, all the offsets in
3993    the forward reference list for the group are adjusted.
3994    
3995  Arguments:  Arguments:
3996    group      points to the start of the group    group      points to the start of the group
# Line 3999  static void Line 4006  static void
4006  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,  adjust_recurse(pcre_uchar *group, int adjust, BOOL utf, compile_data *cd,
4007    size_t save_hwm_offset)    size_t save_hwm_offset)
4008  {  {
4009    int offset;
4010    pcre_uchar *hc;
4011  pcre_uchar *ptr = group;  pcre_uchar *ptr = group;
4012    
4013  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)  while ((ptr = (pcre_uchar *)find_recurse(ptr, utf)) != NULL)
4014    {    {
   int offset;  
   pcre_uchar *hc;  
   
   /* See if this recursion is on the forward reference list. If so, adjust the  
   reference. */  
   
4015    for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;    for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
4016         hc += LINK_SIZE)         hc += LINK_SIZE)
4017      {      {
4018      offset = (int)GET(hc, 0);      offset = (int)GET(hc, 0);
4019      if (cd->start_code + offset == ptr + 1)      if (cd->start_code + offset == ptr + 1) break;
       {  
       PUT(hc, 0, offset + adjust);  
       break;  
       }  
4020      }      }
4021    
4022    /* Otherwise, adjust the recursion offset if it's after the start of this    /* If we have not found this recursion on the forward reference list, adjust
4023    group. */    the recursion's offset if it's after the start of this group. */
4024    
4025    if (hc >= cd->hwm)    if (hc >= cd->hwm)
4026      {      {
# Line 4031  while ((ptr = (pcre_uchar *)find_recurse Line 4030  while ((ptr = (pcre_uchar *)find_recurse
4030    
4031    ptr += 1 + LINK_SIZE;    ptr += 1 + LINK_SIZE;
4032    }    }
4033    
4034    /* Now adjust all forward reference offsets for the group. */
4035    
4036    for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset; hc < cd->hwm;
4037         hc += LINK_SIZE)
4038      {
4039      offset = (int)GET(hc, 0);
4040      PUT(hc, 0, offset + adjust);
4041      }
4042  }  }
4043    
4044    
# Line 4459  const pcre_uchar *tempptr; Line 4467  const pcre_uchar *tempptr;
4467  const pcre_uchar *nestptr = NULL;  const pcre_uchar *nestptr = NULL;
4468  pcre_uchar *previous = NULL;  pcre_uchar *previous = NULL;
4469  pcre_uchar *previous_callout = NULL;  pcre_uchar *previous_callout = NULL;
4470  size_t save_hwm_offset = 0;  size_t item_hwm_offset = 0;
4471  pcre_uint8 classbits[32];  pcre_uint8 classbits[32];
4472    
4473  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we  /* We can fish out the UTF-8 setting once and for all into a BOOL, but we
# Line 4761  for (;; ptr++) Line 4769  for (;; ptr++)
4769      zeroreqchar = reqchar;      zeroreqchar = reqchar;
4770      zeroreqcharflags = reqcharflags;      zeroreqcharflags = reqcharflags;
4771      previous = code;      previous = code;
4772        item_hwm_offset = cd->hwm - cd->start_workspace;
4773      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;      *code++ = ((options & PCRE_DOTALL) != 0)? OP_ALLANY: OP_ANY;
4774      break;      break;
4775    
# Line 4812  for (;; ptr++) Line 4821  for (;; ptr++)
4821      /* Handle a real character class. */      /* Handle a real character class. */
4822    
4823      previous = code;      previous = code;
4824        item_hwm_offset = cd->hwm - cd->start_workspace;
4825    
4826      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if      /* PCRE supports POSIX class stuff inside a class. Perl gives an error if
4827      they are encountered at the top level, so we'll do that too. */      they are encountered at the top level, so we'll do that too. */
# Line 5518  for (;; ptr++) Line 5528  for (;; ptr++)
5528        PUT(previous, 1, (int)(code - previous));        PUT(previous, 1, (int)(code - previous));
5529        break;   /* End of class handling */        break;   /* End of class handling */
5530        }        }
 #endif  
5531    
5532      /* Even though any XCLASS list is now discarded, we must allow for      /* Even though any XCLASS list is now discarded, we must allow for
5533      its memory. */      its memory. */
5534    
5535      if (lengthptr != NULL)      if (lengthptr != NULL)
5536        *lengthptr += (int)(class_uchardata - class_uchardata_base);        *lengthptr += (int)(class_uchardata - class_uchardata_base);
5537    #endif
5538    
5539      /* If there are no characters > 255, or they are all to be included or      /* If there are no characters > 255, or they are all to be included or
5540      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
# Line 5924  for (;; ptr++) Line 5934  for (;; ptr++)
5934        {        {
5935        register int i;        register int i;
5936        int len = (int)(code - previous);        int len = (int)(code - previous);
5937          size_t base_hwm_offset = item_hwm_offset;
5938        pcre_uchar *bralink = NULL;        pcre_uchar *bralink = NULL;
5939        pcre_uchar *brazeroptr = NULL;        pcre_uchar *brazeroptr = NULL;
5940    
# Line 5978  for (;; ptr++) Line 5989  for (;; ptr++)
5989          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */          if (repeat_max <= 1)    /* Covers 0, 1, and unlimited */
5990            {            {
5991            *code = OP_END;            *code = OP_END;
5992            adjust_recurse(previous, 1, utf, cd, save_hwm_offset);            adjust_recurse(previous, 1, utf, cd, item_hwm_offset);
5993            memmove(previous + 1, previous, IN_UCHARS(len));            memmove(previous + 1, previous, IN_UCHARS(len));
5994            code++;            code++;
5995            if (repeat_max == 0)            if (repeat_max == 0)
# Line 6002  for (;; ptr++) Line 6013  for (;; ptr++)
6013            {            {
6014            int offset;            int offset;
6015            *code = OP_END;            *code = OP_END;
6016            adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, save_hwm_offset);            adjust_recurse(previous, 2 + LINK_SIZE, utf, cd, item_hwm_offset);
6017            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));            memmove(previous + 2 + LINK_SIZE, previous, IN_UCHARS(len));
6018            code += 2 + LINK_SIZE;            code += 2 + LINK_SIZE;
6019            *previous++ = OP_BRAZERO + repeat_type;            *previous++ = OP_BRAZERO + repeat_type;
# Line 6070  for (;; ptr++) Line 6081  for (;; ptr++)
6081    
6082                while (cd->hwm > cd->start_workspace + cd->workspace_size -                while (cd->hwm > cd->start_workspace + cd->workspace_size -
6083                       WORK_SIZE_SAFETY_MARGIN -                       WORK_SIZE_SAFETY_MARGIN -
6084                       (this_hwm_offset - save_hwm_offset))                       (this_hwm_offset - base_hwm_offset))
6085                  {                  {
6086                  *errorcodeptr = expand_workspace(cd);                  *errorcodeptr = expand_workspace(cd);
6087                  if (*errorcodeptr != 0) goto FAILED;                  if (*errorcodeptr != 0) goto FAILED;
6088                  }                  }
6089    
6090                for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset;                for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset;
6091                     hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;                     hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
6092                     hc += LINK_SIZE)                     hc += LINK_SIZE)
6093                  {                  {
6094                  PUT(cd->hwm, 0, GET(hc, 0) + len);                  PUT(cd->hwm, 0, GET(hc, 0) + len);
6095                  cd->hwm += LINK_SIZE;                  cd->hwm += LINK_SIZE;
6096                  }                  }
6097                save_hwm_offset = this_hwm_offset;                base_hwm_offset = this_hwm_offset;
6098                code += len;                code += len;
6099                }                }
6100              }              }
# Line 6151  for (;; ptr++) Line 6162  for (;; ptr++)
6162    
6163            while (cd->hwm > cd->start_workspace + cd->workspace_size -            while (cd->hwm > cd->start_workspace + cd->workspace_size -
6164                   WORK_SIZE_SAFETY_MARGIN -                   WORK_SIZE_SAFETY_MARGIN -
6165                   (this_hwm_offset - save_hwm_offset))                   (this_hwm_offset - base_hwm_offset))
6166              {              {
6167              *errorcodeptr = expand_workspace(cd);              *errorcodeptr = expand_workspace(cd);
6168              if (*errorcodeptr != 0) goto FAILED;              if (*errorcodeptr != 0) goto FAILED;
6169              }              }
6170    
6171            for (hc = (pcre_uchar *)cd->start_workspace + save_hwm_offset;            for (hc = (pcre_uchar *)cd->start_workspace + base_hwm_offset;
6172                 hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;                 hc < (pcre_uchar *)cd->start_workspace + this_hwm_offset;
6173                 hc += LINK_SIZE)                 hc += LINK_SIZE)
6174              {              {
6175              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
6176              cd->hwm += LINK_SIZE;              cd->hwm += LINK_SIZE;
6177              }              }
6178            save_hwm_offset = this_hwm_offset;            base_hwm_offset = this_hwm_offset;
6179            code += len;            code += len;
6180            }            }
6181    
# Line 6260  for (;; ptr++) Line 6271  for (;; ptr++)
6271                {                {
6272                int nlen = (int)(code - bracode);                int nlen = (int)(code - bracode);
6273                *code = OP_END;                *code = OP_END;
6274                adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);                adjust_recurse(bracode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6275                memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));                memmove(bracode + 1 + LINK_SIZE, bracode, IN_UCHARS(nlen));
6276                code += 1 + LINK_SIZE;                code += 1 + LINK_SIZE;
6277                nlen += 1 + LINK_SIZE;                nlen += 1 + LINK_SIZE;
# Line 6394  for (;; ptr++) Line 6405  for (;; ptr++)
6405          else          else
6406            {            {
6407            *code = OP_END;            *code = OP_END;
6408            adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);            adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6409            memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));            memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6410            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
6411            len += 1 + LINK_SIZE;            len += 1 + LINK_SIZE;
# Line 6443  for (;; ptr++) Line 6454  for (;; ptr++)
6454    
6455          default:          default:
6456          *code = OP_END;          *code = OP_END;
6457          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, save_hwm_offset);          adjust_recurse(tempcode, 1 + LINK_SIZE, utf, cd, item_hwm_offset);
6458          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));          memmove(tempcode + 1 + LINK_SIZE, tempcode, IN_UCHARS(len));
6459          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6460          len += 1 + LINK_SIZE;          len += 1 + LINK_SIZE;
# Line 6476  for (;; ptr++) Line 6487  for (;; ptr++)
6487    
6488      /* First deal with comments. Putting this code right at the start ensures      /* First deal with comments. Putting this code right at the start ensures
6489      that comments have no bad side effects. */      that comments have no bad side effects. */
6490    
6491      if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)      if (ptr[0] == CHAR_QUESTION_MARK && ptr[1] == CHAR_NUMBER_SIGN)
6492        {        {
6493        ptr += 2;        ptr += 2;
6494        while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;        while (*ptr != CHAR_NULL && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
6495        if (*ptr == CHAR_NULL)        if (*ptr == CHAR_NULL)
6496          {          {
6497          *errorcodeptr = ERR18;          *errorcodeptr = ERR18;
6498          goto FAILED;          goto FAILED;
6499          }          }
6500        continue;        continue;
6501        }        }
6502    
6503      /* Now deal with various "verbs" that can be introduced by '*'. */      /* Now deal with various "verbs" that can be introduced by '*'. */
6504    
# Line 6616  for (;; ptr++) Line 6627  for (;; ptr++)
6627      newoptions = options;      newoptions = options;
6628      skipbytes = 0;      skipbytes = 0;
6629      bravalue = OP_CBRA;      bravalue = OP_CBRA;
6630      save_hwm_offset = cd->hwm - cd->start_workspace;      item_hwm_offset = cd->hwm - cd->start_workspace;
6631      reset_bracount = FALSE;      reset_bracount = FALSE;
6632    
6633      /* Deal with the extended parentheses; all are introduced by '?', and the      /* Deal with the extended parentheses; all are introduced by '?', and the
# Line 6679  for (;; ptr++) Line 6690  for (;; ptr++)
6690          if (tempptr[1] == CHAR_QUESTION_MARK &&          if (tempptr[1] == CHAR_QUESTION_MARK &&
6691                (tempptr[2] == CHAR_EQUALS_SIGN ||                (tempptr[2] == CHAR_EQUALS_SIGN ||
6692                 tempptr[2] == CHAR_EXCLAMATION_MARK ||                 tempptr[2] == CHAR_EXCLAMATION_MARK ||
6693                 tempptr[2] == CHAR_LESS_THAN_SIGN))                   (tempptr[2] == CHAR_LESS_THAN_SIGN &&
6694                       (tempptr[3] == CHAR_EQUALS_SIGN ||
6695                        tempptr[3] == CHAR_EXCLAMATION_MARK))))
6696            {            {
6697            cd->iscondassert = TRUE;            cd->iscondassert = TRUE;
6698            break;            break;
# Line 6760  for (;; ptr++) Line 6773  for (;; ptr++)
6773              ptr++;              ptr++;
6774              }              }
6775            namelen = (int)(ptr - name);            namelen = (int)(ptr - name);
6776            if (lengthptr != NULL) *lengthptr += IMM2_SIZE;            if (lengthptr != NULL) skipbytes += IMM2_SIZE;
6777            }            }
6778    
6779          /* Check the terminator */          /* Check the terminator */
# Line 7164  for (;; ptr++) Line 7177  for (;; ptr++)
7177            number. If the name is not found, set the value to 0 for a forward            number. If the name is not found, set the value to 0 for a forward
7178            reference. */            reference. */
7179    
7180              recno = 0;
7181            ng = cd->named_groups;            ng = cd->named_groups;
7182            for (i = 0; i < cd->names_found; i++, ng++)            for (i = 0; i < cd->names_found; i++, ng++)
7183              {              {
7184              if (namelen == ng->length &&              if (namelen == ng->length &&
7185                  STRNCMP_UC_UC(name, ng->name, namelen) == 0)                  STRNCMP_UC_UC(name, ng->name, namelen) == 0)
7186                break;                {
7187                  open_capitem *oc;
7188                  recno = ng->number;
7189                  if (is_recurse) break;
7190                  for (oc = cd->open_caps; oc != NULL; oc = oc->next)
7191                    {
7192                    if (oc->number == recno)
7193                      {
7194                      oc->flag = TRUE;
7195                      break;
7196                      }
7197                    }
7198                  }
7199              }              }
           recno = (i < cd->names_found)? ng->number : 0;  
7200    
7201            /* Count named back references. */            /* Count named back references. */
7202    
# Line 7182  for (;; ptr++) Line 7207  for (;; ptr++)
7207            16-bit data item. */            16-bit data item. */
7208    
7209            *lengthptr += IMM2_SIZE;            *lengthptr += IMM2_SIZE;
7210    
7211              /* If this is a forward reference and we are within a (?|...) group,
7212              the reference may end up as the number of a group which we are
7213              currently inside, that is, it could be a recursive reference. In the
7214              real compile this will be picked up and the reference wrapped with
7215              OP_ONCE to make it atomic, so we must space in case this occurs. */
7216    
7217              /* In fact, this can happen for a non-forward reference because
7218              another group with the same number might be created later. This
7219              issue is fixed "properly" in PCRE2. As PCRE1 is now in maintenance
7220              only mode, we finesse the bug by allowing more memory always. */
7221    
7222              /* if (recno == 0) */ *lengthptr += 2 + 2*LINK_SIZE;
7223            }            }
7224    
7225          /* In the real compile, search the name table. We check the name          /* In the real compile, search the name table. We check the name
# Line 7238  for (;; ptr++) Line 7276  for (;; ptr++)
7276              {              {
7277              if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;              if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
7278              previous = code;              previous = code;
7279                item_hwm_offset = cd->hwm - cd->start_workspace;
7280              *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;              *code++ = ((options & PCRE_CASELESS) != 0)? OP_DNREFI : OP_DNREF;
7281              PUT2INC(code, 0, index);              PUT2INC(code, 0, index);
7282              PUT2INC(code, 0, count);              PUT2INC(code, 0, count);
# Line 7314  for (;; ptr++) Line 7353  for (;; ptr++)
7353    
7354            recno = 0;            recno = 0;
7355            while(IS_DIGIT(*ptr))            while(IS_DIGIT(*ptr))
7356                {
7357                if (recno > INT_MAX / 10 - 1) /* Integer overflow */
7358                  {
7359                  while (IS_DIGIT(*ptr)) ptr++;
7360                  *errorcodeptr = ERR61;
7361                  goto FAILED;
7362                  }
7363              recno = recno * 10 + *ptr++ - CHAR_0;              recno = recno * 10 + *ptr++ - CHAR_0;
7364                }
7365    
7366            if (*ptr != (pcre_uchar)terminator)            if (*ptr != (pcre_uchar)terminator)
7367              {              {
# Line 7351  for (;; ptr++) Line 7398  for (;; ptr++)
7398            HANDLE_RECURSION:            HANDLE_RECURSION:
7399    
7400            previous = code;            previous = code;
7401              item_hwm_offset = cd->hwm - cd->start_workspace;
7402            called = cd->start_code;            called = cd->start_code;
7403    
7404            /* When we are actually compiling, find the bracket that is being            /* When we are actually compiling, find the bracket that is being
# Line 7552  for (;; ptr++) Line 7600  for (;; ptr++)
7600        previous = NULL;        previous = NULL;
7601        cd->iscondassert = FALSE;        cd->iscondassert = FALSE;
7602        }        }
7603      else previous = code;      else
7604          {
7605          previous = code;
7606          item_hwm_offset = cd->hwm - cd->start_workspace;
7607          }
7608    
7609      *code = bravalue;      *code = bravalue;
7610      tempcode = code;      tempcode = code;
# Line 7800  for (;; ptr++) Line 7852  for (;; ptr++)
7852          const pcre_uchar *p;          const pcre_uchar *p;
7853          pcre_uint32 cf;          pcre_uint32 cf;
7854    
7855          save_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */          item_hwm_offset = cd->hwm - cd->start_workspace;   /* Normally this is set when '(' is read */
7856          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?          terminator = (*(++ptr) == CHAR_LESS_THAN_SIGN)?
7857            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;            CHAR_GREATER_THAN_SIGN : CHAR_APOSTROPHE;
7858    
# Line 7868  for (;; ptr++) Line 7920  for (;; ptr++)
7920          HANDLE_REFERENCE:          HANDLE_REFERENCE:
7921          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;          if (firstcharflags == REQ_UNSET) firstcharflags = REQ_NONE;
7922          previous = code;          previous = code;
7923            item_hwm_offset = cd->hwm - cd->start_workspace;
7924          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;          *code++ = ((options & PCRE_CASELESS) != 0)? OP_REFI : OP_REF;
7925          PUT2INC(code, 0, recno);          PUT2INC(code, 0, recno);
7926          cd->backref_map |= (recno < 32)? (1 << recno) : 1;          cd->backref_map |= (recno < 32)? (1 << recno) : 1;
# Line 7897  for (;; ptr++) Line 7950  for (;; ptr++)
7950          if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))          if (!get_ucp(&ptr, &negated, &ptype, &pdata, errorcodeptr))
7951            goto FAILED;            goto FAILED;
7952          previous = code;          previous = code;
7953            item_hwm_offset = cd->hwm - cd->start_workspace;
7954          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;          *code++ = ((escape == ESC_p) != negated)? OP_PROP : OP_NOTPROP;
7955          *code++ = ptype;          *code++ = ptype;
7956          *code++ = pdata;          *code++ = pdata;
# Line 7937  for (;; ptr++) Line 7991  for (;; ptr++)
7991    
7992            {            {
7993            previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;            previous = (escape > ESC_b && escape < ESC_Z)? code : NULL;
7994              item_hwm_offset = cd->hwm - cd->start_workspace;
7995            *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;            *code++ = (!utf && escape == ESC_C)? OP_ALLANY : escape;
7996            }            }
7997          }          }
# Line 7980  for (;; ptr++) Line 8035  for (;; ptr++)
8035    
8036      ONE_CHAR:      ONE_CHAR:
8037      previous = code;      previous = code;
8038        item_hwm_offset = cd->hwm - cd->start_workspace;
8039    
8040      /* For caseless UTF-8 mode when UCP support is available, check whether      /* For caseless UTF-8 mode when UCP support is available, check whether
8041      this character has more than one other case. If so, generate a special      this character has more than one other case. If so, generate a special
# Line 8288  for (;;) Line 8344  for (;;)
8344        int fixed_length;        int fixed_length;
8345        *code = OP_END;        *code = OP_END;
8346        fixed_length = find_fixedlength(last_branch,  (options & PCRE_UTF8) != 0,        fixed_length = find_fixedlength(last_branch,  (options & PCRE_UTF8) != 0,
8347          FALSE, cd);          FALSE, cd, NULL);
8348        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
8349        if (fixed_length == -3)        if (fixed_length == -3)
8350          {          {
# Line 9391  if (cd->check_lookbehind) Line 9447  if (cd->check_lookbehind)
9447        int end_op = *be;        int end_op = *be;
9448        *be = OP_END;        *be = OP_END;
9449        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,        fixed_length = find_fixedlength(cc, (re->options & PCRE_UTF8) != 0, TRUE,
9450          cd);          cd, NULL);
9451        *be = end_op;        *be = end_op;
9452        DPRINTF(("fixed length = %d\n", fixed_length));        DPRINTF(("fixed length = %d\n", fixed_length));
9453        if (fixed_length < 0)        if (fixed_length < 0)

Legend:
Removed from v.1538  
changed lines
  Added in v.1563

  ViewVC Help
Powered by ViewVC 1.1.5