/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 773 by ph10, Wed Nov 30 18:10:27 2011 UTC revision 835 by ph10, Wed Dec 28 16:10:09 2011 UTC
# Line 88  so this number is very generous. Line 88  so this number is very generous.
88  The same workspace is used during the second, actual compile phase for  The same workspace is used during the second, actual compile phase for
89  remembering forward references to groups so that they can be filled in at the  remembering forward references to groups so that they can be filled in at the
90  end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE  end. Each entry in this list occupies LINK_SIZE bytes, so even when LINK_SIZE
91  is 4 there is plenty of room for most patterns. However, the memory can get  is 4 there is plenty of room. */
 filled up by repetitions of forward references, for example patterns like  
 /(?1){0,1999}(b)/, and one user did hit the limit. The code has been changed so  
 that the workspace is expanded using malloc() in this situation. The value  
 below is therefore a minimum, and we put a maximum on it for safety. The  
 minimum is now also defined in terms of LINK_SIZE so that the use of malloc()  
 kicks in at the same number of forward references in all cases. */  
92    
93  #define COMPILE_WORK_SIZE (2048*LINK_SIZE)  #define COMPILE_WORK_SIZE (4096)
 #define COMPILE_WORK_SIZE_MAX (100*COMPILE_WORK_SIZE)  
94    
95  /* The overrun tests check for a slightly smaller size so that they detect the  /* The overrun tests check for a slightly smaller size so that they detect the
96  overrun before it actually does run off the end of the data block. */  overrun before it actually does run off the end of the data block. */
97    
98  #define WORK_SIZE_SAFETY_MARGIN (100)  #define WORK_SIZE_CHECK (COMPILE_WORK_SIZE - 100)
99    
100    
101  /* Table for handling escaped characters in the range '0'-'z'. Positive returns  /* Table for handling escaped characters in the range '0'-'z'. Positive returns
# Line 419  static const char error_texts[] = Line 412  static const char error_texts[] =
412    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"    "\\k is not followed by a braced, angle-bracketed, or quoted name\0"
413    /* 70 */    /* 70 */
414    "internal error: unknown opcode in find_fixedlength()\0"    "internal error: unknown opcode in find_fixedlength()\0"
   "\\N is not supported in a class\0"  
   "too many forward references\0"  
415    ;    ;
416    
417  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 589  return s; Line 580  return s;
580    
581    
582  /*************************************************  /*************************************************
 *           Expand the workspace                 *  
 *************************************************/  
   
 /* This function is called during the second compiling phase, if the number of  
 forward references fills the existing workspace, which is originally a block on  
 the stack. A larger block is obtained from malloc() unless the ultimate limit  
 has been reached or the increase will be rather small.  
   
 Argument: pointer to the compile data block  
 Returns:  0 if all went well, else an error number  
 */  
   
 static int  
 expand_workspace(compile_data *cd)  
 {  
 uschar *newspace;  
 int newsize = cd->workspace_size * 2;  
   
 if (newsize > COMPILE_WORK_SIZE_MAX) newsize = COMPILE_WORK_SIZE_MAX;  
 if (cd->workspace_size >= COMPILE_WORK_SIZE_MAX ||  
     newsize - cd->workspace_size < WORK_SIZE_SAFETY_MARGIN)  
  return ERR72;  
   
 newspace = (pcre_malloc)(newsize);  
 if (newspace == NULL) return ERR21;  
   
 memcpy(newspace, cd->start_workspace, cd->workspace_size);  
 cd->hwm = (uschar *)newspace + (cd->hwm - cd->start_workspace);  
 if (cd->workspace_size > COMPILE_WORK_SIZE)  
   (pcre_free)((void *)cd->start_workspace);  
 cd->start_workspace = newspace;  
 cd->workspace_size = newsize;  
 return 0;  
 }  
   
   
   
 /*************************************************  
583  *            Check for counted repeat            *  *            Check for counted repeat            *
584  *************************************************/  *************************************************/
585    
# Line 3377  for (;; ptr++) Line 3330  for (;; ptr++)
3330  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
3331      if (code > cd->hwm) cd->hwm = code;                 /* High water info */      if (code > cd->hwm) cd->hwm = code;                 /* High water info */
3332  #endif  #endif
3333      if (code > cd->start_workspace + cd->workspace_size -      if (code > cd->start_workspace + WORK_SIZE_CHECK)   /* Check for overrun */
         WORK_SIZE_SAFETY_MARGIN)                       /* Check for overrun */  
3334        {        {
3335        *errorcodeptr = ERR52;        *errorcodeptr = ERR52;
3336        goto FAILED;        goto FAILED;
# Line 3428  for (;; ptr++) Line 3380  for (;; ptr++)
3380    /* In the real compile phase, just check the workspace used by the forward    /* In the real compile phase, just check the workspace used by the forward
3381    reference list. */    reference list. */
3382    
3383    else if (cd->hwm > cd->start_workspace + cd->workspace_size -    else if (cd->hwm > cd->start_workspace + WORK_SIZE_CHECK)
            WORK_SIZE_SAFETY_MARGIN)  
3384      {      {
3385      *errorcodeptr = ERR52;      *errorcodeptr = ERR52;
3386      goto FAILED;      goto FAILED;
# Line 3819  for (;; ptr++) Line 3770  for (;; ptr++)
3770          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
3771    
3772          if (-c == ESC_b) c = CHAR_BS;    /* \b is backspace in a class */          if (-c == ESC_b) c = CHAR_BS;    /* \b is backspace in a class */
         else if (-c == ESC_N)            /* \N is not supported in a class */  
           {  
           *errorcodeptr = ERR71;  
           goto FAILED;  
           }  
3773          else if (-c == ESC_Q)            /* Handle start of quoted string */          else if (-c == ESC_Q)            /* Handle start of quoted string */
3774            {            {
3775            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)            if (ptr[1] == CHAR_BACKSLASH && ptr[2] == CHAR_E)
# Line 4931  for (;; ptr++) Line 4877  for (;; ptr++)
4877              *lengthptr += delta;              *lengthptr += delta;
4878              }              }
4879    
4880            /* This is compiling for real. If there is a set first byte for            /* This is compiling for real */
           the group, and we have not yet set a "required byte", set it. Make  
           sure there is enough workspace for copying forward references before  
           doing the copy. */  
4881    
4882            else            else
4883              {              {
4884              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;              if (groupsetfirstbyte && reqbyte < 0) reqbyte = firstbyte;
   
4885              for (i = 1; i < repeat_min; i++)              for (i = 1; i < repeat_min; i++)
4886                {                {
4887                uschar *hc;                uschar *hc;
4888                uschar *this_hwm = cd->hwm;                uschar *this_hwm = cd->hwm;
4889                memcpy(code, previous, len);                memcpy(code, previous, len);
   
               while (cd->hwm > cd->start_workspace + cd->workspace_size -  
                      WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))  
                 {  
                 int save_offset = save_hwm - cd->start_workspace;  
                 int this_offset = this_hwm - cd->start_workspace;  
                 *errorcodeptr = expand_workspace(cd);  
                 if (*errorcodeptr != 0) goto FAILED;  
                 save_hwm = (uschar *)cd->start_workspace + save_offset;  
                 this_hwm = (uschar *)cd->start_workspace + this_offset;  
                 }  
   
4890                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)                for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
4891                  {                  {
4892                  PUT(cd->hwm, 0, GET(hc, 0) + len);                  PUT(cd->hwm, 0, GET(hc, 0) + len);
# Line 4986  for (;; ptr++) Line 4916  for (;; ptr++)
4916          add 2 + 2*LINK_SIZE to allow for the nesting that occurs. Do some          add 2 + 2*LINK_SIZE to allow for the nesting that occurs. Do some
4917          paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is          paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
4918          a 64-bit integer type when available, otherwise double. */          a 64-bit integer type when available, otherwise double. */
4919    
4920          if (lengthptr != NULL && repeat_max > 0)          if (lengthptr != NULL && repeat_max > 0)
4921            {            {
4922            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
# Line 5024  for (;; ptr++) Line 4954  for (;; ptr++)
4954              }              }
4955    
4956            memcpy(code, previous, len);            memcpy(code, previous, len);
   
           /* Ensure there is enough workspace for forward references before  
           copying them. */  
   
           while (cd->hwm > cd->start_workspace + cd->workspace_size -  
                  WORK_SIZE_SAFETY_MARGIN - (this_hwm - save_hwm))  
             {  
             int save_offset = save_hwm - cd->start_workspace;  
             int this_offset = this_hwm - cd->start_workspace;  
             *errorcodeptr = expand_workspace(cd);  
             if (*errorcodeptr != 0) goto FAILED;  
             save_hwm = (uschar *)cd->start_workspace + save_offset;  
             this_hwm = (uschar *)cd->start_workspace + this_offset;  
             }  
   
4957            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)            for (hc = save_hwm; hc < this_hwm; hc += LINK_SIZE)
4958              {              {
4959              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));              PUT(cd->hwm, 0, GET(hc, 0) + len + ((i != 0)? 2+LINK_SIZE : 1));
# Line 6056  for (;; ptr++) Line 5971  for (;; ptr++)
5971                /* Fudge the value of "called" so that when it is inserted as an                /* Fudge the value of "called" so that when it is inserted as an
5972                offset below, what is actually inserted is the reference number                offset below, what is actually inserted is the reference number
5973                of the group. Then remember the forward reference. */                of the group. Then remember the forward reference. */
5974    
5975                called = cd->start_code + recno;                called = cd->start_code + recno;
               if (cd->hwm >= cd->start_workspace + cd->workspace_size -  
                   WORK_SIZE_SAFETY_MARGIN)  
                 {  
                 *errorcodeptr = expand_workspace(cd);  
                 if (*errorcodeptr != 0) goto FAILED;  
                 }  
5976                PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));                PUTINC(cd->hwm, 0, (int)(code + 1 - cd->start_code));
5977                }                }
5978    
# Line 6084  for (;; ptr++) Line 5993  for (;; ptr++)
5993                }                }
5994              }              }
5995    
5996            /* Insert the recursion/subroutine item. It does not have a set first            /* Insert the recursion/subroutine item. */
           byte (relevant if it is repeated, because it will then be wrapped  
           with ONCE brackets). */  
5997    
5998            *code = OP_RECURSE;            *code = OP_RECURSE;
5999            PUT(code, 1, (int)(called - cd->start_code));            PUT(code, 1, (int)(called - cd->start_code));
6000            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
           groupsetfirstbyte = FALSE;  
6001            }            }
6002    
6003          /* Can't determine a first byte now */          /* Can't determine a first byte now */
# Line 7314  compile_data *cd = &compile_block; Line 7220  compile_data *cd = &compile_block;
7220  computing the amount of memory that is needed. Compiled items are thrown away  computing the amount of memory that is needed. Compiled items are thrown away
7221  as soon as possible, so that a fairly large buffer should be sufficient for  as soon as possible, so that a fairly large buffer should be sufficient for
7222  this purpose. The same space is used in the second phase for remembering where  this purpose. The same space is used in the second phase for remembering where
7223  to fill in forward references to subpatterns. That may overflow, in which case  to fill in forward references to subpatterns. */
 new memory is obtained from malloc(). */  
7224    
7225  uschar cworkspace[COMPILE_WORK_SIZE];  uschar cworkspace[COMPILE_WORK_SIZE];
7226    
# Line 7505  cd->bracount = cd->final_bracount = 0; Line 7410  cd->bracount = cd->final_bracount = 0;
7410  cd->names_found = 0;  cd->names_found = 0;
7411  cd->name_entry_size = 0;  cd->name_entry_size = 0;
7412  cd->name_table = NULL;  cd->name_table = NULL;
7413    cd->start_workspace = cworkspace;
7414  cd->start_code = cworkspace;  cd->start_code = cworkspace;
7415  cd->hwm = cworkspace;  cd->hwm = cworkspace;
 cd->start_workspace = cworkspace;  
 cd->workspace_size = COMPILE_WORK_SIZE;  
7416  cd->start_pattern = (const uschar *)pattern;  cd->start_pattern = (const uschar *)pattern;
7417  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));  cd->end_pattern = (const uschar *)(pattern + strlen(pattern));
7418  cd->req_varyopt = 0;  cd->req_varyopt = 0;
# Line 7586  cd->names_found = 0; Line 7490  cd->names_found = 0;
7490  cd->name_table = (uschar *)re + re->name_table_offset;  cd->name_table = (uschar *)re + re->name_table_offset;
7491  codestart = cd->name_table + re->name_entry_size * re->name_count;  codestart = cd->name_table + re->name_entry_size * re->name_count;
7492  cd->start_code = codestart;  cd->start_code = codestart;
7493  cd->hwm = (uschar *)(cd->start_workspace);  cd->hwm = cworkspace;
7494  cd->req_varyopt = 0;  cd->req_varyopt = 0;
7495  cd->had_accept = FALSE;  cd->had_accept = FALSE;
7496  cd->check_lookbehind = FALSE;  cd->check_lookbehind = FALSE;
# Line 7620  if debugging, leave the test till after Line 7524  if debugging, leave the test till after
7524  if (code - codestart > length) errorcode = ERR23;  if (code - codestart > length) errorcode = ERR23;
7525  #endif  #endif
7526    
7527  /* Fill in any forward references that are required. There may be repeated  /* Fill in any forward references that are required. */
 references; optimize for them, as searching a large regex takes time. */  
7528    
7529  if (cd->hwm > cd->start_workspace)  while (errorcode == 0 && cd->hwm > cworkspace)
7530    {    {
7531    int prev_recno = -1;    int offset, recno;
7532    const uschar *groupptr = NULL;    const uschar *groupptr;
7533    while (errorcode == 0 && cd->hwm > cd->start_workspace)    cd->hwm -= LINK_SIZE;
7534      {    offset = GET(cd->hwm, 0);
7535      int offset, recno;    recno = GET(codestart, offset);
7536      cd->hwm -= LINK_SIZE;    groupptr = _pcre_find_bracket(codestart, utf8, recno);
7537      offset = GET(cd->hwm, 0);    if (groupptr == NULL) errorcode = ERR53;
7538      recno = GET(codestart, offset);      else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));
7539      if (recno != prev_recno)    }
       {  
       groupptr = _pcre_find_bracket(codestart, utf8, recno);  
       prev_recno = recno;  
       }  
     if (groupptr == NULL) errorcode = ERR53;  
       else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));  
     }  
   }  
   
 /* If the workspace had to be expanded, free the new memory. */  
   
 if (cd->workspace_size > COMPILE_WORK_SIZE)  
   (pcre_free)((void *)cd->start_workspace);  
7540    
7541  /* Give an error if there's a back reference to a non-existent capturing  /* Give an error if there's a back reference to a non-existent capturing
7542  subpattern. */  subpattern. */

Legend:
Removed from v.773  
changed lines
  Added in v.835

  ViewVC Help
Powered by ViewVC 1.1.5