/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 461 by ph10, Mon Oct 5 10:59:35 2009 UTC revision 488 by ph10, Mon Jan 11 15:29:42 2010 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2010 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 53  supporting internal functions that are n Line 53  supporting internal functions that are n
53  #include "pcre_internal.h"  #include "pcre_internal.h"
54    
55    
56  /* When DEBUG is defined, we need the pcre_printint() function, which is also  /* When PCRE_DEBUG is defined, we need the pcre_printint() function, which is
57  used by pcretest. DEBUG is not defined when building a production library. */  also used by pcretest. PCRE_DEBUG is not defined when building a production
58    library. */
59    
60  #ifdef DEBUG  #ifdef PCRE_DEBUG
61  #include "pcre_printint.src"  #include "pcre_printint.src"
62  #endif  #endif
63    
# Line 1994  static BOOL Line 1995  static BOOL
1995  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,  could_be_empty(const uschar *code, const uschar *endcode, branch_chain *bcptr,
1996    BOOL utf8)    BOOL utf8)
1997  {  {
1998  while (bcptr != NULL && bcptr->current >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
1999    {    {
2000    if (!could_be_empty_branch(bcptr->current, endcode, utf8)) return FALSE;    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf8))
2001        return FALSE;
2002    bcptr = bcptr->outer;    bcptr = bcptr->outer;
2003    }    }
2004  return TRUE;  return TRUE;
# Line 2658  BOOL utf8 = FALSE; Line 2660  BOOL utf8 = FALSE;
2660  uschar *utf8_char = NULL;  uschar *utf8_char = NULL;
2661  #endif  #endif
2662    
2663  #ifdef DEBUG  #ifdef PCRE_DEBUG
2664  if (lengthptr != NULL) DPRINTF((">> start branch\n"));  if (lengthptr != NULL) DPRINTF((">> start branch\n"));
2665  #endif  #endif
2666    
# Line 2717  for (;; ptr++) Line 2719  for (;; ptr++)
2719    
2720    if (lengthptr != NULL)    if (lengthptr != NULL)
2721      {      {
2722  #ifdef DEBUG  #ifdef PCRE_DEBUG
2723      if (code > cd->hwm) cd->hwm = code;                 /* High water info */      if (code > cd->hwm) cd->hwm = code;                 /* High water info */
2724  #endif  #endif
2725      if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */      if (code > cd->start_workspace + COMPILE_WORK_SIZE) /* Check for overrun */
# Line 4213  we set the flag only if there is a liter Line 4215  we set the flag only if there is a liter
4215            {            {
4216            /* In the pre-compile phase, we don't actually do the replication. We            /* In the pre-compile phase, we don't actually do the replication. We
4217            just adjust the length as if we had. Do some paranoid checks for            just adjust the length as if we had. Do some paranoid checks for
4218            potential integer overflow. */            potential integer overflow. The INT64_OR_DOUBLE type is a 64-bit
4219              integer type when available, otherwise double. */
4220    
4221            if (lengthptr != NULL)            if (lengthptr != NULL)
4222              {              {
4223              int delta = (repeat_min - 1)*length_prevgroup;              int delta = (repeat_min - 1)*length_prevgroup;
4224              if ((double)(repeat_min - 1)*(double)length_prevgroup >              if ((INT64_OR_DOUBLE)(repeat_min - 1)*
4225                                                              (double)INT_MAX ||                    (INT64_OR_DOUBLE)length_prevgroup >
4226                        (INT64_OR_DOUBLE)INT_MAX ||
4227                  OFLOW_MAX - *lengthptr < delta)                  OFLOW_MAX - *lengthptr < delta)
4228                {                {
4229                *errorcodeptr = ERR20;                *errorcodeptr = ERR20;
# Line 4265  we set the flag only if there is a liter Line 4269  we set the flag only if there is a liter
4269          just adjust the length as if we had. For each repetition we must add 1          just adjust the length as if we had. For each repetition we must add 1
4270          to the length for BRAZERO and for all but the last repetition we must          to the length for BRAZERO and for all but the last repetition we must
4271          add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some          add 2 + 2*LINKSIZE to allow for the nesting that occurs. Do some
4272          paranoid checks to avoid integer overflow. */          paranoid checks to avoid integer overflow. The INT64_OR_DOUBLE type is
4273            a 64-bit integer type when available, otherwise double. */
4274    
4275          if (lengthptr != NULL && repeat_max > 0)          if (lengthptr != NULL && repeat_max > 0)
4276            {            {
4277            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -            int delta = repeat_max * (length_prevgroup + 1 + 2 + 2*LINK_SIZE) -
4278                        2 - 2*LINK_SIZE;   /* Last one doesn't nest */                        2 - 2*LINK_SIZE;   /* Last one doesn't nest */
4279            if ((double)repeat_max *            if ((INT64_OR_DOUBLE)repeat_max *
4280                  (double)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)                  (INT64_OR_DOUBLE)(length_prevgroup + 1 + 2 + 2*LINK_SIZE)
4281                    > (double)INT_MAX ||                    > (INT64_OR_DOUBLE)INT_MAX ||
4282                OFLOW_MAX - *lengthptr < delta)                OFLOW_MAX - *lengthptr < delta)
4283              {              {
4284              *errorcodeptr = ERR20;              *errorcodeptr = ERR20;
# Line 5248  we set the flag only if there is a liter Line 5253  we set the flag only if there is a liter
5253              {              {
5254              cd->external_options = newoptions;              cd->external_options = newoptions;
5255              }              }
5256           else            else
5257              {              {
5258              if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))              if ((options & PCRE_IMS) != (newoptions & PCRE_IMS))
5259                {                {
# Line 5585  we set the flag only if there is a liter Line 5590  we set the flag only if there is a liter
5590    
5591        if (-c >= ESC_REF)        if (-c >= ESC_REF)
5592          {          {
5593            open_capitem *oc;
5594          recno = -c - ESC_REF;          recno = -c - ESC_REF;
5595    
5596          HANDLE_REFERENCE:    /* Come here from named backref handling */          HANDLE_REFERENCE:    /* Come here from named backref handling */
# Line 5594  we set the flag only if there is a liter Line 5600  we set the flag only if there is a liter
5600          PUT2INC(code, 0, recno);          PUT2INC(code, 0, recno);
5601          cd->backref_map |= (recno < 32)? (1 << recno) : 1;          cd->backref_map |= (recno < 32)? (1 << recno) : 1;
5602          if (recno > cd->top_backref) cd->top_backref = recno;          if (recno > cd->top_backref) cd->top_backref = recno;
5603    
5604            /* Check to see if this back reference is recursive, that is, it
5605            is inside the group that it references. A flag is set so that the
5606            group can be made atomic. */
5607    
5608            for (oc = cd->open_caps; oc != NULL; oc = oc->next)
5609              {
5610              if (oc->number == recno)
5611                {
5612                oc->flag = TRUE;
5613                break;
5614                }
5615              }
5616          }          }
5617    
5618        /* So are Unicode property matches, if supported. */        /* So are Unicode property matches, if supported. */
# Line 5783  int branchfirstbyte, branchreqbyte; Line 5802  int branchfirstbyte, branchreqbyte;
5802  int length;  int length;
5803  int orig_bracount;  int orig_bracount;
5804  int max_bracount;  int max_bracount;
5805    int old_external_options = cd->external_options;
5806  branch_chain bc;  branch_chain bc;
5807    
5808  bc.outer = bcptr;  bc.outer = bcptr;
5809  bc.current = code;  bc.current_branch = code;
5810    
5811  firstbyte = reqbyte = REQ_UNSET;  firstbyte = reqbyte = REQ_UNSET;
5812    
# Line 5805  them global. It tests the value of lengt Line 5825  them global. It tests the value of lengt
5825  pre-compile phase to find out whether anything has yet been compiled or not. */  pre-compile phase to find out whether anything has yet been compiled or not. */
5826    
5827  /* If this is a capturing subpattern, add to the chain of open capturing items  /* If this is a capturing subpattern, add to the chain of open capturing items
5828  so that we can detect them if (*ACCEPT) is encountered. */  so that we can detect them if (*ACCEPT) is encountered. This is also used to
5829    detect groups that contain recursive back references to themselves. */
5830    
5831  if (*code == OP_CBRA)  if (*code == OP_CBRA)
5832    {    {
5833    capnumber = GET2(code, 1 + LINK_SIZE);    capnumber = GET2(code, 1 + LINK_SIZE);
5834    capitem.number = capnumber;    capitem.number = capnumber;
5835    capitem.next = cd->open_caps;    capitem.next = cd->open_caps;
5836      capitem.flag = FALSE;
5837    cd->open_caps = &capitem;    cd->open_caps = &capitem;
5838    }    }
5839    
# Line 5859  for (;;) Line 5881  for (;;)
5881      return FALSE;      return FALSE;
5882      }      }
5883    
5884      /* If the external options have changed during this branch, it means that we
5885      are at the top level, and a leading option setting has been encountered. We
5886      need to re-set the original option values to take account of this so that,
5887      during the pre-compile phase, we know to allow for a re-set at the start of
5888      subsequent branches. */
5889    
5890      if (old_external_options != cd->external_options)
5891        oldims = cd->external_options & PCRE_IMS;
5892    
5893    /* Keep the highest bracket count in case (?| was used and some branch    /* Keep the highest bracket count in case (?| was used and some branch
5894    has fewer than the rest. */    has fewer than the rest. */
5895    
# Line 5959  for (;;) Line 5990  for (;;)
5990        while (branch_length > 0);        while (branch_length > 0);
5991        }        }
5992    
     /* If it was a capturing subpattern, remove it from the chain. */  
   
     if (capnumber > 0) cd->open_caps = cd->open_caps->next;  
   
5993      /* Fill in the ket */      /* Fill in the ket */
5994    
5995      *code = OP_KET;      *code = OP_KET;
5996      PUT(code, 1, code - start_bracket);      PUT(code, 1, code - start_bracket);
5997      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
5998    
5999      /* Resetting option if needed */      /* If it was a capturing subpattern, check to see if it contained any
6000        recursive back references. If so, we must wrap it in atomic brackets.
6001        In any event, remove the block from the chain. */
6002    
6003        if (capnumber > 0)
6004          {
6005          if (cd->open_caps->flag)
6006            {
6007            memmove(start_bracket + 1 + LINK_SIZE, start_bracket,
6008              code - start_bracket);
6009            *start_bracket = OP_ONCE;
6010            code += 1 + LINK_SIZE;
6011            PUT(start_bracket, 1, code - start_bracket);
6012            *code = OP_KET;
6013            PUT(code, 1, code - start_bracket);
6014            code += 1 + LINK_SIZE;
6015            length += 2 + 2*LINK_SIZE;
6016            }
6017          cd->open_caps = cd->open_caps->next;
6018          }
6019    
6020        /* Reset options if needed. */
6021    
6022      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)      if ((options & PCRE_IMS) != oldims && *ptr == CHAR_RIGHT_PARENTHESIS)
6023        {        {
# Line 6018  for (;;) Line 6066  for (;;)
6066      {      {
6067      *code = OP_ALT;      *code = OP_ALT;
6068      PUT(code, 1, code - last_branch);      PUT(code, 1, code - last_branch);
6069      bc.current = last_branch = code;      bc.current_branch = last_branch = code;
6070      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
6071      }      }
6072    
# Line 6434  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 6482  while (ptr[skipatstart] == CHAR_LEFT_PAR
6482    
6483  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
6484  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0 &&
6485       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)       (*erroroffset = _pcre_valid_utf8((USPTR)pattern, -1)) >= 0)
6486    {    {
6487    errorcode = ERR44;    errorcode = ERR44;
6488    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
# Line 6631  if debugging, leave the test till after Line 6679  if debugging, leave the test till after
6679    
6680  *code++ = OP_END;  *code++ = OP_END;
6681    
6682  #ifndef DEBUG  #ifndef PCRE_DEBUG
6683  if (code - codestart > length) errorcode = ERR23;  if (code - codestart > length) errorcode = ERR23;
6684  #endif  #endif
6685    
# Line 6755  if (reqbyte >= 0 && Line 6803  if (reqbyte >= 0 &&
6803  /* Print out the compiled data if debugging is enabled. This is never the  /* Print out the compiled data if debugging is enabled. This is never the
6804  case when building a production library. */  case when building a production library. */
6805    
6806  #ifdef DEBUG  #ifdef PCRE_DEBUG
6807    
6808  printf("Length = %d top_bracket = %d top_backref = %d\n",  printf("Length = %d top_bracket = %d top_backref = %d\n",
6809    length, re->top_bracket, re->top_backref);    length, re->top_bracket, re->top_backref);
# Line 6793  if (code - codestart > length) Line 6841  if (code - codestart > length)
6841    if (errorcodeptr != NULL) *errorcodeptr = ERR23;    if (errorcodeptr != NULL) *errorcodeptr = ERR23;
6842    return NULL;    return NULL;
6843    }    }
6844  #endif   /* DEBUG */  #endif   /* PCRE_DEBUG */
6845    
6846  return (pcre *)re;  return (pcre *)re;
6847  }  }

Legend:
Removed from v.461  
changed lines
  Added in v.488

  ViewVC Help
Powered by ViewVC 1.1.5