/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 1348 by ph10, Fri Jul 5 10:38:37 2013 UTC revision 1352 by ph10, Mon Jul 29 15:49:21 2013 UTC
# Line 2361  Returns:      TRUE if what is matched co Line 2361  Returns:      TRUE if what is matched co
2361  typedef struct recurse_check {  typedef struct recurse_check {
2362    struct recurse_check *prev;    struct recurse_check *prev;
2363    const pcre_uchar *group;    const pcre_uchar *group;
2364  } recurse_check;  } recurse_check;
2365    
2366  static BOOL  static BOOL
2367  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
# Line 2377  for (code = first_significant_code(code Line 2377  for (code = first_significant_code(code
2377    const pcre_uchar *ccode;    const pcre_uchar *ccode;
2378    
2379    c = *code;    c = *code;
2380    
2381    /* Skip over forward assertions; the other assertions are skipped by    /* Skip over forward assertions; the other assertions are skipped by
2382    first_significant_code() with a TRUE final argument. */    first_significant_code() with a TRUE final argument. */
2383    
# Line 2405  for (code = first_significant_code(code Line 2405  for (code = first_significant_code(code
2405      NULL. */      NULL. */
2406    
2407      if (cd->start_workspace != NULL)      if (cd->start_workspace != NULL)
2408        {        {
2409        const pcre_uchar *tcode;        const pcre_uchar *tcode;
2410        for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)        for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
2411          if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;          if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
2412        if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */        if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
2413        }        }
2414    
2415      /* If we are scanning a completed pattern, there are no forward references      /* If we are scanning a completed pattern, there are no forward references
2416      and all groups are complete. We need to detect whether this is a recursive      and all groups are complete. We need to detect whether this is a recursive
2417      call, as otherwise there will be an infinite loop. If it is a recursion,      call, as otherwise there will be an infinite loop. If it is a recursion,
2418      just skip over it. Simple recursions are easily detected. For mutual      just skip over it. Simple recursions are easily detected. For mutual
2419      recursions we keep a chain on the stack. */      recursions we keep a chain on the stack. */
2420    
2421      else      else
2422        {        {
2423        recurse_check *r = recurses;        recurse_check *r = recurses;
2424        const pcre_uchar *endgroup = scode;        const pcre_uchar *endgroup = scode;
2425    
2426        do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);        do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
2427        if (code >= scode && code <= endgroup) continue;  /* Simple recursion */        if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
2428    
2429        for (r = recurses; r != NULL; r = r->prev)        for (r = recurses; r != NULL; r = r->prev)
2430          if (r->group == scode) break;          if (r->group == scode) break;
2431        if (r != NULL) continue;   /* Mutual recursion */        if (r != NULL) continue;   /* Mutual recursion */
# Line 2436  for (code = first_significant_code(code Line 2436  for (code = first_significant_code(code
2436    
2437      empty_branch = FALSE;      empty_branch = FALSE;
2438      this_recurse.prev = recurses;      this_recurse.prev = recurses;
2439      this_recurse.group = scode;      this_recurse.group = scode;
2440    
2441      do      do
2442        {        {
2443        if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))        if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
# Line 2557  for (code = first_significant_code(code Line 2557  for (code = first_significant_code(code
2557      case OP_ANY:      case OP_ANY:
2558      case OP_ALLANY:      case OP_ALLANY:
2559      case OP_ANYBYTE:      case OP_ANYBYTE:
2560    
2561      case OP_PROP:      case OP_PROP:
2562      case OP_NOTPROP:      case OP_NOTPROP:
2563      case OP_ANYNL:      case OP_ANYNL:
2564    
2565      case OP_NOT_HSPACE:      case OP_NOT_HSPACE:
2566      case OP_HSPACE:      case OP_HSPACE:
2567      case OP_NOT_VSPACE:      case OP_NOT_VSPACE:
2568      case OP_VSPACE:      case OP_VSPACE:
2569      case OP_EXTUNI:      case OP_EXTUNI:
2570    
2571      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
2572      case OP_DIGIT:      case OP_DIGIT:
2573      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
2574      case OP_WHITESPACE:      case OP_WHITESPACE:
2575      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
2576      case OP_WORDCHAR:      case OP_WORDCHAR:
2577    
2578      case OP_CHAR:      case OP_CHAR:
2579      case OP_CHARI:      case OP_CHARI:
2580      case OP_NOT:      case OP_NOT:
2581      case OP_NOTI:      case OP_NOTI:
2582    
2583      case OP_PLUS:      case OP_PLUS:
2584      case OP_PLUSI:      case OP_PLUSI:
2585      case OP_MINPLUS:      case OP_MINPLUS:
# Line 2589  for (code = first_significant_code(code Line 2589  for (code = first_significant_code(code
2589      case OP_NOTPLUSI:      case OP_NOTPLUSI:
2590      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
2591      case OP_NOTMINPLUSI:      case OP_NOTMINPLUSI:
2592    
2593      case OP_POSPLUS:      case OP_POSPLUS:
2594      case OP_POSPLUSI:      case OP_POSPLUSI:
2595      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
2596      case OP_NOTPOSPLUSI:      case OP_NOTPOSPLUSI:
2597    
2598      case OP_EXACT:      case OP_EXACT:
2599      case OP_EXACTI:      case OP_EXACTI:
2600      case OP_NOTEXACT:      case OP_NOTEXACT:
2601      case OP_NOTEXACTI:      case OP_NOTEXACTI:
2602    
2603      case OP_TYPEPLUS:      case OP_TYPEPLUS:
2604      case OP_TYPEMINPLUS:      case OP_TYPEMINPLUS:
2605      case OP_TYPEPOSPLUS:      case OP_TYPEPOSPLUS:
2606      case OP_TYPEEXACT:      case OP_TYPEEXACT:
2607    
2608      return FALSE;      return FALSE;
2609    
2610      /* These are going to continue, as they may be empty, but we have to      /* These are going to continue, as they may be empty, but we have to
# Line 2644  for (code = first_significant_code(code Line 2644  for (code = first_significant_code(code
2644  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2645      case OP_STAR:      case OP_STAR:
2646      case OP_STARI:      case OP_STARI:
2647      case OP_NOTSTAR:      case OP_NOTSTAR:
2648      case OP_NOTSTARI:      case OP_NOTSTARI:
2649    
2650      case OP_MINSTAR:      case OP_MINSTAR:
2651      case OP_MINSTARI:      case OP_MINSTARI:
2652      case OP_NOTMINSTAR:      case OP_NOTMINSTAR:
2653      case OP_NOTMINSTARI:      case OP_NOTMINSTARI:
2654    
2655      case OP_POSSTAR:      case OP_POSSTAR:
2656      case OP_POSSTARI:      case OP_POSSTARI:
2657      case OP_NOTPOSSTAR:      case OP_NOTPOSSTAR:
2658      case OP_NOTPOSSTARI:      case OP_NOTPOSSTARI:
2659    
2660      case OP_QUERY:      case OP_QUERY:
2661      case OP_QUERYI:      case OP_QUERYI:
2662      case OP_NOTQUERY:      case OP_NOTQUERY:
2663      case OP_NOTQUERYI:      case OP_NOTQUERYI:
2664    
2665      case OP_MINQUERY:      case OP_MINQUERY:
2666      case OP_MINQUERYI:      case OP_MINQUERYI:
2667      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
2668      case OP_NOTMINQUERYI:      case OP_NOTMINQUERYI:
2669    
2670      case OP_POSQUERY:      case OP_POSQUERY:
2671      case OP_POSQUERYI:      case OP_POSQUERYI:
2672      case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
2673      case OP_NOTPOSQUERYI:      case OP_NOTPOSQUERYI:
2674    
2675      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
2676      break;      break;
2677    
2678      case OP_UPTO:      case OP_UPTO:
2679      case OP_UPTOI:      case OP_UPTOI:
2680      case OP_NOTUPTO:      case OP_NOTUPTO:
2681      case OP_NOTUPTOI:      case OP_NOTUPTOI:
2682    
2683      case OP_MINUPTO:      case OP_MINUPTO:
2684      case OP_MINUPTOI:      case OP_MINUPTOI:
2685      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
2686      case OP_NOTMINUPTOI:      case OP_NOTMINUPTOI:
2687    
2688      case OP_POSUPTO:      case OP_POSUPTO:
2689      case OP_POSUPTOI:      case OP_POSUPTOI:
2690      case OP_NOTPOSUPTO:      case OP_NOTPOSUPTO:
2691      case OP_NOTPOSUPTOI:      case OP_NOTPOSUPTOI:
2692    
2693      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
2694      break;      break;
2695  #endif  #endif
# Line 3754  to find out the amount of memory needed, Line 3754  to find out the amount of memory needed,
3754  phase. The value of lengthptr distinguishes the two phases.  phase. The value of lengthptr distinguishes the two phases.
3755    
3756  Arguments:  Arguments:
3757    optionsptr     pointer to the option bits    optionsptr        pointer to the option bits
3758    codeptr        points to the pointer to the current code point    codeptr           points to the pointer to the current code point
3759    ptrptr         points to the current pattern pointer    ptrptr            points to the current pattern pointer
3760    errorcodeptr   points to error code variable    errorcodeptr      points to error code variable
3761    firstcharptr    place to put the first required character    firstcharptr      place to put the first required character
3762    firstcharflagsptr place to put the first character flags, or a negative number    firstcharflagsptr place to put the first character flags, or a negative number
3763    reqcharptr     place to put the last required character    reqcharptr        place to put the last required character
3764    reqcharflagsptr place to put the last required character flags, or a negative number    reqcharflagsptr   place to put the last required character flags, or a negative number
3765    bcptr          points to current branch chain    bcptr             points to current branch chain
3766    cond_depth     conditional nesting depth    cond_depth        conditional nesting depth
3767    cd             contains pointers to tables etc.    cd                contains pointers to tables etc.
3768    lengthptr      NULL during the real compile phase    lengthptr         NULL during the real compile phase
3769                   points to length accumulator during pre-compile phase                      points to length accumulator during pre-compile phase
3770    
3771  Returns:         TRUE on success  Returns:            TRUE on success
3772                   FALSE, with *errorcodeptr set non-zero on error                      FALSE, with *errorcodeptr set non-zero on error
3773  */  */
3774    
3775  static BOOL  static BOOL
# Line 7058  for (;; ptr++) Line 7058  for (;; ptr++)
7058          *code++ = OP_PROP;          *code++ = OP_PROP;
7059          *code++ = PT_CLIST;          *code++ = PT_CLIST;
7060          *code++ = c;          *code++ = c;
7061          if (firstcharflags == REQ_UNSET) firstcharflags = zerofirstcharflags = REQ_NONE;          if (firstcharflags == REQ_UNSET)
7062              firstcharflags = zerofirstcharflags = REQ_NONE;
7063          break;          break;
7064          }          }
7065        }        }
# Line 7147  out the amount of memory needed, as well Line 7148  out the amount of memory needed, as well
7148  value of lengthptr distinguishes the two phases.  value of lengthptr distinguishes the two phases.
7149    
7150  Arguments:  Arguments:
7151    options        option bits, including any changes for this subpattern    options           option bits, including any changes for this subpattern
7152    codeptr        -> the address of the current code pointer    codeptr           -> the address of the current code pointer
7153    ptrptr         -> the address of the current pattern pointer    ptrptr            -> the address of the current pattern pointer
7154    errorcodeptr   -> pointer to error code variable    errorcodeptr      -> pointer to error code variable
7155    lookbehind     TRUE if this is a lookbehind assertion    lookbehind        TRUE if this is a lookbehind assertion
7156    reset_bracount TRUE to reset the count for each branch    reset_bracount    TRUE to reset the count for each branch
7157    skipbytes      skip this many bytes at start (for brackets and OP_COND)    skipbytes         skip this many bytes at start (for brackets and OP_COND)
7158    cond_depth     depth of nesting for conditional subpatterns    cond_depth        depth of nesting for conditional subpatterns
7159    firstcharptr    place to put the first required character    firstcharptr      place to put the first required character
7160    firstcharflagsptr place to put the first character flags, or a negative number    firstcharflagsptr place to put the first character flags, or a negative number
7161    reqcharptr     place to put the last required character    reqcharptr        place to put the last required character
7162    reqcharflagsptr place to put the last required character flags, or a negative number    reqcharflagsptr   place to put the last required character flags, or a negative number
7163    bcptr          pointer to the chain of currently open branches    bcptr             pointer to the chain of currently open branches
7164    cd             points to the data block with tables pointers etc.    cd                points to the data block with tables pointers etc.
7165    lengthptr      NULL during the real compile phase    lengthptr         NULL during the real compile phase
7166                   points to length accumulator during pre-compile phase                      points to length accumulator during pre-compile phase
7167    
7168  Returns:         TRUE on success  Returns:            TRUE on success
7169  */  */
7170    
7171  static BOOL  static BOOL
# Line 7701  return TRUE; Line 7702  return TRUE;
7702  discarded, because they can cause conflicts with actual literals that follow.  discarded, because they can cause conflicts with actual literals that follow.
7703  However, if we end up without a first char setting for an unanchored pattern,  However, if we end up without a first char setting for an unanchored pattern,
7704  it is worth scanning the regex to see if there is an initial asserted first  it is worth scanning the regex to see if there is an initial asserted first
7705  char. If all branches start with the same asserted char, or with a bracket all  char. If all branches start with the same asserted char, or with a
7706  of whose alternatives start with the same asserted char (recurse ad lib), then  non-conditional bracket all of whose alternatives start with the same asserted
7707  we return that char, otherwise -1.  char (recurse ad lib), then we return that char, with the flags set to zero or
7708    REQ_CASELESS; otherwise return zero with REQ_NONE in the flags.
7709    
7710  Arguments:  Arguments:
7711    code       points to start of expression (the bracket)    code       points to start of expression (the bracket)
7712    flags       points to the first char flags, or to REQ_NONE    flags      points to the first char flags, or to REQ_NONE
7713    inassert   TRUE if in an assertion    inassert   TRUE if in an assertion
7714    
7715  Returns:     the fixed first char, or 0 with REQ_NONE in flags  Returns:     the fixed first char, or 0 with REQ_NONE in flags
# Line 7744  do { Line 7746  do {
7746       case OP_ASSERT:       case OP_ASSERT:
7747       case OP_ONCE:       case OP_ONCE:
7748       case OP_ONCE_NC:       case OP_ONCE_NC:
      case OP_COND:  
7749       d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);       d = find_firstassertedchar(scode, &dflags, op == OP_ASSERT);
7750       if (dflags < 0)       if (dflags < 0)
7751         return 0;         return 0;
# Line 8297  if (cd->hwm > cd->start_workspace) Line 8298  if (cd->hwm > cd->start_workspace)
8298      }      }
8299    }    }
8300    
8301  /* If the workspace had to be expanded, free the new memory. Set the pointer to  /* If the workspace had to be expanded, free the new memory. Set the pointer to
8302  NULL to indicate that forward references have been filled in. */  NULL to indicate that forward references have been filled in. */
8303    
8304  if (cd->workspace_size > COMPILE_WORK_SIZE)  if (cd->workspace_size > COMPILE_WORK_SIZE)
8305    (PUBL(free))((void *)cd->start_workspace);    (PUBL(free))((void *)cd->start_workspace);
8306  cd->start_workspace = NULL;  cd->start_workspace = NULL;
8307    
8308  /* Give an error if there's a back reference to a non-existent capturing  /* Give an error if there's a back reference to a non-existent capturing
8309  subpattern. */  subpattern. */
# Line 8506  if (code - codestart > length) Line 8507  if (code - codestart > length)
8507    }    }
8508  #endif   /* PCRE_DEBUG */  #endif   /* PCRE_DEBUG */
8509    
8510  /* Check for a pattern that can match an empty string, so that this information  /* Check for a pattern that can match an empty string, so that this information
8511  can be provided to applications. */  can be provided to applications. */
8512    
8513  do  do
# Line 8515  do Line 8516  do
8516      {      {
8517      re->flags |= PCRE_MATCH_EMPTY;      re->flags |= PCRE_MATCH_EMPTY;
8518      break;      break;
8519      }      }
8520    codestart += GET(codestart, 1);    codestart += GET(codestart, 1);
8521    }    }
8522  while (*codestart == OP_ALT);  while (*codestart == OP_ALT);

Legend:
Removed from v.1348  
changed lines
  Added in v.1352

  ViewVC Help
Powered by ViewVC 1.1.5