/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1309 by ph10, Fri Apr 5 15:35:59 2013 UTC revision 1348 by ph10, Fri Jul 5 10:38:37 2013 UTC
# Line 487  static const char error_texts[] = Line 487  static const char error_texts[] =
487    "a numbered reference must not be zero\0"    "a numbered reference must not be zero\0"
488    "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"    "an argument is not allowed for (*ACCEPT), (*FAIL), or (*COMMIT)\0"
489    /* 60 */    /* 60 */
490    "(*VERB) not recognized\0"    "(*VERB) not recognized or malformed\0"
491    "number is too big\0"    "number is too big\0"
492    "subpattern name expected\0"    "subpattern name expected\0"
493    "digit expected after (?+\0"    "digit expected after (?+\0"
# Line 798  Otherwise further processing may be requ Line 798  Otherwise further processing may be requ
798  #ifndef EBCDIC  /* ASCII/UTF-8 coding */  #ifndef EBCDIC  /* ASCII/UTF-8 coding */
799  /* Not alphanumeric */  /* Not alphanumeric */
800  else if (c < CHAR_0 || c > CHAR_z) {}  else if (c < CHAR_0 || c > CHAR_z) {}
801  else if ((i = escapes[c - CHAR_0]) != 0)  else if ((i = escapes[c - CHAR_0]) != 0)
802    { if (i > 0) c = (pcre_uint32)i; else escape = -i; }    { if (i > 0) c = (pcre_uint32)i; else escape = -i; }
803    
804  #else           /* EBCDIC coding */  #else           /* EBCDIC coding */
# Line 1410  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1410  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1410    {    {
1411    /* Handle specials such as (*SKIP) or (*UTF8) etc. */    /* Handle specials such as (*SKIP) or (*UTF8) etc. */
1412    
1413    if (ptr[1] == CHAR_ASTERISK)    if (ptr[1] == CHAR_ASTERISK)
1414      {      {
1415      ptr += 2;      ptr += 2;
1416      while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;      while (ptr < cd->end_pattern && *ptr != CHAR_RIGHT_PARENTHESIS) ptr++;
1417      }      }
1418    
1419    /* Handle a normal, unnamed capturing parenthesis. */    /* Handle a normal, unnamed capturing parenthesis. */
1420    
# Line 2353  Arguments: Line 2353  Arguments:
2353    endcode     points to where to stop    endcode     points to where to stop
2354    utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode    utf         TRUE if in UTF-8 / UTF-16 / UTF-32 mode
2355    cd          contains pointers to tables etc.    cd          contains pointers to tables etc.
2356      recurses    chain of recurse_check to catch mutual recursion
2357    
2358  Returns:      TRUE if what is matched could be empty  Returns:      TRUE if what is matched could be empty
2359  */  */
2360    
2361    typedef struct recurse_check {
2362      struct recurse_check *prev;
2363      const pcre_uchar *group;
2364    } recurse_check;
2365    
2366  static BOOL  static BOOL
2367  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,  could_be_empty_branch(const pcre_uchar *code, const pcre_uchar *endcode,
2368    BOOL utf, compile_data *cd)    BOOL utf, compile_data *cd, recurse_check *recurses)
2369  {  {
2370  register pcre_uchar c;  register pcre_uchar c;
2371    recurse_check this_recurse;
2372    
2373  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);  for (code = first_significant_code(code + PRIV(OP_lengths)[*code], TRUE);
2374       code < endcode;       code < endcode;
2375       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))       code = first_significant_code(code + PRIV(OP_lengths)[c], TRUE))
# Line 2369  for (code = first_significant_code(code Line 2377  for (code = first_significant_code(code
2377    const pcre_uchar *ccode;    const pcre_uchar *ccode;
2378    
2379    c = *code;    c = *code;
2380    
2381    /* Skip over forward assertions; the other assertions are skipped by    /* Skip over forward assertions; the other assertions are skipped by
2382    first_significant_code() with a TRUE final argument. */    first_significant_code() with a TRUE final argument. */
2383    
# Line 2389  for (code = first_significant_code(code Line 2397  for (code = first_significant_code(code
2397    
2398    if (c == OP_RECURSE)    if (c == OP_RECURSE)
2399      {      {
2400      const pcre_uchar *scode;      const pcre_uchar *scode = cd->start_code + GET(code, 1);
2401      BOOL empty_branch;      BOOL empty_branch;
2402    
2403      /* Test for forward reference */      /* Test for forward reference or uncompleted reference. This is disabled
2404        when called to scan a completed pattern by setting cd->start_workspace to
2405      for (scode = cd->start_workspace; scode < cd->hwm; scode += LINK_SIZE)      NULL. */
2406        if ((int)GET(scode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;  
2407        if (cd->start_workspace != NULL)
2408          {
2409          const pcre_uchar *tcode;
2410          for (tcode = cd->start_workspace; tcode < cd->hwm; tcode += LINK_SIZE)
2411            if ((int)GET(tcode, 0) == (int)(code + 1 - cd->start_code)) return TRUE;
2412          if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */
2413          }
2414    
2415        /* If we are scanning a completed pattern, there are no forward references
2416        and all groups are complete. We need to detect whether this is a recursive
2417        call, as otherwise there will be an infinite loop. If it is a recursion,
2418        just skip over it. Simple recursions are easily detected. For mutual
2419        recursions we keep a chain on the stack. */
2420    
2421        else
2422          {
2423          recurse_check *r = recurses;
2424          const pcre_uchar *endgroup = scode;
2425    
2426          do endgroup += GET(endgroup, 1); while (*endgroup == OP_ALT);
2427          if (code >= scode && code <= endgroup) continue;  /* Simple recursion */
2428    
2429          for (r = recurses; r != NULL; r = r->prev)
2430            if (r->group == scode) break;
2431          if (r != NULL) continue;   /* Mutual recursion */
2432          }
2433    
2434      /* Not a forward reference, test for completed backward reference */      /* Completed reference; scan the referenced group, remembering it on the
2435        stack chain to detect mutual recursions. */
2436    
2437      empty_branch = FALSE;      empty_branch = FALSE;
2438      scode = cd->start_code + GET(code, 1);      this_recurse.prev = recurses;
2439      if (GET(scode, 1) == 0) return TRUE;    /* Unclosed */      this_recurse.group = scode;
2440    
     /* Completed backwards reference */  
   
2441      do      do
2442        {        {
2443        if (could_be_empty_branch(scode, endcode, utf, cd))        if (could_be_empty_branch(scode, endcode, utf, cd, &this_recurse))
2444          {          {
2445          empty_branch = TRUE;          empty_branch = TRUE;
2446          break;          break;
# Line 2463  for (code = first_significant_code(code Line 2496  for (code = first_significant_code(code
2496        empty_branch = FALSE;        empty_branch = FALSE;
2497        do        do
2498          {          {
2499          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd))          if (!empty_branch && could_be_empty_branch(code, endcode, utf, cd, NULL))
2500            empty_branch = TRUE;            empty_branch = TRUE;
2501          code += GET(code, 1);          code += GET(code, 1);
2502          }          }
# Line 2521  for (code = first_significant_code(code Line 2554  for (code = first_significant_code(code
2554    
2555      /* Opcodes that must match a character */      /* Opcodes that must match a character */
2556    
2557        case OP_ANY:
2558        case OP_ALLANY:
2559        case OP_ANYBYTE:
2560    
2561      case OP_PROP:      case OP_PROP:
2562      case OP_NOTPROP:      case OP_NOTPROP:
2563        case OP_ANYNL:
2564    
2565        case OP_NOT_HSPACE:
2566        case OP_HSPACE:
2567        case OP_NOT_VSPACE:
2568        case OP_VSPACE:
2569      case OP_EXTUNI:      case OP_EXTUNI:
2570    
2571      case OP_NOT_DIGIT:      case OP_NOT_DIGIT:
2572      case OP_DIGIT:      case OP_DIGIT:
2573      case OP_NOT_WHITESPACE:      case OP_NOT_WHITESPACE:
2574      case OP_WHITESPACE:      case OP_WHITESPACE:
2575      case OP_NOT_WORDCHAR:      case OP_NOT_WORDCHAR:
2576      case OP_WORDCHAR:      case OP_WORDCHAR:
2577      case OP_ANY:  
     case OP_ALLANY:  
     case OP_ANYBYTE:  
2578      case OP_CHAR:      case OP_CHAR:
2579      case OP_CHARI:      case OP_CHARI:
2580      case OP_NOT:      case OP_NOT:
2581      case OP_NOTI:      case OP_NOTI:
2582    
2583      case OP_PLUS:      case OP_PLUS:
2584        case OP_PLUSI:
2585      case OP_MINPLUS:      case OP_MINPLUS:
2586      case OP_POSPLUS:      case OP_MINPLUSI:
2587      case OP_EXACT:  
2588      case OP_NOTPLUS:      case OP_NOTPLUS:
2589        case OP_NOTPLUSI:
2590      case OP_NOTMINPLUS:      case OP_NOTMINPLUS:
2591        case OP_NOTMINPLUSI:
2592    
2593        case OP_POSPLUS:
2594        case OP_POSPLUSI:
2595      case OP_NOTPOSPLUS:      case OP_NOTPOSPLUS:
2596        case OP_NOTPOSPLUSI:
2597    
2598        case OP_EXACT:
2599        case OP_EXACTI:
2600      case OP_NOTEXACT:      case OP_NOTEXACT:
2601        case OP_NOTEXACTI:
2602    
2603      case OP_TYPEPLUS:      case OP_TYPEPLUS:
2604      case OP_TYPEMINPLUS:      case OP_TYPEMINPLUS:
2605      case OP_TYPEPOSPLUS:      case OP_TYPEPOSPLUS:
2606      case OP_TYPEEXACT:      case OP_TYPEEXACT:
2607    
2608      return FALSE;      return FALSE;
2609    
2610      /* These are going to continue, as they may be empty, but we have to      /* These are going to continue, as they may be empty, but we have to
# Line 2582  for (code = first_significant_code(code Line 2638  for (code = first_significant_code(code
2638      return TRUE;      return TRUE;
2639    
2640      /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,      /* In UTF-8 mode, STAR, MINSTAR, POSSTAR, QUERY, MINQUERY, POSQUERY, UPTO,
2641      MINUPTO, and POSUPTO may be followed by a multibyte character */      MINUPTO, and POSUPTO and their caseless and negative versions may be
2642        followed by a multibyte character. */
2643    
2644  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2645      case OP_STAR:      case OP_STAR:
2646      case OP_STARI:      case OP_STARI:
2647        case OP_NOTSTAR:
2648        case OP_NOTSTARI:
2649    
2650      case OP_MINSTAR:      case OP_MINSTAR:
2651      case OP_MINSTARI:      case OP_MINSTARI:
2652        case OP_NOTMINSTAR:
2653        case OP_NOTMINSTARI:
2654    
2655      case OP_POSSTAR:      case OP_POSSTAR:
2656      case OP_POSSTARI:      case OP_POSSTARI:
2657        case OP_NOTPOSSTAR:
2658        case OP_NOTPOSSTARI:
2659    
2660      case OP_QUERY:      case OP_QUERY:
2661      case OP_QUERYI:      case OP_QUERYI:
2662        case OP_NOTQUERY:
2663        case OP_NOTQUERYI:
2664    
2665      case OP_MINQUERY:      case OP_MINQUERY:
2666      case OP_MINQUERYI:      case OP_MINQUERYI:
2667        case OP_NOTMINQUERY:
2668        case OP_NOTMINQUERYI:
2669    
2670      case OP_POSQUERY:      case OP_POSQUERY:
2671      case OP_POSQUERYI:      case OP_POSQUERYI:
2672        case OP_NOTPOSQUERY:
2673        case OP_NOTPOSQUERYI:
2674    
2675      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);      if (utf && HAS_EXTRALEN(code[1])) code += GET_EXTRALEN(code[1]);
2676      break;      break;
2677    
2678      case OP_UPTO:      case OP_UPTO:
2679      case OP_UPTOI:      case OP_UPTOI:
2680        case OP_NOTUPTO:
2681        case OP_NOTUPTOI:
2682    
2683      case OP_MINUPTO:      case OP_MINUPTO:
2684      case OP_MINUPTOI:      case OP_MINUPTOI:
2685        case OP_NOTMINUPTO:
2686        case OP_NOTMINUPTOI:
2687    
2688      case OP_POSUPTO:      case OP_POSUPTO:
2689      case OP_POSUPTOI:      case OP_POSUPTOI:
2690        case OP_NOTPOSUPTO:
2691        case OP_NOTPOSUPTOI:
2692    
2693      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);      if (utf && HAS_EXTRALEN(code[1 + IMM2_SIZE])) code += GET_EXTRALEN(code[1 + IMM2_SIZE]);
2694      break;      break;
2695  #endif  #endif
# Line 2659  could_be_empty(const pcre_uchar *code, c Line 2743  could_be_empty(const pcre_uchar *code, c
2743  {  {
2744  while (bcptr != NULL && bcptr->current_branch >= code)  while (bcptr != NULL && bcptr->current_branch >= code)
2745    {    {
2746    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd))    if (!could_be_empty_branch(bcptr->current_branch, endcode, utf, cd, NULL))
2747      return FALSE;      return FALSE;
2748    bcptr = bcptr->outer;    bcptr = bcptr->outer;
2749    }    }
# Line 3091  value is a character, a negative value i Line 3175  value is a character, a negative value i
3175  if (*ptr == CHAR_BACKSLASH)  if (*ptr == CHAR_BACKSLASH)
3176    {    {
3177    int temperrorcode = 0;    int temperrorcode = 0;
3178    escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,    escape = check_escape(&ptr, &next, &temperrorcode, cd->bracount, options,
3179      FALSE);      FALSE);
3180    if (temperrorcode != 0) return FALSE;    if (temperrorcode != 0) return FALSE;
3181    ptr++;    /* Point after the escape sequence */    ptr++;    /* Point after the escape sequence */
# Line 4275  for (;; ptr++) Line 4359  for (;; ptr++)
4359    
4360        if (c == CHAR_BACKSLASH)        if (c == CHAR_BACKSLASH)
4361          {          {
4362          escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,          escape = check_escape(&ptr, &ec, errorcodeptr, cd->bracount, options,
4363            TRUE);            TRUE);
4364          if (*errorcodeptr != 0) goto FAILED;          if (*errorcodeptr != 0) goto FAILED;
4365          if (escape == 0) c = ec;          if (escape == 0) c = ec;
# Line 5391  for (;; ptr++) Line 5475  for (;; ptr++)
5475              pcre_uchar *scode = bracode;              pcre_uchar *scode = bracode;
5476              do              do
5477                {                {
5478                if (could_be_empty_branch(scode, ketcode, utf, cd))                if (could_be_empty_branch(scode, ketcode, utf, cd, NULL))
5479                  {                  {
5480                  *bracode += OP_SBRA - OP_BRA;                  *bracode += OP_SBRA - OP_BRA;
5481                  break;                  break;
# Line 5725  for (;; ptr++) Line 5809  for (;; ptr++)
5809          /* ------------------------------------------------------------ */          /* ------------------------------------------------------------ */
5810          case CHAR_LEFT_PARENTHESIS:          case CHAR_LEFT_PARENTHESIS:
5811          bravalue = OP_COND;       /* Conditional group */          bravalue = OP_COND;       /* Conditional group */
5812          tempptr = ptr;          tempptr = ptr;
5813    
5814          /* A condition can be an assertion, a number (referring to a numbered          /* A condition can be an assertion, a number (referring to a numbered
5815          group), a name (referring to a named group), or 'R', referring to          group), a name (referring to a named group), or 'R', referring to
# Line 5739  for (;; ptr++) Line 5823  for (;; ptr++)
5823          by digits), and (b) a number could be a name that consists of digits.          by digits), and (b) a number could be a name that consists of digits.
5824          In both cases, we look for a name first; if not found, we try the other          In both cases, we look for a name first; if not found, we try the other
5825          cases.          cases.
5826    
5827          For compatibility with auto-callouts, we allow a callout to be          For compatibility with auto-callouts, we allow a callout to be
5828          specified before a condition that is an assertion. First, check for the          specified before a condition that is an assertion. First, check for the
5829          syntax of a callout; if found, adjust the temporary pointer that is          syntax of a callout; if found, adjust the temporary pointer that is
5830          used to check for an assertion condition. That's all that is needed! */          used to check for an assertion condition. That's all that is needed! */
5831    
5832          if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)          if (ptr[1] == CHAR_QUESTION_MARK && ptr[2] == CHAR_C)
5833            {            {
5834            for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;            for (i = 3;; i++) if (!IS_DIGIT(ptr[i])) break;
5835            if (ptr[i] == CHAR_RIGHT_PARENTHESIS)            if (ptr[i] == CHAR_RIGHT_PARENTHESIS)
5836              tempptr += i + 1;              tempptr += i + 1;
5837            }            }
5838    
5839          /* For conditions that are assertions, check the syntax, and then exit          /* For conditions that are assertions, check the syntax, and then exit
5840          the switch. This will take control down to where bracketed groups,          the switch. This will take control down to where bracketed groups,
5841          including assertions, are processed. */          including assertions, are processed. */
5842    
5843          if (tempptr[1] == CHAR_QUESTION_MARK &&          if (tempptr[1] == CHAR_QUESTION_MARK &&
5844                (tempptr[2] == CHAR_EQUALS_SIGN ||                (tempptr[2] == CHAR_EQUALS_SIGN ||
5845                 tempptr[2] == CHAR_EXCLAMATION_MARK ||                 tempptr[2] == CHAR_EXCLAMATION_MARK ||
5846                 tempptr[2] == CHAR_LESS_THAN_SIGN))                 tempptr[2] == CHAR_LESS_THAN_SIGN))
5847            break;            break;
5848    
# Line 6901  for (;; ptr++) Line 6985  for (;; ptr++)
6985    
6986        else        else
6987          {          {
6988          if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&          if ((escape == ESC_b || escape == ESC_B || escape == ESC_A) &&
6989               cd->max_lookbehind == 0)               cd->max_lookbehind == 0)
6990            cd->max_lookbehind = 1;            cd->max_lookbehind = 1;
6991  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 7766  pcre32_compile2(PCRE_SPTR32 pattern, int Line 7850  pcre32_compile2(PCRE_SPTR32 pattern, int
7850  {  {
7851  REAL_PCRE *re;  REAL_PCRE *re;
7852  int length = 1;  /* For final END opcode */  int length = 1;  /* For final END opcode */
 pcre_uint32 firstchar, reqchar;  
7853  pcre_int32 firstcharflags, reqcharflags;  pcre_int32 firstcharflags, reqcharflags;
7854    pcre_uint32 firstchar, reqchar;
7855    pcre_uint32 limit_match = PCRE_UINT32_MAX;
7856    pcre_uint32 limit_recursion = PCRE_UINT32_MAX;
7857  int newline;  int newline;
7858  int errorcode = 0;  int errorcode = 0;
7859  int skipatstart = 0;  int skipatstart = 0;
# Line 7831  if ((options & ~PUBLIC_COMPILE_OPTIONS) Line 7917  if ((options & ~PUBLIC_COMPILE_OPTIONS)
7917    errorcode = ERR17;    errorcode = ERR17;
7918    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
7919    }    }
   
 /* If PCRE_NEVER_UTF is set, remember it. As this option steals a bit that is  
 also used for execution options, flatten it just in case. */  
7920    
7921  if ((options & PCRE_NEVER_UTF) != 0)  /* If PCRE_NEVER_UTF is set, remember it. */
7922    {  
7923    never_utf = TRUE;  if ((options & PCRE_NEVER_UTF) != 0) never_utf = TRUE;
   options &= ~PCRE_NEVER_UTF;  
   }  
7924    
7925  /* Check for global one-time settings at the start of the pattern, and remember  /* Check for global one-time settings at the start of the pattern, and remember
7926  the offset for later. */  the offset for later. */
7927    
7928    cd->external_flags = 0;   /* Initialize here for LIMIT_MATCH/RECURSION */
7929    
7930  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&  while (ptr[skipatstart] == CHAR_LEFT_PARENTHESIS &&
7931         ptr[skipatstart+1] == CHAR_ASTERISK)         ptr[skipatstart+1] == CHAR_ASTERISK)
7932    {    {
# Line 7874  PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. * Line 7957  PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. *
7957    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_NO_START_OPT_RIGHTPAR, 13) == 0)
7958      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }      { skipatstart += 15; options |= PCRE_NO_START_OPTIMIZE; continue; }
7959    
7960      else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_MATCH_EQ, 12) == 0)
7961        {
7962        pcre_uint32 c = 0;
7963        int p = skipatstart + 14;
7964        while (isdigit(ptr[p]))
7965          {
7966          if (c > PCRE_UINT32_MAX / 10 - 1) break;   /* Integer overflow */
7967          c = c*10 + ptr[p++] - CHAR_0;
7968          }
7969        if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
7970        if (c < limit_match)
7971          {
7972          limit_match = c;
7973          cd->external_flags |= PCRE_MLSET;
7974          }
7975        skipatstart = p;
7976        continue;
7977        }
7978    
7979      else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LIMIT_RECURSION_EQ, 16) == 0)
7980        {
7981        pcre_uint32 c = 0;
7982        int p = skipatstart + 18;
7983        while (isdigit(ptr[p]))
7984          {
7985          if (c > PCRE_UINT32_MAX / 10 - 1) break;   /* Integer overflow check */
7986          c = c*10 + ptr[p++] - CHAR_0;
7987          }
7988        if (ptr[p++] != CHAR_RIGHT_PARENTHESIS) break;
7989        if (c < limit_recursion)
7990          {
7991          limit_recursion = c;
7992          cd->external_flags |= PCRE_RLSET;
7993          }
7994        skipatstart = p;
7995        continue;
7996        }
7997    
7998    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)    if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_CR_RIGHTPAR, 3) == 0)
7999      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
8000    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)    else if (STRNCMP_UC_C8(ptr+skipatstart+2, STRING_LF_RIGHTPAR, 3)  == 0)
# Line 7896  PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. * Line 8017  PCRE_UTF8 == PCRE_UTF16 == PCRE_UTF32. *
8017      options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;      options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
8018    else break;    else break;
8019    }    }
8020    
8021  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
8022  utf = (options & PCRE_UTF8) != 0;  utf = (options & PCRE_UTF8) != 0;
8023  if (utf && never_utf)  if (utf && never_utf)
8024    {    {
8025    errorcode = ERR78;    errorcode = ERR78;
8026    goto PCRE_EARLY_ERROR_RETURN2;    goto PCRE_EARLY_ERROR_RETURN2;
8027    }    }
8028    
8029  /* Can't support UTF unless PCRE has been compiled to include the code. The  /* Can't support UTF unless PCRE has been compiled to include the code. The
8030  return of an error code from PRIV(valid_utf)() is a new feature, introduced in  return of an error code from PRIV(valid_utf)() is a new feature, introduced in
# Line 8026  cd->req_varyopt = 0; Line 8147  cd->req_varyopt = 0;
8147  cd->assert_depth = 0;  cd->assert_depth = 0;
8148  cd->max_lookbehind = 0;  cd->max_lookbehind = 0;
8149  cd->external_options = options;  cd->external_options = options;
 cd->external_flags = 0;  
8150  cd->open_caps = NULL;  cd->open_caps = NULL;
8151    
8152  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we  /* Now do the pre-compile. On error, errorcode will be set non-zero, so we
# Line 8076  re->magic_number = MAGIC_NUMBER; Line 8196  re->magic_number = MAGIC_NUMBER;
8196  re->size = (int)size;  re->size = (int)size;
8197  re->options = cd->external_options;  re->options = cd->external_options;
8198  re->flags = cd->external_flags;  re->flags = cd->external_flags;
8199    re->limit_match = limit_match;
8200    re->limit_recursion = limit_recursion;
8201  re->first_char = 0;  re->first_char = 0;
8202  re->req_char = 0;  re->req_char = 0;
8203  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);  re->name_table_offset = sizeof(REAL_PCRE) / sizeof(pcre_uchar);
# Line 8085  re->ref_count = 0; Line 8207  re->ref_count = 0;
8207  re->tables = (tables == PRIV(default_tables))? NULL : tables;  re->tables = (tables == PRIV(default_tables))? NULL : tables;
8208  re->nullpad = NULL;  re->nullpad = NULL;
8209  #ifdef COMPILE_PCRE32  #ifdef COMPILE_PCRE32
8210  re->dummy1 = re->dummy2 = 0;  re->dummy = 0;
8211    #else
8212    re->dummy1 = re->dummy2 = re->dummy3 = 0;
8213  #endif  #endif
8214    
8215  /* The starting points of the name/number translation table and of the code are  /* The starting points of the name/number translation table and of the code are
# Line 8145  if (code - codestart > length) errorcode Line 8269  if (code - codestart > length) errorcode
8269    
8270  #ifdef SUPPORT_VALGRIND  #ifdef SUPPORT_VALGRIND
8271  /* If the estimated length exceeds the really used length, mark the extra  /* If the estimated length exceeds the really used length, mark the extra
8272  allocated memory as unadressable, so that any out-of-bound reads can be  allocated memory as unaddressable, so that any out-of-bound reads can be
8273  detected. */  detected. */
8274  VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar));  VALGRIND_MAKE_MEM_NOACCESS(code, (length - (code - codestart)) * sizeof(pcre_uchar));
8275  #endif  #endif
# Line 8173  if (cd->hwm > cd->start_workspace) Line 8297  if (cd->hwm > cd->start_workspace)
8297      }      }
8298    }    }
8299    
8300  /* If the workspace had to be expanded, free the new memory. */  /* If the workspace had to be expanded, free the new memory. Set the pointer to
8301    NULL to indicate that forward references have been filled in. */
8302    
8303  if (cd->workspace_size > COMPILE_WORK_SIZE)  if (cd->workspace_size > COMPILE_WORK_SIZE)
8304    (PUBL(free))((void *)cd->start_workspace);    (PUBL(free))((void *)cd->start_workspace);
8305    cd->start_workspace = NULL;
8306    
8307  /* Give an error if there's back reference to a non-existent capturing  /* Give an error if there's back reference to a non-existent capturing
8308  subpattern. */  subpattern. */
# Line 8380  if (code - codestart > length) Line 8506  if (code - codestart > length)
8506    }    }
8507  #endif   /* PCRE_DEBUG */  #endif   /* PCRE_DEBUG */
8508    
8509    /* Check for a pattern that can match an empty string, so that this information
8510    can be provided to applications. */
8511    
8512    do
8513      {
8514      if (could_be_empty_branch(codestart, code, utf, cd, NULL))
8515        {
8516        re->flags |= PCRE_MATCH_EMPTY;
8517        break;
8518        }
8519      codestart += GET(codestart, 1);
8520      }
8521    while (*codestart == OP_ALT);
8522    
8523  #if defined COMPILE_PCRE8  #if defined COMPILE_PCRE8
8524  return (pcre *)re;  return (pcre *)re;
8525  #elif defined COMPILE_PCRE16  #elif defined COMPILE_PCRE16

Legend:
Removed from v.1309  
changed lines
  Added in v.1348

  ViewVC Help
Powered by ViewVC 1.1.5