/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 533 by ph10, Wed Jun 2 19:02:41 2010 UTC revision 545 by ph10, Wed Jun 16 10:51:15 2010 UTC
# Line 261  static const int posix_class_maps[] = { Line 261  static const int posix_class_maps[] = {
261    cbit_xdigit,-1,          0              /* xdigit */    cbit_xdigit,-1,          0              /* xdigit */
262  };  };
263    
264  /* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class  /* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class
265  substitutes must be in the order of the names, defined above, and there are  substitutes must be in the order of the names, defined above, and there are
266  both positive and negative cases. NULL means no substitute. */  both positive and negative cases. NULL means no substitute. */
267    
268  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
# Line 272  static const uschar *substitutes[] = { Line 272  static const uschar *substitutes[] = {
272    (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */    (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */
273    (uschar *)"\\p{Xsp}",   /* \s */    (uschar *)"\\p{Xsp}",   /* \s */
274    (uschar *)"\\P{Xwd}",   /* \W */    (uschar *)"\\P{Xwd}",   /* \W */
275    (uschar *)"\\p{Xwd}"    /* \w */    (uschar *)"\\p{Xwd}"    /* \w */
276  };  };
277    
278  static const uschar *posix_substitutes[] = {  static const uschar *posix_substitutes[] = {
279    (uschar *)"\\p{L}",     /* alpha */    (uschar *)"\\p{L}",     /* alpha */
280    (uschar *)"\\p{Ll}",    /* lower */    (uschar *)"\\p{Ll}",    /* lower */
281    (uschar *)"\\p{Lu}",    /* upper */    (uschar *)"\\p{Lu}",    /* upper */
282    (uschar *)"\\p{Xan}",   /* alnum */    (uschar *)"\\p{Xan}",   /* alnum */
283    NULL,                   /* ascii */    NULL,                   /* ascii */
284    (uschar *)"\\h",        /* blank */    (uschar *)"\\h",        /* blank */
285    NULL,                   /* cntrl */    NULL,                   /* cntrl */
# Line 289  static const uschar *posix_substitutes[] Line 289  static const uschar *posix_substitutes[]
289    NULL,                   /* punct */    NULL,                   /* punct */
290    (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */    (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */
291    (uschar *)"\\p{Xwd}",   /* word */    (uschar *)"\\p{Xwd}",   /* word */
292    NULL,                   /* xdigit */    NULL,                   /* xdigit */
293    /* Negated cases */    /* Negated cases */
294    (uschar *)"\\P{L}",     /* ^alpha */    (uschar *)"\\P{L}",     /* ^alpha */
295    (uschar *)"\\P{Ll}",    /* ^lower */    (uschar *)"\\P{Ll}",    /* ^lower */
296    (uschar *)"\\P{Lu}",    /* ^upper */    (uschar *)"\\P{Lu}",    /* ^upper */
297    (uschar *)"\\P{Xan}",   /* ^alnum */    (uschar *)"\\P{Xan}",   /* ^alnum */
298    NULL,                   /* ^ascii */    NULL,                   /* ^ascii */
299    (uschar *)"\\H",        /* ^blank */    (uschar *)"\\H",        /* ^blank */
300    NULL,                   /* ^cntrl */    NULL,                   /* ^cntrl */
# Line 304  static const uschar *posix_substitutes[] Line 304  static const uschar *posix_substitutes[]
304    NULL,                   /* ^punct */    NULL,                   /* ^punct */
305    (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */    (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */
306    (uschar *)"\\P{Xwd}",   /* ^word */    (uschar *)"\\P{Xwd}",   /* ^word */
307    NULL                    /* ^xdigit */    NULL                    /* ^xdigit */
308  };  };
309  #define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))  #define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))
310  #endif  #endif
311    
312  #define STRING(a)  # a  #define STRING(a)  # a
313  #define XSTRING(s) STRING(s)  #define XSTRING(s) STRING(s)
# Line 407  static const char error_texts[] = Line 407  static const char error_texts[] =
407    /* 65 */    /* 65 */
408    "different names for subpatterns of the same number are not allowed\0"    "different names for subpatterns of the same number are not allowed\0"
409    "(*MARK) must have an argument\0"    "(*MARK) must have an argument\0"
410    "this version of PCRE is not compiled with PCRE_UCP support\0"    "this version of PCRE is not compiled with PCRE_UCP support\0"
411    ;    ;
412    
413  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 1129  dealing with. The very first call may no Line 1129  dealing with. The very first call may no
1129    
1130  if (ptr[0] == CHAR_LEFT_PARENTHESIS)  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1131    {    {
1132    if (ptr[1] == CHAR_QUESTION_MARK &&    /* Handle specials such as (*SKIP) or (*UTF8) etc. */
1133        ptr[2] == CHAR_VERTICAL_LINE)  
1134      if (ptr[1] == CHAR_ASTERISK) ptr += 2;
1135    
1136      /* Handle a normal, unnamed capturing parenthesis. */
1137    
1138      else if (ptr[1] != CHAR_QUESTION_MARK)
1139        {
1140        *count += 1;
1141        if (name == NULL && *count == lorn) return *count;
1142        ptr++;
1143        }
1144    
1145      /* All cases now have (? at the start. Remember when we are in a group
1146      where the parenthesis numbers are duplicated. */
1147    
1148      else if (ptr[2] == CHAR_VERTICAL_LINE)
1149      {      {
1150      ptr += 3;      ptr += 3;
1151      dup_parens = TRUE;      dup_parens = TRUE;
1152      }      }
1153    
1154    /* Handle a normal, unnamed capturing parenthesis */    /* Handle comments; all characters are allowed until a ket is reached. */
1155    
1156    else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)    else if (ptr[2] == CHAR_NUMBER_SIGN)
1157      {      {
1158      *count += 1;      for (ptr += 3; *ptr != 0; ptr++) if (*ptr == CHAR_RIGHT_PARENTHESIS) break;
1159      if (name == NULL && *count == lorn) return *count;      goto FAIL_EXIT;
     ptr++;  
1160      }      }
1161    
1162    /* Handle a condition. If it is an assertion, just carry on so that it    /* Handle a condition. If it is an assertion, just carry on so that it
1163    is processed as normal. If not, skip to the closing parenthesis of the    is processed as normal. If not, skip to the closing parenthesis of the
1164    condition (there can't be any nested parens. */    condition (there can't be any nested parens). */
1165    
1166    else if (ptr[2] == CHAR_LEFT_PARENTHESIS)    else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1167      {      {
# Line 1159  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1173  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1173        }        }
1174      }      }
1175    
1176    /* We have either (? or (* and not a condition */    /* Start with (? but not a condition. */
1177    
1178    else    else
1179      {      {
# Line 1281  for (; *ptr != 0; ptr++) Line 1295  for (; *ptr != 0; ptr++)
1295    else if (*ptr == CHAR_RIGHT_PARENTHESIS)    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1296      {      {
1297      if (dup_parens && *count < hwm_count) *count = hwm_count;      if (dup_parens && *count < hwm_count) *count = hwm_count;
1298      *ptrptr = ptr;      goto FAIL_EXIT;
     return -1;  
1299      }      }
1300    
1301    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
# Line 2407  Arguments: Line 2420  Arguments:
2420    ptype        the property type    ptype        the property type
2421    pdata        the data for the type    pdata        the data for the type
2422    negated      TRUE if it's a negated property (\P or \p{^)    negated      TRUE if it's a negated property (\P or \p{^)
2423    
2424  Returns:       TRUE if auto-possessifying is OK  Returns:       TRUE if auto-possessifying is OK
2425  */  */
2426    
2427  static BOOL  static BOOL
2428  check_char_prop(int c, int ptype, int pdata, BOOL negated)  check_char_prop(int c, int ptype, int pdata, BOOL negated)
# Line 2453  switch(ptype) Line 2466  switch(ptype)
2466            _pcre_ucp_gentype[prop->chartype] == ucp_N ||            _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2467            c == CHAR_UNDERSCORE) == negated;            c == CHAR_UNDERSCORE) == negated;
2468    }    }
2469  return FALSE;  return FALSE;
2470  }  }
2471  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2472    
# Line 2478  Returns:        TRUE if possessifying is Line 2491  Returns:        TRUE if possessifying is
2491  */  */
2492    
2493  static BOOL  static BOOL
2494  check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,  check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,
2495    int options, compile_data *cd)    int options, compile_data *cd)
2496  {  {
2497  int c, next;  int c, next;
# Line 2549  the next item is a character. */ Line 2562  the next item is a character. */
2562  if (next >= 0) switch(op_code)  if (next >= 0) switch(op_code)
2563    {    {
2564    case OP_CHAR:    case OP_CHAR:
2565  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2566    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
2567  #else  #else
2568    c = *previous;    c = *previous;
2569  #endif  #endif
2570    return c != next;    return c != next;
2571    
2572    /* For CHARNC (caseless character) we must check the other case. If we have    /* For CHARNC (caseless character) we must check the other case. If we have
2573    Unicode property support, we can use it to test the other case of    Unicode property support, we can use it to test the other case of
2574    high-valued characters. */    high-valued characters. */
2575    
2576    case OP_CHARNC:    case OP_CHARNC:
2577  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2578    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
2579  #else  #else
2580    c = *previous;    c = *previous;
2581  #endif  #endif
2582    if (c == next) return FALSE;    if (c == next) return FALSE;
2583  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2584    if (utf8)    if (utf8)
# Line 2603  if (next >= 0) switch(op_code) Line 2616  if (next >= 0) switch(op_code)
2616    else    else
2617  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2618    return (c == cd->fcc[next]);  /* Non-UTF-8 mode */    return (c == cd->fcc[next]);  /* Non-UTF-8 mode */
2619    
2620    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.    /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
2621    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */    When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
2622    
2623    case OP_DIGIT:    case OP_DIGIT:
2624    return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;    return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;
# Line 2673  if (next >= 0) switch(op_code) Line 2686  if (next >= 0) switch(op_code)
2686  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2687    case OP_PROP:    case OP_PROP:
2688    return check_char_prop(next, previous[0], previous[1], FALSE);    return check_char_prop(next, previous[0], previous[1], FALSE);
2689    
2690    case OP_NOTPROP:    case OP_NOTPROP:
2691    return check_char_prop(next, previous[0], previous[1], TRUE);    return check_char_prop(next, previous[0], previous[1], TRUE);
2692  #endif  #endif
# Line 2683  if (next >= 0) switch(op_code) Line 2696  if (next >= 0) switch(op_code)
2696    }    }
2697    
2698    
2699  /* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP  /* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP
2700  is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are  is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are
2701  generated only when PCRE_UCP is *not* set, that is, when only ASCII  generated only when PCRE_UCP is *not* set, that is, when only ASCII
2702  characteristics are recognized. Similarly, the opcodes OP_DIGIT etc. are  characteristics are recognized. Similarly, the opcodes OP_DIGIT etc. are
2703  replaced by OP_PROP codes when PCRE_UCP is set. */  replaced by OP_PROP codes when PCRE_UCP is set. */
2704    
2705  switch(op_code)  switch(op_code)
2706    {    {
2707    case OP_CHAR:    case OP_CHAR:
2708    case OP_CHARNC:    case OP_CHARNC:
2709  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2710    GETCHARTEST(c, previous);    GETCHARTEST(c, previous);
2711  #else  #else
2712    c = *previous;    c = *previous;
2713  #endif  #endif
2714    switch(-next)    switch(-next)
2715      {      {
2716      case ESC_d:      case ESC_d:
# Line 2761  switch(op_code) Line 2774  switch(op_code)
2774        default:        default:
2775        return -next == ESC_v;        return -next == ESC_v;
2776        }        }
2777    
2778      /* When PCRE_UCP is set, these values get generated for \d etc. Find      /* When PCRE_UCP is set, these values get generated for \d etc. Find
2779      their substitutions and process them. The result will always be either      their substitutions and process them. The result will always be either
2780      -ESC_p or -ESC_P. Then fall through to process those values. */      -ESC_p or -ESC_P. Then fall through to process those values. */
2781    
2782  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2783      case ESC_du:      case ESC_du:
2784      case ESC_DU:      case ESC_DU:
# Line 2780  switch(op_code) Line 2793  switch(op_code)
2793        if (temperrorcode != 0) return FALSE;        if (temperrorcode != 0) return FALSE;
2794        ptr++;    /* For compatibility */        ptr++;    /* For compatibility */
2795        }        }
2796      /* Fall through */      /* Fall through */
2797    
2798      case ESC_p:      case ESC_p:
2799      case ESC_P:      case ESC_P:
2800        {        {
2801        int ptype, pdata, errorcodeptr;        int ptype, pdata, errorcodeptr;
2802        BOOL negated;        BOOL negated;
2803    
2804        ptr--;      /* Make ptr point at the p or P */        ptr--;      /* Make ptr point at the p or P */
2805        ptype = get_ucp(&ptr, &negated, &pdata, &errorcodeptr);        ptype = get_ucp(&ptr, &negated, &pdata, &errorcodeptr);
2806        if (ptype < 0) return FALSE;        if (ptype < 0) return FALSE;
2807        ptr++;      /* Point past the final curly ket */        ptr++;      /* Point past the final curly ket */
2808    
2809        /* If the property item is optional, we have to give up. (When generated        /* If the property item is optional, we have to give up. (When generated
2810        from \d etc by PCRE_UCP, this test will have been applied much earlier,        from \d etc by PCRE_UCP, this test will have been applied much earlier,
2811        to the original \d etc. At this point, ptr will point to a zero byte.) */        to the original \d etc. At this point, ptr will point to a zero byte.) */
2812    
2813        if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||        if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2814          strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)          strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2815            return FALSE;            return FALSE;
2816    
2817        /* Do the property check. */        /* Do the property check. */
2818    
2819        return check_char_prop(c, ptype, pdata, (next == -ESC_P) != negated);        return check_char_prop(c, ptype, pdata, (next == -ESC_P) != negated);
2820        }        }
2821  #endif  #endif
2822    
2823      default:      default:
2824      return FALSE;      return FALSE;
2825      }      }
2826    
2827    /* In principle, support for Unicode properties should be integrated here as    /* In principle, support for Unicode properties should be integrated here as
2828    well. It means re-organizing the above code so as to get hold of the property    well. It means re-organizing the above code so as to get hold of the property
2829    values before switching on the op-code. However, I wonder how many patterns    values before switching on the op-code. However, I wonder how many patterns
2830    combine ASCII \d etc with Unicode properties? (Note that if PCRE_UCP is set,    combine ASCII \d etc with Unicode properties? (Note that if PCRE_UCP is set,
2831    these op-codes are never generated.) */    these op-codes are never generated.) */
2832    
2833    case OP_DIGIT:    case OP_DIGIT:
2834    return next == -ESC_D || next == -ESC_s || next == -ESC_W ||    return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
# Line 2831  switch(op_code) Line 2844  switch(op_code)
2844    return next == -ESC_s || next == -ESC_h || next == -ESC_v;    return next == -ESC_s || next == -ESC_h || next == -ESC_v;
2845    
2846    case OP_HSPACE:    case OP_HSPACE:
2847    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
2848           next == -ESC_w || next == -ESC_v || next == -ESC_R;           next == -ESC_w || next == -ESC_v || next == -ESC_R;
2849    
2850    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
2851    return next == -ESC_h;    return next == -ESC_h;
2852    
2853    /* Can't have \S in here because VT matches \S (Perl anomaly) */    /* Can't have \S in here because VT matches \S (Perl anomaly) */
2854    case OP_ANYNL:    case OP_ANYNL:
2855    case OP_VSPACE:    case OP_VSPACE:
2856    return next == -ESC_V || next == -ESC_d || next == -ESC_w;    return next == -ESC_V || next == -ESC_d || next == -ESC_w;
2857    
# Line 2846  switch(op_code) Line 2859  switch(op_code)
2859    return next == -ESC_v || next == -ESC_R;    return next == -ESC_v || next == -ESC_R;
2860    
2861    case OP_WORDCHAR:    case OP_WORDCHAR:
2862    return next == -ESC_W || next == -ESC_s || next == -ESC_h ||    return next == -ESC_W || next == -ESC_s || next == -ESC_h ||
2863           next == -ESC_v || next == -ESC_R;           next == -ESC_v || next == -ESC_R;
2864    
2865    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
# Line 2982  for (;; ptr++) Line 2995  for (;; ptr++)
2995    
2996    c = *ptr;    c = *ptr;
2997    
2998    /* If we are at the end of a nested substitution, revert to the outer level    /* If we are at the end of a nested substitution, revert to the outer level
2999    string. Nesting only happens one level deep. */    string. Nesting only happens one level deep. */
3000    
3001    if (c == 0 && nestptr != NULL)    if (c == 0 && nestptr != NULL)
# Line 3289  for (;; ptr++) Line 3302  for (;; ptr++)
3302          {                           /* Braces are required because the */          {                           /* Braces are required because the */
3303          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */          GETCHARLEN(c, ptr, ptr);    /* macro generates multiple statements */
3304          }          }
3305    
3306        /* In the pre-compile phase, accumulate the length of any UTF-8 extra        /* In the pre-compile phase, accumulate the length of any UTF-8 extra
3307        data and reset the pointer. This is so that very large classes that        data and reset the pointer. This is so that very large classes that
3308        contain a zillion UTF-8 characters no longer overwrite the work space        contain a zillion UTF-8 characters no longer overwrite the work space
# Line 3358  for (;; ptr++) Line 3371  for (;; ptr++)
3371    
3372          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
3373            posix_class = 0;            posix_class = 0;
3374    
3375          /* When PCRE_UCP is set, some of the POSIX classes are converted to          /* When PCRE_UCP is set, some of the POSIX classes are converted to
3376          different escape sequences that use Unicode properties. */          different escape sequences that use Unicode properties. */
3377    
3378  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3379          if ((options & PCRE_UCP) != 0)          if ((options & PCRE_UCP) != 0)
3380            {            {
3381            int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);            int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
3382            if (posix_substitutes[pc] != NULL)            if (posix_substitutes[pc] != NULL)
3383              {              {
3384              nestptr = tempptr + 1;              nestptr = tempptr + 1;
3385              ptr = posix_substitutes[pc] - 1;              ptr = posix_substitutes[pc] - 1;
3386              continue;              continue;
3387              }              }
3388            }            }
3389  #endif  #endif
3390          /* In the non-UCP case, we build the bit map for the POSIX class in a          /* In the non-UCP case, we build the bit map for the POSIX class in a
3391          chunk of local store because we may be adding and subtracting from it,          chunk of local store because we may be adding and subtracting from it,
3392          and we don't want to subtract bits that may be in the main map already.          and we don't want to subtract bits that may be in the main map already.
# Line 3460  for (;; ptr++) Line 3473  for (;; ptr++)
3473              case ESC_SU:              case ESC_SU:
3474              nestptr = ptr;              nestptr = ptr;
3475              ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */              ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */
3476              class_charcount -= 2;                /* Undo! */              class_charcount -= 2;                /* Undo! */
3477              continue;              continue;
3478  #endif  #endif
3479              case ESC_d:              case ESC_d:
# Line 3911  for (;; ptr++) Line 3924  for (;; ptr++)
3924      can cause firstbyte to be set. Otherwise, there can be no first char if      can cause firstbyte to be set. Otherwise, there can be no first char if
3925      this item is first, whatever repeat count may follow. In the case of      this item is first, whatever repeat count may follow. In the case of
3926      reqbyte, save the previous value for reinstating. */      reqbyte, save the previous value for reinstating. */
3927    
3928  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3929      if (class_charcount == 1 && !class_utf8 &&      if (class_charcount == 1 && !class_utf8 &&
3930        (!utf8 || !negate_class || class_lastchar < 128))        (!utf8 || !negate_class || class_lastchar < 128))
# Line 3991  for (;; ptr++) Line 4004  for (;; ptr++)
4004        }        }
4005  #endif  #endif
4006    
4007      /* If there are no characters > 255, or they are all to be included or      /* If there are no characters > 255, or they are all to be included or
4008      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
4009      whole class was negated and whether there were negative specials such as \S      whole class was negated and whether there were negative specials such as \S
4010      (non-UCP) in the class. Then copy the 32-byte map into the code vector,      (non-UCP) in the class. Then copy the 32-byte map into the code vector,
# Line 5795  for (;; ptr++) Line 5808  for (;; ptr++)
5808    
5809      /* ===================================================================*/      /* ===================================================================*/
5810      /* Handle metasequences introduced by \. For ones like \d, the ESC_ values      /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
5811      are arranged to be the negation of the corresponding OP_values in the      are arranged to be the negation of the corresponding OP_values in the
5812      default case when PCRE_UCP is not set. For the back references, the values      default case when PCRE_UCP is not set. For the back references, the values
5813      are ESC_REF plus the reference number. Only back references and those types      are ESC_REF plus the reference number. Only back references and those types
5814      that consume a character may be repeated. We can test for values between      that consume a character may be repeated. We can test for values between
# Line 5973  for (;; ptr++) Line 5986  for (;; ptr++)
5986            ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */            ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */
5987            }            }
5988          else          else
5989  #endif  #endif
5990            {            {
5991            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;            previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
5992            *code++ = -c;            *code++ = -c;
5993            }            }
5994          }          }
5995        continue;        continue;
5996        }        }
# Line 6809  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 6822  while (ptr[skipatstart] == CHAR_LEFT_PAR
6822      options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;      options = (options & ~(PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) | newbsr;
6823    else break;    else break;
6824    }    }
6825    
6826  utf8 = (options & PCRE_UTF8) != 0;  utf8 = (options & PCRE_UTF8) != 0;
6827    
6828  /* Can't support UTF8 unless PCRE has been compiled to include the code. */  /* Can't support UTF8 unless PCRE has been compiled to include the code. */
# Line 6835  if (utf8) Line 6848  if (utf8)
6848  if ((options & PCRE_UCP) != 0)  if ((options & PCRE_UCP) != 0)
6849    {    {
6850    errorcode = ERR67;    errorcode = ERR67;
6851    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN;
6852    }    }
6853  #endif  #endif
6854    
6855  /* Check validity of \R options. */  /* Check validity of \R options. */

Legend:
Removed from v.533  
changed lines
  Added in v.545

  ViewVC Help
Powered by ViewVC 1.1.5