/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 513 by ph10, Mon May 3 11:13:37 2010 UTC revision 556 by ph10, Tue Oct 26 11:06:44 2010 UTC
# Line 124  static const short int escapes[] = { Line 124  static const short int escapes[] = {
124       -ESC_H,                  0,       -ESC_H,                  0,
125       0,                       -ESC_K,       0,                       -ESC_K,
126       0,                       0,       0,                       0,
127       0,                       0,       -ESC_N,                  0,
128       -ESC_P,                  -ESC_Q,       -ESC_P,                  -ESC_Q,
129       -ESC_R,                  -ESC_S,       -ESC_R,                  -ESC_S,
130       0,                       0,       0,                       0,
# Line 171  static const short int escapes[] = { Line 171  static const short int escapes[] = {
171  /*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',  /*  B8 */     0,     0,      0,       0,      0,   ']',    '=',    '-',
172  /*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,  /*  C0 */   '{',-ESC_A, -ESC_B,  -ESC_C, -ESC_D,-ESC_E,      0, -ESC_G,
173  /*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,  /*  C8 */-ESC_H,     0,      0,       0,      0,     0,      0,      0,
174  /*  D0 */   '}',     0, -ESC_K,       0,      0,     0,      0, -ESC_P,  /*  D0 */   '}',     0, -ESC_K,       0,      0,-ESC_N,      0, -ESC_P,
175  /*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,  /*  D8 */-ESC_Q,-ESC_R,      0,       0,      0,     0,      0,      0,
176  /*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,  /*  E0 */  '\\',     0, -ESC_S,       0,      0,-ESC_V, -ESC_W, -ESC_X,
177  /*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,  /*  E8 */     0,-ESC_Z,      0,       0,      0,     0,      0,      0,
# Line 261  static const int posix_class_maps[] = { Line 261  static const int posix_class_maps[] = {
261    cbit_xdigit,-1,          0              /* xdigit */    cbit_xdigit,-1,          0              /* xdigit */
262  };  };
263    
264    /* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class
265    substitutes must be in the order of the names, defined above, and there are
266    both positive and negative cases. NULL means no substitute. */
267    
268    #ifdef SUPPORT_UCP
269    static const uschar *substitutes[] = {
270      (uschar *)"\\P{Nd}",    /* \D */
271      (uschar *)"\\p{Nd}",    /* \d */
272      (uschar *)"\\P{Xsp}",   /* \S */       /* NOTE: Xsp is Perl space */
273      (uschar *)"\\p{Xsp}",   /* \s */
274      (uschar *)"\\P{Xwd}",   /* \W */
275      (uschar *)"\\p{Xwd}"    /* \w */
276    };
277    
278    static const uschar *posix_substitutes[] = {
279      (uschar *)"\\p{L}",     /* alpha */
280      (uschar *)"\\p{Ll}",    /* lower */
281      (uschar *)"\\p{Lu}",    /* upper */
282      (uschar *)"\\p{Xan}",   /* alnum */
283      NULL,                   /* ascii */
284      (uschar *)"\\h",        /* blank */
285      NULL,                   /* cntrl */
286      (uschar *)"\\p{Nd}",    /* digit */
287      NULL,                   /* graph */
288      NULL,                   /* print */
289      NULL,                   /* punct */
290      (uschar *)"\\p{Xps}",   /* space */    /* NOTE: Xps is POSIX space */
291      (uschar *)"\\p{Xwd}",   /* word */
292      NULL,                   /* xdigit */
293      /* Negated cases */
294      (uschar *)"\\P{L}",     /* ^alpha */
295      (uschar *)"\\P{Ll}",    /* ^lower */
296      (uschar *)"\\P{Lu}",    /* ^upper */
297      (uschar *)"\\P{Xan}",   /* ^alnum */
298      NULL,                   /* ^ascii */
299      (uschar *)"\\H",        /* ^blank */
300      NULL,                   /* ^cntrl */
301      (uschar *)"\\P{Nd}",    /* ^digit */
302      NULL,                   /* ^graph */
303      NULL,                   /* ^print */
304      NULL,                   /* ^punct */
305      (uschar *)"\\P{Xps}",   /* ^space */   /* NOTE: Xps is POSIX space */
306      (uschar *)"\\P{Xwd}",   /* ^word */
307      NULL                    /* ^xdigit */
308    };
309    #define POSIX_SUBSIZE (sizeof(posix_substitutes)/sizeof(uschar *))
310    #endif
311    
312  #define STRING(a)  # a  #define STRING(a)  # a
313  #define XSTRING(s) STRING(s)  #define XSTRING(s) STRING(s)
# Line 324  static const char error_texts[] = Line 371  static const char error_texts[] =
371    /* 35 */    /* 35 */
372    "invalid condition (?(0)\0"    "invalid condition (?(0)\0"
373    "\\C not allowed in lookbehind assertion\0"    "\\C not allowed in lookbehind assertion\0"
374    "PCRE does not support \\L, \\l, \\N, \\U, or \\u\0"    "PCRE does not support \\L, \\l, \\N{name}, \\U, or \\u\0"
375    "number after (?C is > 255\0"    "number after (?C is > 255\0"
376    "closing ) for (?C expected\0"    "closing ) for (?C expected\0"
377    /* 40 */    /* 40 */
# Line 360  static const char error_texts[] = Line 407  static const char error_texts[] =
407    /* 65 */    /* 65 */
408    "different names for subpatterns of the same number are not allowed\0"    "different names for subpatterns of the same number are not allowed\0"
409    "(*MARK) must have an argument\0"    "(*MARK) must have an argument\0"
410      "this version of PCRE is not compiled with PCRE_UCP support\0"
411    ;    ;
412    
413  /* Table to identify digits and hex digits. This is used when compiling  /* Table to identify digits and hex digits. This is used when compiling
# Line 593  else Line 641  else
641    
642      case CHAR_l:      case CHAR_l:
643      case CHAR_L:      case CHAR_L:
     case CHAR_N:  
644      case CHAR_u:      case CHAR_u:
645      case CHAR_U:      case CHAR_U:
646      *errorcodeptr = ERR37;      *errorcodeptr = ERR37;
# Line 831  else Line 878  else
878      }      }
879    }    }
880    
881    /* Perl supports \N{name} for character names, as well as plain \N for "not
882    newline". PCRE does not support \N{name}. */
883    
884    if (c == -ESC_N && ptr[1] == CHAR_LEFT_CURLY_BRACKET)
885      *errorcodeptr = ERR37;
886    
887    /* If PCRE_UCP is set, we change the values for \d etc. */
888    
889    if ((options & PCRE_UCP) != 0 && c <= -ESC_D && c >= -ESC_w)
890      c -= (ESC_DU - ESC_D);
891    
892    /* Set the pointer to the final character before returning. */
893    
894  *ptrptr = ptr;  *ptrptr = ptr;
895  return c;  return c;
896  }  }
# Line 1050  Arguments: Line 1110  Arguments:
1110    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1111    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1112    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1113      utf8         TRUE if we are in UTF-8 mode
1114    count        pointer to the current capturing subpattern number (updated)    count        pointer to the current capturing subpattern number (updated)
1115    
1116  Returns:       the number of the named subpattern, or -1 if not found  Returns:       the number of the named subpattern, or -1 if not found
# Line 1057  Returns:       the number of the named s Line 1118  Returns:       the number of the named s
1118    
1119  static int  static int
1120  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,  find_parens_sub(uschar **ptrptr, compile_data *cd, const uschar *name, int lorn,
1121    BOOL xmode, int *count)    BOOL xmode, BOOL utf8, int *count)
1122  {  {
1123  uschar *ptr = *ptrptr;  uschar *ptr = *ptrptr;
1124  int start_count = *count;  int start_count = *count;
# Line 1069  dealing with. The very first call may no Line 1130  dealing with. The very first call may no
1130    
1131  if (ptr[0] == CHAR_LEFT_PARENTHESIS)  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1132    {    {
1133    if (ptr[1] == CHAR_QUESTION_MARK &&    /* Handle specials such as (*SKIP) or (*UTF8) etc. */
1134        ptr[2] == CHAR_VERTICAL_LINE)  
1135      if (ptr[1] == CHAR_ASTERISK) ptr += 2;
1136    
1137      /* Handle a normal, unnamed capturing parenthesis. */
1138    
1139      else if (ptr[1] != CHAR_QUESTION_MARK)
1140        {
1141        *count += 1;
1142        if (name == NULL && *count == lorn) return *count;
1143        ptr++;
1144        }
1145    
1146      /* All cases now have (? at the start. Remember when we are in a group
1147      where the parenthesis numbers are duplicated. */
1148    
1149      else if (ptr[2] == CHAR_VERTICAL_LINE)
1150      {      {
1151      ptr += 3;      ptr += 3;
1152      dup_parens = TRUE;      dup_parens = TRUE;
1153      }      }
1154    
1155    /* Handle a normal, unnamed capturing parenthesis */    /* Handle comments; all characters are allowed until a ket is reached. */
1156    
1157    else if (ptr[1] != CHAR_QUESTION_MARK && ptr[1] != CHAR_ASTERISK)    else if (ptr[2] == CHAR_NUMBER_SIGN)
1158      {      {
1159      *count += 1;      for (ptr += 3; *ptr != 0; ptr++) if (*ptr == CHAR_RIGHT_PARENTHESIS) break;
1160      if (name == NULL && *count == lorn) return *count;      goto FAIL_EXIT;
     ptr++;  
1161      }      }
1162    
1163    /* Handle a condition. If it is an assertion, just carry on so that it    /* Handle a condition. If it is an assertion, just carry on so that it
1164    is processed as normal. If not, skip to the closing parenthesis of the    is processed as normal. If not, skip to the closing parenthesis of the
1165    condition (there can't be any nested parens. */    condition (there can't be any nested parens). */
1166    
1167    else if (ptr[2] == CHAR_LEFT_PARENTHESIS)    else if (ptr[2] == CHAR_LEFT_PARENTHESIS)
1168      {      {
# Line 1099  if (ptr[0] == CHAR_LEFT_PARENTHESIS) Line 1174  if (ptr[0] == CHAR_LEFT_PARENTHESIS)
1174        }        }
1175      }      }
1176    
1177    /* We have either (? or (* and not a condition */    /* Start with (? but not a condition. */
1178    
1179    else    else
1180      {      {
# Line 1204  for (; *ptr != 0; ptr++) Line 1279  for (; *ptr != 0; ptr++)
1279    
1280    if (xmode && *ptr == CHAR_NUMBER_SIGN)    if (xmode && *ptr == CHAR_NUMBER_SIGN)
1281      {      {
1282      while (*(++ptr) != 0 && *ptr != CHAR_NL) {};      ptr++;
1283        while (*ptr != 0)
1284          {
1285          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
1286          ptr++;
1287    #ifdef SUPPORT_UTF8
1288          if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
1289    #endif
1290          }
1291      if (*ptr == 0) goto FAIL_EXIT;      if (*ptr == 0) goto FAIL_EXIT;
1292      continue;      continue;
1293      }      }
# Line 1213  for (; *ptr != 0; ptr++) Line 1296  for (; *ptr != 0; ptr++)
1296    
1297    if (*ptr == CHAR_LEFT_PARENTHESIS)    if (*ptr == CHAR_LEFT_PARENTHESIS)
1298      {      {
1299      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, count);      int rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf8, count);
1300      if (rc > 0) return rc;      if (rc > 0) return rc;
1301      if (*ptr == 0) goto FAIL_EXIT;      if (*ptr == 0) goto FAIL_EXIT;
1302      }      }
# Line 1221  for (; *ptr != 0; ptr++) Line 1304  for (; *ptr != 0; ptr++)
1304    else if (*ptr == CHAR_RIGHT_PARENTHESIS)    else if (*ptr == CHAR_RIGHT_PARENTHESIS)
1305      {      {
1306      if (dup_parens && *count < hwm_count) *count = hwm_count;      if (dup_parens && *count < hwm_count) *count = hwm_count;
1307      *ptrptr = ptr;      goto FAIL_EXIT;
     return -1;  
1308      }      }
1309    
1310    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)    else if (*ptr == CHAR_VERTICAL_LINE && dup_parens)
# Line 1260  Arguments: Line 1342  Arguments:
1342    name         name to seek, or NULL if seeking a numbered subpattern    name         name to seek, or NULL if seeking a numbered subpattern
1343    lorn         name length, or subpattern number if name is NULL    lorn         name length, or subpattern number if name is NULL
1344    xmode        TRUE if we are in /x mode    xmode        TRUE if we are in /x mode
1345      utf8         TRUE if we are in UTF-8 mode
1346    
1347  Returns:       the number of the found subpattern, or -1 if not found  Returns:       the number of the found subpattern, or -1 if not found
1348  */  */
1349    
1350  static int  static int
1351  find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode)  find_parens(compile_data *cd, const uschar *name, int lorn, BOOL xmode,
1352      BOOL utf8)
1353  {  {
1354  uschar *ptr = (uschar *)cd->start_pattern;  uschar *ptr = (uschar *)cd->start_pattern;
1355  int count = 0;  int count = 0;
# Line 1278  matching closing parens. That is why we Line 1362  matching closing parens. That is why we
1362    
1363  for (;;)  for (;;)
1364    {    {
1365    rc = find_parens_sub(&ptr, cd, name, lorn, xmode, &count);    rc = find_parens_sub(&ptr, cd, name, lorn, xmode, utf8, &count);
1366    if (rc > 0 || *ptr++ == 0) break;    if (rc > 0 || *ptr++ == 0) break;
1367    }    }
1368    
# Line 1651  for (;;) Line 1735  for (;;)
1735        case OP_MARK:        case OP_MARK:
1736        case OP_PRUNE_ARG:        case OP_PRUNE_ARG:
1737        case OP_SKIP_ARG:        case OP_SKIP_ARG:
       case OP_THEN_ARG:  
1738        code += code[1];        code += code[1];
1739        break;        break;
1740    
1741          case OP_THEN_ARG:
1742          code += code[1+LINK_SIZE];
1743          break;
1744        }        }
1745    
1746      /* Add in the fixed length from the table */      /* Add in the fixed length from the table */
# Line 1754  for (;;) Line 1841  for (;;)
1841        case OP_MARK:        case OP_MARK:
1842        case OP_PRUNE_ARG:        case OP_PRUNE_ARG:
1843        case OP_SKIP_ARG:        case OP_SKIP_ARG:
       case OP_THEN_ARG:  
1844        code += code[1];        code += code[1];
1845        break;        break;
1846    
1847          case OP_THEN_ARG:
1848          code += code[1+LINK_SIZE];
1849          break;
1850        }        }
1851    
1852      /* Add in the fixed length from the table */      /* Add in the fixed length from the table */
# Line 2032  for (code = first_significant_code(code Line 2122  for (code = first_significant_code(code
2122      case OP_MARK:      case OP_MARK:
2123      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
2124      case OP_SKIP_ARG:      case OP_SKIP_ARG:
     case OP_THEN_ARG:  
2125      code += code[1];      code += code[1];
2126      break;      break;
2127    
2128        case OP_THEN_ARG:
2129        code += code[1+LINK_SIZE];
2130        break;
2131    
2132      /* None of the remaining opcodes are required to match a character. */      /* None of the remaining opcodes are required to match a character. */
2133    
2134      default:      default:
# Line 2256  auto_callout(uschar *code, const uschar Line 2349  auto_callout(uschar *code, const uschar
2349  {  {
2350  *code++ = OP_CALLOUT;  *code++ = OP_CALLOUT;
2351  *code++ = 255;  *code++ = 255;
2352  PUT(code, 0, ptr - cd->start_pattern);  /* Pattern offset */  PUT(code, 0, (int)(ptr - cd->start_pattern));  /* Pattern offset */
2353  PUT(code, LINK_SIZE, 0);                /* Default length */  PUT(code, LINK_SIZE, 0);                       /* Default length */
2354  return code + 2*LINK_SIZE;  return code + 2*LINK_SIZE;
2355  }  }
2356    
# Line 2282  Returns:             nothing Line 2375  Returns:             nothing
2375  static void  static void
2376  complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)  complete_callout(uschar *previous_callout, const uschar *ptr, compile_data *cd)
2377  {  {
2378  int length = ptr - cd->start_pattern - GET(previous_callout, 2);  int length = (int)(ptr - cd->start_pattern - GET(previous_callout, 2));
2379  PUT(previous_callout, 2 + LINK_SIZE, length);  PUT(previous_callout, 2 + LINK_SIZE, length);
2380  }  }
2381    
# Line 2332  for (++c; c <= d; c++) Line 2425  for (++c; c <= d; c++)
2425    
2426  return TRUE;  return TRUE;
2427  }  }
2428    
2429    
2430    
2431    /*************************************************
2432    *        Check a character and a property        *
2433    *************************************************/
2434    
2435    /* This function is called by check_auto_possessive() when a property item
2436    is adjacent to a fixed character.
2437    
2438    Arguments:
2439      c            the character
2440      ptype        the property type
2441      pdata        the data for the type
2442      negated      TRUE if it's a negated property (\P or \p{^)
2443    
2444    Returns:       TRUE if auto-possessifying is OK
2445    */
2446    
2447    static BOOL
2448    check_char_prop(int c, int ptype, int pdata, BOOL negated)
2449    {
2450    const ucd_record *prop = GET_UCD(c);
2451    switch(ptype)
2452      {
2453      case PT_LAMP:
2454      return (prop->chartype == ucp_Lu ||
2455              prop->chartype == ucp_Ll ||
2456              prop->chartype == ucp_Lt) == negated;
2457    
2458      case PT_GC:
2459      return (pdata == _pcre_ucp_gentype[prop->chartype]) == negated;
2460    
2461      case PT_PC:
2462      return (pdata == prop->chartype) == negated;
2463    
2464      case PT_SC:
2465      return (pdata == prop->script) == negated;
2466    
2467      /* These are specials */
2468    
2469      case PT_ALNUM:
2470      return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2471              _pcre_ucp_gentype[prop->chartype] == ucp_N) == negated;
2472    
2473      case PT_SPACE:    /* Perl space */
2474      return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2475              c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR)
2476              == negated;
2477    
2478      case PT_PXSPACE:  /* POSIX space */
2479      return (_pcre_ucp_gentype[prop->chartype] == ucp_Z ||
2480              c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
2481              c == CHAR_FF || c == CHAR_CR)
2482              == negated;
2483    
2484      case PT_WORD:
2485      return (_pcre_ucp_gentype[prop->chartype] == ucp_L ||
2486              _pcre_ucp_gentype[prop->chartype] == ucp_N ||
2487              c == CHAR_UNDERSCORE) == negated;
2488      }
2489    return FALSE;
2490    }
2491  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2492    
2493    
# Line 2345  whether the next thing could possibly ma Line 2501  whether the next thing could possibly ma
2501  sense to automatically possessify the repeated item.  sense to automatically possessify the repeated item.
2502    
2503  Arguments:  Arguments:
2504    op_code       the repeated op code    previous      pointer to the repeated opcode
   this          data for this item, depends on the opcode  
2505    utf8          TRUE in UTF-8 mode    utf8          TRUE in UTF-8 mode
   utf8_char     used for utf8 character bytes, NULL if not relevant  
2506    ptr           next character in pattern    ptr           next character in pattern
2507    options       options bits    options       options bits
2508    cd            contains pointers to tables etc.    cd            contains pointers to tables etc.
# Line 2357  Returns:        TRUE if possessifying is Line 2511  Returns:        TRUE if possessifying is
2511  */  */
2512    
2513  static BOOL  static BOOL
2514  check_auto_possessive(int op_code, int item, BOOL utf8, uschar *utf8_char,  check_auto_possessive(const uschar *previous, BOOL utf8, const uschar *ptr,
2515    const uschar *ptr, int options, compile_data *cd)    int options, compile_data *cd)
2516  {  {
2517  int next;  int c, next;
2518    int op_code = *previous++;
2519    
2520  /* Skip whitespace and comments in extended mode */  /* Skip whitespace and comments in extended mode */
2521    
# Line 2371  if ((options & PCRE_EXTENDED) != 0) Line 2526  if ((options & PCRE_EXTENDED) != 0)
2526      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2527      if (*ptr == CHAR_NUMBER_SIGN)      if (*ptr == CHAR_NUMBER_SIGN)
2528        {        {
2529        while (*(++ptr) != 0)        ptr++;
2530          while (*ptr != 0)
2531            {
2532          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
2533            ptr++;
2534    #ifdef SUPPORT_UTF8
2535            if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
2536    #endif
2537            }
2538        }        }
2539      else break;      else break;
2540      }      }
# Line 2408  if ((options & PCRE_EXTENDED) != 0) Line 2570  if ((options & PCRE_EXTENDED) != 0)
2570      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;      while ((cd->ctypes[*ptr] & ctype_space) != 0) ptr++;
2571      if (*ptr == CHAR_NUMBER_SIGN)      if (*ptr == CHAR_NUMBER_SIGN)
2572        {        {
2573        while (*(++ptr) != 0)        ptr++;
2574          while (*ptr != 0)
2575            {
2576          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen; break; }
2577            ptr++;
2578    #ifdef SUPPORT_UTF8
2579            if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
2580    #endif
2581            }
2582        }        }
2583      else break;      else break;
2584      }      }
# Line 2421  if (*ptr == CHAR_ASTERISK || *ptr == CHA Line 2590  if (*ptr == CHAR_ASTERISK || *ptr == CHA
2590    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)    strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2591      return FALSE;      return FALSE;
2592    
2593  /* Now compare the next item with the previous opcode. If the previous is a  /* Now compare the next item with the previous opcode. First, handle cases when
2594  positive single character match, "item" either contains the character or, if  the next item is a character. */
 "item" is greater than 127 in utf8 mode, the character's bytes are in  
 utf8_char. */  
   
   
 /* Handle cases when the next item is a character. */  
2595    
2596  if (next >= 0) switch(op_code)  if (next >= 0) switch(op_code)
2597    {    {
2598    case OP_CHAR:    case OP_CHAR:
2599  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2600    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }    GETCHARTEST(c, previous);
2601  #else  #else
2602    (void)(utf8_char);  /* Keep compiler happy by referencing function argument */    c = *previous;
2603  #endif  #endif
2604    return item != next;    return c != next;
2605    
2606    /* For CHARNC (caseless character) we must check the other case. If we have    /* For CHARNC (caseless character) we must check the other case. If we have
2607    Unicode property support, we can use it to test the other case of    Unicode property support, we can use it to test the other case of
# Line 2445  if (next >= 0) switch(op_code) Line 2609  if (next >= 0) switch(op_code)
2609    
2610    case OP_CHARNC:    case OP_CHARNC:
2611  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2612    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }    GETCHARTEST(c, previous);
2613    #else
2614      c = *previous;
2615  #endif  #endif
2616    if (item == next) return FALSE;    if (c == next) return FALSE;
2617  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2618    if (utf8)    if (utf8)
2619      {      {
# Line 2458  if (next >= 0) switch(op_code) Line 2624  if (next >= 0) switch(op_code)
2624  #else  #else
2625      othercase = NOTACHAR;      othercase = NOTACHAR;
2626  #endif  #endif
2627      return (unsigned int)item != othercase;      return (unsigned int)c != othercase;
2628      }      }
2629    else    else
2630  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2631    return (item != cd->fcc[next]);  /* Non-UTF-8 mode */    return (c != cd->fcc[next]);  /* Non-UTF-8 mode */
2632    
2633    /* For OP_NOT, "item" must be a single-byte character. */    /* For OP_NOT, its data is always a single-byte character. */
2634    
2635    case OP_NOT:    case OP_NOT:
2636    if (item == next) return TRUE;    if ((c = *previous) == next) return TRUE;
2637    if ((options & PCRE_CASELESS) == 0) return FALSE;    if ((options & PCRE_CASELESS) == 0) return FALSE;
2638  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2639    if (utf8)    if (utf8)
# Line 2479  if (next >= 0) switch(op_code) Line 2645  if (next >= 0) switch(op_code)
2645  #else  #else
2646      othercase = NOTACHAR;      othercase = NOTACHAR;
2647  #endif  #endif
2648      return (unsigned int)item == othercase;      return (unsigned int)c == othercase;
2649      }      }
2650    else    else
2651  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
2652    return (item == cd->fcc[next]);  /* Non-UTF-8 mode */    return (c == cd->fcc[next]);  /* Non-UTF-8 mode */
2653    
2654      /* Note that OP_DIGIT etc. are generated only when PCRE_UCP is *not* set.
2655      When it is set, \d etc. are converted into OP_(NOT_)PROP codes. */
2656    
2657    case OP_DIGIT:    case OP_DIGIT:
2658    return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;    return next > 127 || (cd->ctypes[next] & ctype_digit) == 0;
# Line 2526  if (next >= 0) switch(op_code) Line 2695  if (next >= 0) switch(op_code)
2695      case 0x202f:      case 0x202f:
2696      case 0x205f:      case 0x205f:
2697      case 0x3000:      case 0x3000:
2698      return op_code != OP_HSPACE;      return op_code == OP_NOT_HSPACE;
2699      default:      default:
2700      return op_code == OP_HSPACE;      return op_code != OP_NOT_HSPACE;
2701      }      }
2702    
2703      case OP_ANYNL:
2704    case OP_VSPACE:    case OP_VSPACE:
2705    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
2706    switch(next)    switch(next)
# Line 2542  if (next >= 0) switch(op_code) Line 2712  if (next >= 0) switch(op_code)
2712      case 0x85:      case 0x85:
2713      case 0x2028:      case 0x2028:
2714      case 0x2029:      case 0x2029:
2715      return op_code != OP_VSPACE;      return op_code == OP_NOT_VSPACE;
2716      default:      default:
2717      return op_code == OP_VSPACE;      return op_code != OP_NOT_VSPACE;
2718      }      }
2719    
2720    #ifdef SUPPORT_UCP
2721      case OP_PROP:
2722      return check_char_prop(next, previous[0], previous[1], FALSE);
2723    
2724      case OP_NOTPROP:
2725      return check_char_prop(next, previous[0], previous[1], TRUE);
2726    #endif
2727    
2728    default:    default:
2729    return FALSE;    return FALSE;
2730    }    }
2731    
2732    
2733  /* Handle the case when the next item is \d, \s, etc. */  /* Handle the case when the next item is \d, \s, etc. Note that when PCRE_UCP
2734    is set, \d turns into ESC_du rather than ESC_d, etc., so ESC_d etc. are
2735    generated only when PCRE_UCP is *not* set, that is, when only ASCII
2736    characteristics are recognized. Similarly, the opcodes OP_DIGIT etc. are
2737    replaced by OP_PROP codes when PCRE_UCP is set. */
2738    
2739  switch(op_code)  switch(op_code)
2740    {    {
2741    case OP_CHAR:    case OP_CHAR:
2742    case OP_CHARNC:    case OP_CHARNC:
2743  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2744    if (utf8 && item > 127) { GETCHAR(item, utf8_char); }    GETCHARTEST(c, previous);
2745    #else
2746      c = *previous;
2747  #endif  #endif
2748    switch(-next)    switch(-next)
2749      {      {
2750      case ESC_d:      case ESC_d:
2751      return item > 127 || (cd->ctypes[item] & ctype_digit) == 0;      return c > 127 || (cd->ctypes[c] & ctype_digit) == 0;
2752    
2753      case ESC_D:      case ESC_D:
2754      return item <= 127 && (cd->ctypes[item] & ctype_digit) != 0;      return c <= 127 && (cd->ctypes[c] & ctype_digit) != 0;
2755    
2756      case ESC_s:      case ESC_s:
2757      return item > 127 || (cd->ctypes[item] & ctype_space) == 0;      return c > 127 || (cd->ctypes[c] & ctype_space) == 0;
2758    
2759      case ESC_S:      case ESC_S:
2760      return item <= 127 && (cd->ctypes[item] & ctype_space) != 0;      return c <= 127 && (cd->ctypes[c] & ctype_space) != 0;
2761    
2762      case ESC_w:      case ESC_w:
2763      return item > 127 || (cd->ctypes[item] & ctype_word) == 0;      return c > 127 || (cd->ctypes[c] & ctype_word) == 0;
2764    
2765      case ESC_W:      case ESC_W:
2766      return item <= 127 && (cd->ctypes[item] & ctype_word) != 0;      return c <= 127 && (cd->ctypes[c] & ctype_word) != 0;
2767    
2768      case ESC_h:      case ESC_h:
2769      case ESC_H:      case ESC_H:
2770      switch(item)      switch(c)
2771        {        {
2772        case 0x09:        case 0x09:
2773        case 0x20:        case 0x20:
# Line 2611  switch(op_code) Line 2795  switch(op_code)
2795    
2796      case ESC_v:      case ESC_v:
2797      case ESC_V:      case ESC_V:
2798      switch(item)      switch(c)
2799        {        {
2800        case 0x0a:        case 0x0a:
2801        case 0x0b:        case 0x0b:
# Line 2625  switch(op_code) Line 2809  switch(op_code)
2809        return -next == ESC_v;        return -next == ESC_v;
2810        }        }
2811    
2812        /* When PCRE_UCP is set, these values get generated for \d etc. Find
2813        their substitutions and process them. The result will always be either
2814        -ESC_p or -ESC_P. Then fall through to process those values. */
2815    
2816    #ifdef SUPPORT_UCP
2817        case ESC_du:
2818        case ESC_DU:
2819        case ESC_wu:
2820        case ESC_WU:
2821        case ESC_su:
2822        case ESC_SU:
2823          {
2824          int temperrorcode = 0;
2825          ptr = substitutes[-next - ESC_DU];
2826          next = check_escape(&ptr, &temperrorcode, 0, options, FALSE);
2827          if (temperrorcode != 0) return FALSE;
2828          ptr++;    /* Offsets the ptr-- at the start of the \p, \P case below */
2829          }
2830        /* Fall through */
2831    
2832        case ESC_p:
2833        case ESC_P:
2834          {
2835          int ptype, pdata, errorcodeptr;
2836          BOOL negated;
2837    
2838          ptr--;      /* Make ptr point at the p or P */
2839          ptype = get_ucp(&ptr, &negated, &pdata, &errorcodeptr);
2840          if (ptype < 0) return FALSE;
2841          ptr++;      /* Point past the final curly ket */
2842    
2843          /* If the property item is optional, we have to give up. (When generated
2844          from \d etc by PCRE_UCP, this test will have been applied much earlier,
2845          to the original \d etc. At this point, ptr will point to a zero byte.) */
2846    
2847          if (*ptr == CHAR_ASTERISK || *ptr == CHAR_QUESTION_MARK ||
2848            strncmp((char *)ptr, STR_LEFT_CURLY_BRACKET STR_0 STR_COMMA, 3) == 0)
2849              return FALSE;
2850    
2851          /* Do the property check. */
2852    
2853          return check_char_prop(c, ptype, pdata, (next == -ESC_P) != negated);
2854          }
2855    #endif
2856    
2857      default:      default:
2858      return FALSE;      return FALSE;
2859      }      }
2860    
2861      /* In principle, support for Unicode properties should be integrated here as
2862      well. It means re-organizing the above code so as to get hold of the property
2863      values before switching on the op-code. However, I wonder how many patterns
2864      combine ASCII \d etc with Unicode properties? (Note that if PCRE_UCP is set,
2865      these op-codes are never generated.) */
2866    
2867    case OP_DIGIT:    case OP_DIGIT:
2868    return next == -ESC_D || next == -ESC_s || next == -ESC_W ||    return next == -ESC_D || next == -ESC_s || next == -ESC_W ||
2869           next == -ESC_h || next == -ESC_v;           next == -ESC_h || next == -ESC_v || next == -ESC_R;
2870    
2871    case OP_NOT_DIGIT:    case OP_NOT_DIGIT:
2872    return next == -ESC_d;    return next == -ESC_d;
2873    
2874    case OP_WHITESPACE:    case OP_WHITESPACE:
2875    return next == -ESC_S || next == -ESC_d || next == -ESC_w;    return next == -ESC_S || next == -ESC_d || next == -ESC_w || next == -ESC_R;
2876    
2877    case OP_NOT_WHITESPACE:    case OP_NOT_WHITESPACE:
2878    return next == -ESC_s || next == -ESC_h || next == -ESC_v;    return next == -ESC_s || next == -ESC_h || next == -ESC_v;
2879    
2880    case OP_HSPACE:    case OP_HSPACE:
2881    return next == -ESC_S || next == -ESC_H || next == -ESC_d || next == -ESC_w;    return next == -ESC_S || next == -ESC_H || next == -ESC_d ||
2882             next == -ESC_w || next == -ESC_v || next == -ESC_R;
2883    
2884    case OP_NOT_HSPACE:    case OP_NOT_HSPACE:
2885    return next == -ESC_h;    return next == -ESC_h;
2886    
2887    /* Can't have \S in here because VT matches \S (Perl anomaly) */    /* Can't have \S in here because VT matches \S (Perl anomaly) */
2888      case OP_ANYNL:
2889    case OP_VSPACE:    case OP_VSPACE:
2890    return next == -ESC_V || next == -ESC_d || next == -ESC_w;    return next == -ESC_V || next == -ESC_d || next == -ESC_w;
2891    
2892    case OP_NOT_VSPACE:    case OP_NOT_VSPACE:
2893    return next == -ESC_v;    return next == -ESC_v || next == -ESC_R;
2894    
2895    case OP_WORDCHAR:    case OP_WORDCHAR:
2896    return next == -ESC_W || next == -ESC_s || next == -ESC_h || next == -ESC_v;    return next == -ESC_W || next == -ESC_s || next == -ESC_h ||
2897             next == -ESC_v || next == -ESC_R;
2898    
2899    case OP_NOT_WORDCHAR:    case OP_NOT_WORDCHAR:
2900    return next == -ESC_w || next == -ESC_d;    return next == -ESC_w || next == -ESC_d;
# Line 2720  BOOL inescq = FALSE; Line 2958  BOOL inescq = FALSE;
2958  BOOL groupsetfirstbyte = FALSE;  BOOL groupsetfirstbyte = FALSE;
2959  const uschar *ptr = *ptrptr;  const uschar *ptr = *ptrptr;
2960  const uschar *tempptr;  const uschar *tempptr;
2961    const uschar *nestptr = NULL;
2962  uschar *previous = NULL;  uschar *previous = NULL;
2963  uschar *previous_callout = NULL;  uschar *previous_callout = NULL;
2964  uschar *save_hwm = NULL;  uschar *save_hwm = NULL;
# Line 2790  for (;; ptr++) Line 3029  for (;; ptr++)
3029    
3030    c = *ptr;    c = *ptr;
3031    
3032      /* If we are at the end of a nested substitution, revert to the outer level
3033      string. Nesting only happens one level deep. */
3034    
3035      if (c == 0 && nestptr != NULL)
3036        {
3037        ptr = nestptr;
3038        nestptr = NULL;
3039        c = *ptr;
3040        }
3041    
3042    /* If we are in the pre-compile phase, accumulate the length used for the    /* If we are in the pre-compile phase, accumulate the length used for the
3043    previous cycle of this loop. */    previous cycle of this loop. */
3044    
# Line 2820  for (;; ptr++) Line 3069  for (;; ptr++)
3069        goto FAILED;        goto FAILED;
3070        }        }
3071    
3072      *lengthptr += code - last_code;      *lengthptr += (int)(code - last_code);
3073      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));      DPRINTF(("length=%d added %d c=%c\n", *lengthptr, code - last_code, c));
3074    
3075      /* If "previous" is set and it is not at the start of the work space, move      /* If "previous" is set and it is not at the start of the work space, move
# Line 2902  for (;; ptr++) Line 3151  for (;; ptr++)
3151      if ((cd->ctypes[c] & ctype_space) != 0) continue;      if ((cd->ctypes[c] & ctype_space) != 0) continue;
3152      if (c == CHAR_NUMBER_SIGN)      if (c == CHAR_NUMBER_SIGN)
3153        {        {
3154        while (*(++ptr) != 0)        ptr++;
3155          while (*ptr != 0)
3156          {          {
3157          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }          if (IS_NEWLINE(ptr)) { ptr += cd->nllen - 1; break; }
3158            ptr++;
3159    #ifdef SUPPORT_UTF8
3160            if (utf8) while ((*ptr & 0xc0) == 0x80) ptr++;
3161    #endif
3162          }          }
3163        if (*ptr != 0) continue;        if (*ptr != 0) continue;
3164    
# Line 2938  for (;; ptr++) Line 3192  for (;; ptr++)
3192          *errorcodeptr = ERR20;          *errorcodeptr = ERR20;
3193          goto FAILED;          goto FAILED;
3194          }          }
3195        *lengthptr += code - last_code;   /* To include callout length */        *lengthptr += (int)(code - last_code);   /* To include callout length */
3196        DPRINTF((">> end branch\n"));        DPRINTF((">> end branch\n"));
3197        }        }
3198      return TRUE;      return TRUE;
# Line 3143  for (;; ptr++) Line 3397  for (;; ptr++)
3397            ptr++;            ptr++;
3398            }            }
3399    
3400          posix_class = check_posix_name(ptr, tempptr - ptr);          posix_class = check_posix_name(ptr, (int)(tempptr - ptr));
3401          if (posix_class < 0)          if (posix_class < 0)
3402            {            {
3403            *errorcodeptr = ERR30;            *errorcodeptr = ERR30;
# Line 3157  for (;; ptr++) Line 3411  for (;; ptr++)
3411          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)          if ((options & PCRE_CASELESS) != 0 && posix_class <= 2)
3412            posix_class = 0;            posix_class = 0;
3413    
3414          /* We build the bit map for the POSIX class in a chunk of local store          /* When PCRE_UCP is set, some of the POSIX classes are converted to
3415          because we may be adding and subtracting from it, and we don't want to          different escape sequences that use Unicode properties. */
3416          subtract bits that may be in the main map already. At the end we or the  
3417          result into the bit map that is being built. */  #ifdef SUPPORT_UCP
3418            if ((options & PCRE_UCP) != 0)
3419              {
3420              int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
3421              if (posix_substitutes[pc] != NULL)
3422                {
3423                nestptr = tempptr + 1;
3424                ptr = posix_substitutes[pc] - 1;
3425                continue;
3426                }
3427              }
3428    #endif
3429            /* In the non-UCP case, we build the bit map for the POSIX class in a
3430            chunk of local store because we may be adding and subtracting from it,
3431            and we don't want to subtract bits that may be in the main map already.
3432            At the end we or the result into the bit map that is being built. */
3433    
3434          posix_class *= 3;          posix_class *= 3;
3435    
# Line 3232  for (;; ptr++) Line 3501  for (;; ptr++)
3501            register const uschar *cbits = cd->cbits;            register const uschar *cbits = cd->cbits;
3502            class_charcount += 2;     /* Greater than 1 is what matters */            class_charcount += 2;     /* Greater than 1 is what matters */
3503    
3504            /* Save time by not doing this in the pre-compile phase. */            switch (-c)
   
           if (lengthptr == NULL) switch (-c)  
3505              {              {
3506    #ifdef SUPPORT_UCP
3507                case ESC_du:     /* These are the values given for \d etc */
3508                case ESC_DU:     /* when PCRE_UCP is set. We replace the */
3509                case ESC_wu:     /* escape sequence with an appropriate \p */
3510                case ESC_WU:     /* or \P to test Unicode properties instead */
3511                case ESC_su:     /* of the default ASCII testing. */
3512                case ESC_SU:
3513                nestptr = ptr;
3514                ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */
3515                class_charcount -= 2;                /* Undo! */
3516                continue;
3517    #endif
3518              case ESC_d:              case ESC_d:
3519              for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];              for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_digit];
3520              continue;              continue;
# Line 3254  for (;; ptr++) Line 3533  for (;; ptr++)
3533              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];              for (c = 0; c < 32; c++) classbits[c] |= ~cbits[c+cbit_word];
3534              continue;              continue;
3535    
3536                /* Perl 5.004 onwards omits VT from \s, but we must preserve it
3537                if it was previously set by something earlier in the character
3538                class. */
3539    
3540              case ESC_s:              case ESC_s:
3541              for (c = 0; c < 32; c++) classbits[c] |= cbits[c+cbit_space];              classbits[0] |= cbits[cbit_space];
3542              classbits[1] &= ~0x08;   /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= cbits[cbit_space+1] & ~0x08;
3543                for (c = 2; c < 32; c++) classbits[c] |= cbits[c+cbit_space];
3544              continue;              continue;
3545    
3546              case ESC_S:              case ESC_S:
# Line 3265  for (;; ptr++) Line 3549  for (;; ptr++)
3549              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */              classbits[1] |= 0x08;    /* Perl 5.004 onwards omits VT from \s */
3550              continue;              continue;
3551    
3552              default:    /* Not recognized; fall through */              case ESC_h:
             break;      /* Need "default" setting to stop compiler warning. */  
             }  
   
           /* In the pre-compile phase, just do the recognition. */  
   
           else if (c == -ESC_d || c == -ESC_D || c == -ESC_w ||  
                    c == -ESC_W || c == -ESC_s || c == -ESC_S) continue;  
   
           /* We need to deal with \H, \h, \V, and \v in both phases because  
           they use extra memory. */  
   
           if (-c == ESC_h)  
             {  
3553              SETBIT(classbits, 0x09); /* HT */              SETBIT(classbits, 0x09); /* HT */
3554              SETBIT(classbits, 0x20); /* SPACE */              SETBIT(classbits, 0x20); /* SPACE */
3555              SETBIT(classbits, 0xa0); /* NBSP */              SETBIT(classbits, 0xa0); /* NBSP */
# Line 3302  for (;; ptr++) Line 3573  for (;; ptr++)
3573                }                }
3574  #endif  #endif
3575              continue;              continue;
             }  
3576    
3577            if (-c == ESC_H)              case ESC_H:
             {  
3578              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
3579                {                {
3580                int x = 0xff;                int x = 0xff;
# Line 3347  for (;; ptr++) Line 3616  for (;; ptr++)
3616                }                }
3617  #endif  #endif
3618              continue;              continue;
             }  
3619    
3620            if (-c == ESC_v)              case ESC_v:
             {  
3621              SETBIT(classbits, 0x0a); /* LF */              SETBIT(classbits, 0x0a); /* LF */
3622              SETBIT(classbits, 0x0b); /* VT */              SETBIT(classbits, 0x0b); /* VT */
3623              SETBIT(classbits, 0x0c); /* FF */              SETBIT(classbits, 0x0c); /* FF */
# Line 3366  for (;; ptr++) Line 3633  for (;; ptr++)
3633                }                }
3634  #endif  #endif
3635              continue;              continue;
             }  
3636    
3637            if (-c == ESC_V)              case ESC_V:
             {  
3638              for (c = 0; c < 32; c++)              for (c = 0; c < 32; c++)
3639                {                {
3640                int x = 0xff;                int x = 0xff;
# Line 3399  for (;; ptr++) Line 3664  for (;; ptr++)
3664                }                }
3665  #endif  #endif
3666              continue;              continue;
             }  
   
           /* We need to deal with \P and \p in both phases. */  
3667    
3668  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
3669            if (-c == ESC_p || -c == ESC_P)              case ESC_p:
3670              {              case ESC_P:
3671              BOOL negated;                {
3672              int pdata;                BOOL negated;
3673              int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);                int pdata;
3674              if (ptype < 0) goto FAILED;                int ptype = get_ucp(&ptr, &negated, &pdata, errorcodeptr);
3675              class_utf8 = TRUE;                if (ptype < 0) goto FAILED;
3676              *class_utf8data++ = ((-c == ESC_p) != negated)?                class_utf8 = TRUE;
3677                XCL_PROP : XCL_NOTPROP;                *class_utf8data++ = ((-c == ESC_p) != negated)?
3678              *class_utf8data++ = ptype;                  XCL_PROP : XCL_NOTPROP;
3679              *class_utf8data++ = pdata;                *class_utf8data++ = ptype;
3680              class_charcount -= 2;   /* Not a < 256 character */                *class_utf8data++ = pdata;
3681              continue;                class_charcount -= 2;   /* Not a < 256 character */
3682              }                continue;
3683                  }
3684  #endif  #endif
3685            /* Unrecognized escapes are faulted if PCRE is running in its              /* Unrecognized escapes are faulted if PCRE is running in its
3686            strict mode. By default, for compatibility with Perl, they are              strict mode. By default, for compatibility with Perl, they are
3687            treated as literals. */              treated as literals. */
3688    
3689            if ((options & PCRE_EXTRA) != 0)              default:
3690              {              if ((options & PCRE_EXTRA) != 0)
3691              *errorcodeptr = ERR7;                {
3692              goto FAILED;                *errorcodeptr = ERR7;
3693                  goto FAILED;
3694                  }
3695                class_charcount -= 2;  /* Undo the default count from above */
3696                c = *ptr;              /* Get the final character and fall through */
3697                break;
3698              }              }
   
           class_charcount -= 2;  /* Undo the default count from above */  
           c = *ptr;              /* Get the final character and fall through */  
3699            }            }
3700    
3701          /* Fall through if we have a single character (c >= 0). This may be          /* Fall through if we have a single character (c >= 0). This may be
# Line 3500  for (;; ptr++) Line 3765  for (;; ptr++)
3765            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);            d = check_escape(&ptr, errorcodeptr, cd->bracount, options, TRUE);
3766            if (*errorcodeptr != 0) goto FAILED;            if (*errorcodeptr != 0) goto FAILED;
3767    
3768            /* \b is backspace; \X is literal X; \R is literal R; any other            /* \b is backspace; any other special means the '-' was literal */
           special means the '-' was literal */  
3769    
3770            if (d < 0)            if (d < 0)
3771              {              {
3772              if (d == -ESC_b) d = CHAR_BS;              if (d == -ESC_b) d = CHAR_BS; else
             else if (d == -ESC_X) d = CHAR_X;  
             else if (d == -ESC_R) d = CHAR_R; else  
3773                {                {
3774                ptr = oldptr;                ptr = oldptr;
3775                goto LONE_SINGLE_CHARACTER;  /* A few lines below */                goto LONE_SINGLE_CHARACTER;  /* A few lines below */
# Line 3673  for (;; ptr++) Line 3935  for (;; ptr++)
3935          }          }
3936        }        }
3937    
3938      /* Loop until ']' reached. This "while" is the end of the "do" above. */      /* Loop until ']' reached. This "while" is the end of the "do" far above.
3939        If we are at the end of an internal nested string, revert to the outer
3940        string. */
3941    
3942        while (((c = *(++ptr)) != 0 ||
3943               (nestptr != NULL &&
3944                 (ptr = nestptr, nestptr = NULL, c = *(++ptr)) != 0)) &&
3945               (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));
3946    
3947      while ((c = *(++ptr)) != 0 && (c != CHAR_RIGHT_SQUARE_BRACKET || inescq));      /* Check for missing terminating ']' */
3948    
3949      if (c == 0)                          /* Missing terminating ']' */      if (c == 0)
3950        {        {
3951        *errorcodeptr = ERR6;        *errorcodeptr = ERR6;
3952        goto FAILED;        goto FAILED;
3953        }        }
3954    
   
 /* This code has been disabled because it would mean that \s counts as  
 an explicit \r or \n reference, and that's not really what is wanted. Now  
 we set the flag only if there is a literal "\r" or "\n" in the class. */  
   
 #if 0  
     /* Remember whether \r or \n are in this class */  
   
     if (negate_class)  
       {  
       if ((classbits[1] & 0x24) != 0x24) cd->external_flags |= PCRE_HASCRORLF;  
       }  
     else  
       {  
       if ((classbits[1] & 0x24) != 0) cd->external_flags |= PCRE_HASCRORLF;  
       }  
 #endif  
   
   
3955      /* If class_charcount is 1, we saw precisely one character whose value is      /* If class_charcount is 1, we saw precisely one character whose value is
3956      less than 256. As long as there were no characters >= 128 and there was no      less than 256. As long as there were no characters >= 128 and there was no
3957      use of \p or \P, in other words, no use of any XCLASS features, we can      use of \p or \P, in other words, no use of any XCLASS features, we can
# Line 3765  we set the flag only if there is a liter Line 4015  we set the flag only if there is a liter
4015    
4016      /* If there are characters with values > 255, we have to compile an      /* If there are characters with values > 255, we have to compile an
4017      extended class, with its own opcode, unless there was a negated special      extended class, with its own opcode, unless there was a negated special
4018      such as \S in the class, because in that case all characters > 255 are in      such as \S in the class, and PCRE_UCP is not set, because in that case all
4019      the class, so any that were explicitly given as well can be ignored. If      characters > 255 are in the class, so any that were explicitly given as
4020      (when there are explicit characters > 255 that must be listed) there are no      well can be ignored. If (when there are explicit characters > 255 that must
4021      characters < 256, we can omit the bitmap in the actual compiled code. */      be listed) there are no characters < 256, we can omit the bitmap in the
4022        actual compiled code. */
4023    
4024  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
4025      if (class_utf8 && !should_flip_negation)      if (class_utf8 && (!should_flip_negation || (options & PCRE_UCP) != 0))
4026        {        {
4027        *class_utf8data++ = XCL_END;    /* Marks the end of extra data */        *class_utf8data++ = XCL_END;    /* Marks the end of extra data */
4028        *code++ = OP_XCLASS;        *code++ = OP_XCLASS;
# Line 3797  we set the flag only if there is a liter Line 4048  we set the flag only if there is a liter
4048        }        }
4049  #endif  #endif
4050    
4051      /* If there are no characters > 255, set the opcode to OP_CLASS or      /* If there are no characters > 255, or they are all to be included or
4052      OP_NCLASS, depending on whether the whole class was negated and whether      excluded, set the opcode to OP_CLASS or OP_NCLASS, depending on whether the
4053      there were negative specials such as \S in the class. Then copy the 32-byte      whole class was negated and whether there were negative specials such as \S
4054      map into the code vector, negating it if necessary. */      (non-UCP) in the class. Then copy the 32-byte map into the code vector,
4055        negating it if necessary. */
4056    
4057      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;      *code++ = (negate_class == should_flip_negation) ? OP_CLASS : OP_NCLASS;
4058      if (negate_class)      if (negate_class)
# Line 3924  we set the flag only if there is a liter Line 4176  we set the flag only if there is a liter
4176    
4177        if (!possessive_quantifier &&        if (!possessive_quantifier &&
4178            repeat_max < 0 &&            repeat_max < 0 &&
4179            check_auto_possessive(*previous, c, utf8, utf8_char, ptr + 1,            check_auto_possessive(previous, utf8, ptr + 1, options, cd))
             options, cd))  
4180          {          {
4181          repeat_type = 0;    /* Force greedy */          repeat_type = 0;    /* Force greedy */
4182          possessive_quantifier = TRUE;          possessive_quantifier = TRUE;
# Line 3946  we set the flag only if there is a liter Line 4197  we set the flag only if there is a liter
4197        c = previous[1];        c = previous[1];
4198        if (!possessive_quantifier &&        if (!possessive_quantifier &&
4199            repeat_max < 0 &&            repeat_max < 0 &&
4200            check_auto_possessive(OP_NOT, c, utf8, NULL, ptr + 1, options, cd))            check_auto_possessive(previous, utf8, ptr + 1, options, cd))
4201          {          {
4202          repeat_type = 0;    /* Force greedy */          repeat_type = 0;    /* Force greedy */
4203          possessive_quantifier = TRUE;          possessive_quantifier = TRUE;
# Line 3970  we set the flag only if there is a liter Line 4221  we set the flag only if there is a liter
4221    
4222        if (!possessive_quantifier &&        if (!possessive_quantifier &&
4223            repeat_max < 0 &&            repeat_max < 0 &&
4224            check_auto_possessive(c, 0, utf8, NULL, ptr + 1, options, cd))            check_auto_possessive(previous, utf8, ptr + 1, options, cd))
4225          {          {
4226          repeat_type = 0;    /* Force greedy */          repeat_type = 0;    /* Force greedy */
4227          possessive_quantifier = TRUE;          possessive_quantifier = TRUE;
# Line 4180  we set the flag only if there is a liter Line 4431  we set the flag only if there is a liter
4431        {        {
4432        register int i;        register int i;
4433        int ketoffset = 0;        int ketoffset = 0;
4434        int len = code - previous;        int len = (int)(code - previous);
4435        uschar *bralink = NULL;        uschar *bralink = NULL;
4436    
4437        /* Repeating a DEFINE group is pointless */        /* Repeating a DEFINE group is pointless */
# Line 4201  we set the flag only if there is a liter Line 4452  we set the flag only if there is a liter
4452          {          {
4453          register uschar *ket = previous;          register uschar *ket = previous;
4454          do ket += GET(ket, 1); while (*ket != OP_KET);          do ket += GET(ket, 1); while (*ket != OP_KET);
4455          ketoffset = code - ket;          ketoffset = (int)(code - ket);
4456          }          }
4457    
4458        /* The case of a zero minimum is special because of the need to stick        /* The case of a zero minimum is special because of the need to stick
# Line 4269  we set the flag only if there is a liter Line 4520  we set the flag only if there is a liter
4520            /* We chain together the bracket offset fields that have to be            /* We chain together the bracket offset fields that have to be
4521            filled in later when the ends of the brackets are reached. */            filled in later when the ends of the brackets are reached. */
4522    
4523            offset = (bralink == NULL)? 0 : previous - bralink;            offset = (bralink == NULL)? 0 : (int)(previous - bralink);
4524            bralink = previous;            bralink = previous;
4525            PUTINC(previous, 0, offset);            PUTINC(previous, 0, offset);
4526            }            }
# Line 4378  we set the flag only if there is a liter Line 4629  we set the flag only if there is a liter
4629              {              {
4630              int offset;              int offset;
4631              *code++ = OP_BRA;              *code++ = OP_BRA;
4632              offset = (bralink == NULL)? 0 : code - bralink;              offset = (bralink == NULL)? 0 : (int)(code - bralink);
4633              bralink = code;              bralink = code;
4634              PUTINC(code, 0, offset);              PUTINC(code, 0, offset);
4635              }              }
# Line 4399  we set the flag only if there is a liter Line 4650  we set the flag only if there is a liter
4650          while (bralink != NULL)          while (bralink != NULL)
4651            {            {
4652            int oldlinkoffset;            int oldlinkoffset;
4653            int offset = code - bralink + 1;            int offset = (int)(code - bralink + 1);
4654            uschar *bra = code - offset;            uschar *bra = code - offset;
4655            oldlinkoffset = GET(bra, 1);            oldlinkoffset = GET(bra, 1);
4656            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;            bralink = (oldlinkoffset == 0)? NULL : bralink - oldlinkoffset;
# Line 4487  we set the flag only if there is a liter Line 4738  we set the flag only if there is a liter
4738  #endif  #endif
4739          }          }
4740    
4741        len = code - tempcode;        len = (int)(code - tempcode);
4742        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)
4743          {          {
4744          case OP_STAR:  *tempcode = OP_POSSTAR; break;          case OP_STAR:  *tempcode = OP_POSSTAR; break;
# Line 4556  we set the flag only if there is a liter Line 4807  we set the flag only if there is a liter
4807        const uschar *arg = NULL;        const uschar *arg = NULL;
4808        previous = NULL;        previous = NULL;
4809        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};        while ((cd->ctypes[*++ptr] & ctype_letter) != 0) {};
4810        namelen = ptr - name;        namelen = (int)(ptr - name);
4811    
4812        if (*ptr == CHAR_COLON)        if (*ptr == CHAR_COLON)
4813          {          {
4814          arg = ++ptr;          arg = ++ptr;
4815          while ((cd->ctypes[*ptr] & (ctype_letter|ctype_digit)) != 0          while ((cd->ctypes[*ptr] & (ctype_letter|ctype_digit)) != 0
4816            || *ptr == '_') ptr++;            || *ptr == '_') ptr++;
4817          arglen = ptr - arg;          arglen = (int)(ptr - arg);
4818          }          }
4819    
4820        if (*ptr != CHAR_RIGHT_PARENTHESIS)        if (*ptr != CHAR_RIGHT_PARENTHESIS)
# Line 4601  we set the flag only if there is a liter Line 4852  we set the flag only if there is a liter
4852                *errorcodeptr = ERR66;                *errorcodeptr = ERR66;
4853                goto FAILED;                goto FAILED;
4854                }                }
4855              *code++ = verbs[i].op;              *code = verbs[i].op;
4856                if (*code++ == OP_THEN)
4857                  {
4858                  PUT(code, 0, code - bcptr->current_branch - 1);
4859                  code += LINK_SIZE;
4860                  }
4861              }              }
4862    
4863            else            else
# Line 4611  we set the flag only if there is a liter Line 4867  we set the flag only if there is a liter
4867                *errorcodeptr = ERR59;                *errorcodeptr = ERR59;
4868                goto FAILED;                goto FAILED;
4869                }                }
4870              *code++ = verbs[i].op_arg;              *code = verbs[i].op_arg;
4871                if (*code++ == OP_THEN_ARG)
4872                  {
4873                  PUT(code, 0, code - bcptr->current_branch - 1);
4874                  code += LINK_SIZE;
4875                  }
4876              *code++ = arglen;              *code++ = arglen;
4877              memcpy(code, arg, arglen);              memcpy(code, arg, arglen);
4878              code += arglen;              code += arglen;
# Line 4744  we set the flag only if there is a liter Line 5005  we set the flag only if there is a liter
5005                recno * 10 + *ptr - CHAR_0 : -1;                recno * 10 + *ptr - CHAR_0 : -1;
5006            ptr++;            ptr++;
5007            }            }
5008          namelen = ptr - name;          namelen = (int)(ptr - name);
5009    
5010          if ((terminator > 0 && *ptr++ != terminator) ||          if ((terminator > 0 && *ptr++ != terminator) ||
5011              *ptr++ != CHAR_RIGHT_PARENTHESIS)              *ptr++ != CHAR_RIGHT_PARENTHESIS)
# Line 4805  we set the flag only if there is a liter Line 5066  we set the flag only if there is a liter
5066          /* Search the pattern for a forward reference */          /* Search the pattern for a forward reference */
5067    
5068          else if ((i = find_parens(cd, name, namelen,          else if ((i = find_parens(cd, name, namelen,
5069                          (options & PCRE_EXTENDED) != 0)) > 0)                          (options & PCRE_EXTENDED) != 0, utf8)) > 0)
5070            {            {
5071            PUT2(code, 2+LINK_SIZE, i);            PUT2(code, 2+LINK_SIZE, i);
5072            code[1+LINK_SIZE]++;            code[1+LINK_SIZE]++;
# Line 4940  we set the flag only if there is a liter Line 5201  we set the flag only if there is a liter
5201              goto FAILED;              goto FAILED;
5202              }              }
5203            *code++ = n;            *code++ = n;
5204            PUT(code, 0, ptr - cd->start_pattern + 1);  /* Pattern offset */            PUT(code, 0, (int)(ptr - cd->start_pattern + 1)); /* Pattern offset */
5205            PUT(code, LINK_SIZE, 0);                    /* Default length */            PUT(code, LINK_SIZE, 0);                          /* Default length */
5206            code += 2 * LINK_SIZE;            code += 2 * LINK_SIZE;
5207            }            }
5208          previous = NULL;          previous = NULL;
# Line 4974  we set the flag only if there is a liter Line 5235  we set the flag only if there is a liter
5235            name = ++ptr;            name = ++ptr;
5236    
5237            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;            while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
5238            namelen = ptr - name;            namelen = (int)(ptr - name);
5239    
5240            /* In the pre-compile phase, just do a syntax check. */            /* In the pre-compile phase, just do a syntax check. */
5241    
# Line 5104  we set the flag only if there is a liter Line 5365  we set the flag only if there is a liter
5365          NAMED_REF_OR_RECURSE:          NAMED_REF_OR_RECURSE:
5366          name = ++ptr;          name = ++ptr;
5367          while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;          while ((cd->ctypes[*ptr] & ctype_word) != 0) ptr++;
5368          namelen = ptr - name;          namelen = (int)(ptr - name);
5369    
5370          /* In the pre-compile phase, do a syntax check and set a dummy          /* In the pre-compile phase, do a syntax check and set a dummy
5371          reference number. */          reference number. */
# Line 5151  we set the flag only if there is a liter Line 5412  we set the flag only if there is a liter
5412              }              }
5413            else if ((recno =                /* Forward back reference */            else if ((recno =                /* Forward back reference */
5414                      find_parens(cd, name, namelen,                      find_parens(cd, name, namelen,
5415                        (options & PCRE_EXTENDED) != 0)) <= 0)                        (options & PCRE_EXTENDED) != 0, utf8)) <= 0)
5416              {              {
5417              *errorcodeptr = ERR15;              *errorcodeptr = ERR15;
5418              goto FAILED;              goto FAILED;
# Line 5262  we set the flag only if there is a liter Line 5523  we set the flag only if there is a liter
5523              if (called == NULL)              if (called == NULL)
5524                {                {
5525                if (find_parens(cd, NULL, recno,                if (find_parens(cd, NULL, recno,
5526                      (options & PCRE_EXTENDED) != 0) < 0)                      (options & PCRE_EXTENDED) != 0, utf8) < 0)
5527                  {                  {
5528                  *errorcodeptr = ERR15;                  *errorcodeptr = ERR15;
5529                  goto FAILED;                  goto FAILED;
# Line 5273  we set the flag only if there is a liter Line 5534  we set the flag only if there is a liter
5534                of the group. */                of the group. */
5535    
5536                called = cd->start_code + recno;                called = cd->start_code + recno;
5537                PUTINC(cd->hwm, 0, code + 2 + LINK_SIZE - cd->start_code);                PUTINC(cd->hwm, 0, (int)(code + 2 + LINK_SIZE - cd->start_code));
5538                }                }
5539    
5540              /* If not a forward reference, and the subpattern is still open,              /* If not a forward reference, and the subpattern is still open,
# Line 5297  we set the flag only if there is a liter Line 5558  we set the flag only if there is a liter
5558            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
5559    
5560            *code = OP_RECURSE;            *code = OP_RECURSE;
5561            PUT(code, 1, called - cd->start_code);            PUT(code, 1, (int)(called - cd->start_code));
5562            code += 1 + LINK_SIZE;            code += 1 + LINK_SIZE;
5563    
5564            *code = OP_KET;            *code = OP_KET;
# Line 5601  we set the flag only if there is a liter Line 5862  we set the flag only if there is a liter
5862    
5863      /* ===================================================================*/      /* ===================================================================*/
5864      /* Handle metasequences introduced by \. For ones like \d, the ESC_ values      /* Handle metasequences introduced by \. For ones like \d, the ESC_ values
5865      are arranged to be the negation of the corresponding OP_values. For the      are arranged to be the negation of the corresponding OP_values in the
5866      back references, the values are ESC_REF plus the reference number. Only      default case when PCRE_UCP is not set. For the back references, the values
5867      back references and those types that consume a character may be repeated.      are ESC_REF plus the reference number. Only back references and those types
5868      We can test for values between ESC_b and ESC_Z for the latter; this may      that consume a character may be repeated. We can test for values between
5869      have to change if any new ones are ever created. */      ESC_b and ESC_Z for the latter; this may have to change if any new ones are
5870        ever created. */
5871    
5872      case CHAR_BACKSLASH:      case CHAR_BACKSLASH:
5873      tempptr = ptr;      tempptr = ptr;
# Line 5765  we set the flag only if there is a liter Line 6027  we set the flag only if there is a liter
6027  #endif  #endif
6028    
6029        /* For the rest (including \X when Unicode properties are supported), we        /* For the rest (including \X when Unicode properties are supported), we
6030        can obtain the OP value by negating the escape value. */        can obtain the OP value by negating the escape value in the default
6031          situation when PCRE_UCP is not set. When it *is* set, we substitute
6032          Unicode property tests. */
6033    
6034        else        else
6035          {          {
6036          previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;  #ifdef SUPPORT_UCP
6037          *code++ = -c;          if (-c >= ESC_DU && -c <= ESC_wu)
6038              {
6039              nestptr = ptr + 1;                   /* Where to resume */
6040              ptr = substitutes[-c - ESC_DU] - 1;  /* Just before substitute */
6041              }
6042            else
6043    #endif
6044              {
6045              previous = (-c > ESC_b && -c < ESC_Z)? code : NULL;
6046              *code++ = -c;
6047              }
6048          }          }
6049        continue;        continue;
6050        }        }
# Line 6102  for (;;) Line 6376  for (;;)
6376      {      {
6377      if (lengthptr == NULL)      if (lengthptr == NULL)
6378        {        {
6379        int branch_length = code - last_branch;        int branch_length = (int)(code - last_branch);
6380        do        do
6381          {          {
6382          int prev_length = GET(last_branch, 1);          int prev_length = GET(last_branch, 1);
# Line 6116  for (;;) Line 6390  for (;;)
6390      /* Fill in the ket */      /* Fill in the ket */
6391    
6392      *code = OP_KET;      *code = OP_KET;
6393      PUT(code, 1, code - start_bracket);      PUT(code, 1, (int)(code - start_bracket));
6394      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
6395    
6396      /* If it was a capturing subpattern, check to see if it contained any      /* If it was a capturing subpattern, check to see if it contained any
# Line 6131  for (;;) Line 6405  for (;;)
6405            code - start_bracket);            code - start_bracket);
6406          *start_bracket = OP_ONCE;          *start_bracket = OP_ONCE;
6407          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6408          PUT(start_bracket, 1, code - start_bracket);          PUT(start_bracket, 1, (int)(code - start_bracket));
6409          *code = OP_KET;          *code = OP_KET;
6410          PUT(code, 1, code - start_bracket);          PUT(code, 1, (int)(code - start_bracket));
6411          code += 1 + LINK_SIZE;          code += 1 + LINK_SIZE;
6412          length += 2 + 2*LINK_SIZE;          length += 2 + 2*LINK_SIZE;
6413          }          }
# Line 6188  for (;;) Line 6462  for (;;)
6462    else    else
6463      {      {
6464      *code = OP_ALT;      *code = OP_ALT;
6465      PUT(code, 1, code - last_branch);      PUT(code, 1, (int)(code - last_branch));
6466      bc.current_branch = last_branch = code;      bc.current_branch = last_branch = code;
6467      code += 1 + LINK_SIZE;      code += 1 + LINK_SIZE;
6468      }      }
# Line 6507  int length = 1;  /* For final END opcode Line 6781  int length = 1;  /* For final END opcode
6781  int firstbyte, reqbyte, newline;  int firstbyte, reqbyte, newline;
6782  int errorcode = 0;  int errorcode = 0;
6783  int skipatstart = 0;  int skipatstart = 0;
6784  BOOL utf8 = (options & PCRE_UTF8) != 0;  BOOL utf8;
6785  size_t size;  size_t size;
6786  uschar *code;  uschar *code;
6787  const uschar *codestart;  const uschar *codestart;
# Line 6577  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 6851  while (ptr[skipatstart] == CHAR_LEFT_PAR
6851    
6852    if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)    if (strncmp((char *)(ptr+skipatstart+2), STRING_UTF8_RIGHTPAR, 5) == 0)
6853      { skipatstart += 7; options |= PCRE_UTF8; continue; }      { skipatstart += 7; options |= PCRE_UTF8; continue; }
6854      else if (strncmp((char *)(ptr+skipatstart+2), STRING_UCP_RIGHTPAR, 4) == 0)
6855        { skipatstart += 6; options |= PCRE_UCP; continue; }
6856    
6857    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)    if (strncmp((char *)(ptr+skipatstart+2), STRING_CR_RIGHTPAR, 3) == 0)
6858      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }      { skipatstart += 5; newnl = PCRE_NEWLINE_CR; }
# Line 6601  while (ptr[skipatstart] == CHAR_LEFT_PAR Line 6877  while (ptr[skipatstart] == CHAR_LEFT_PAR
6877    else break;    else break;
6878    }    }
6879    
6880    utf8 = (options & PCRE_UTF8) != 0;
6881    
6882  /* Can't support UTF8 unless PCRE has been compiled to include the code. */  /* Can't support UTF8 unless PCRE has been compiled to include the code. */
6883    
6884  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 6618  if (utf8) Line 6896  if (utf8)
6896    }    }
6897  #endif  #endif
6898    
6899    /* Can't support UCP unless PCRE has been compiled to include the code. */
6900    
6901    #ifndef SUPPORT_UCP
6902    if ((options & PCRE_UCP) != 0)
6903      {
6904      errorcode = ERR67;
6905      goto PCRE_EARLY_ERROR_RETURN;
6906      }
6907    #endif
6908    
6909  /* Check validity of \R options. */  /* Check validity of \R options. */
6910    
6911  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))  switch (options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE))
# Line 6746  regex compiled on a system with 4-byte p Line 7034  regex compiled on a system with 4-byte p
7034  pointers. */  pointers. */
7035    
7036  re->magic_number = MAGIC_NUMBER;  re->magic_number = MAGIC_NUMBER;
7037  re->size = size;  re->size = (int)size;
7038  re->options = cd->external_options;  re->options = cd->external_options;
7039  re->flags = cd->external_flags;  re->flags = cd->external_flags;
7040  re->dummy1 = 0;  re->dummy1 = 0;
# Line 6817  while (errorcode == 0 && cd->hwm > cwork Line 7105  while (errorcode == 0 && cd->hwm > cwork
7105    recno = GET(codestart, offset);    recno = GET(codestart, offset);
7106    groupptr = _pcre_find_bracket(codestart, utf8, recno);    groupptr = _pcre_find_bracket(codestart, utf8, recno);
7107    if (groupptr == NULL) errorcode = ERR53;    if (groupptr == NULL) errorcode = ERR53;
7108      else PUT(((uschar *)codestart), offset, groupptr - codestart);      else PUT(((uschar *)codestart), offset, (int)(groupptr - codestart));
7109    }    }
7110    
7111  /* Give an error if there's a back reference to a non-existent capturing  /* Give an error if there's a back reference to a non-existent capturing
# Line 6872  if (errorcode != 0) Line 7160  if (errorcode != 0)
7160    {    {
7161    (pcre_free)(re);    (pcre_free)(re);
7162    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
7163    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = (int)(ptr - (const uschar *)pattern);
7164    PCRE_EARLY_ERROR_RETURN2:    PCRE_EARLY_ERROR_RETURN2:
7165    *errorptr = find_error_text(errorcode);    *errorptr = find_error_text(errorcode);
7166    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;

Legend:
Removed from v.513  
changed lines
  Added in v.556

  ViewVC Help
Powered by ViewVC 1.1.5