/[pcre]/code/trunk/pcre.c
ViewVC logotype

Diff of /code/trunk/pcre.c

Parent Directory | Revision Log | View Patch

revision 9 by nigel, Sat Feb 24 21:38:13 2007 UTC revision 13 by nigel, Sat Feb 24 21:38:21 2007 UTC
# Line 54  the external pcre header. */ Line 54  the external pcre header. */
54  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };  static char rep_min[] = { 0, 0, 1, 1, 0, 0 };
55  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };  static char rep_max[] = { 0, 0, 0, 0, 1, 1 };
56    
57  /* Text forms of OP_ values and things, for debugging */  /* Text forms of OP_ values and things, for debugging (not all used) */
58    
59  #ifdef DEBUG  #ifdef DEBUG
60  static const char *OP_names[] = {  static const char *OP_names[] = {
# Line 65  static const char *OP_names[] = { Line 65  static const char *OP_names[] = {
65    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
66    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
67    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
68    "class", "Ref",    "class", "negclass", "Ref",
69    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not", "Once",
70    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
71  };  };
# Line 91  static short int escapes[] = { Line 91  static short int escapes[] = {
91    
92  /* Definition to allow mutual recursion */  /* Definition to allow mutual recursion */
93    
94  static BOOL compile_regex(int, int *, uschar **, const uschar **, const char **);  static BOOL
95      compile_regex(int, int *, uschar **, const uschar **, const char **);
96    
97  /* Structure for passing "static" information around between the functions  /* Structure for passing "static" information around between the functions
98  doing the matching, so that they are thread-safe. */  doing the matching, so that they are thread-safe. */
# Line 306  do { Line 307  do {
307        /* Check a class or a back reference for a zero minimum */        /* Check a class or a back reference for a zero minimum */
308    
309        case OP_CLASS:        case OP_CLASS:
310          case OP_NEGCLASS:
311        case OP_REF:        case OP_REF:
312        cc += (*cc == OP_REF)? 2 : 33;        cc += (*cc == OP_REF)? 2 : 33;
313    
# Line 670  for (;; ptr++) Line 672  for (;; ptr++)
672    
673      case '[':      case '[':
674      previous = code;      previous = code;
     *code++ = OP_CLASS;  
675    
676      /* If the first character is '^', set the negation flag */      /* If the first character is '^', set the negation flag, and use a
677        different opcode. This only matters if caseless matching is specified at
678        runtime. */
679    
680      if ((c = *(++ptr)) == '^')      if ((c = *(++ptr)) == '^')
681        {        {
682        negate_class = TRUE;        negate_class = TRUE;
683          *code++ = OP_NEGCLASS;
684        c = *(++ptr);        c = *(++ptr);
685        }        }
686      else negate_class = FALSE;      else
687          {
688          negate_class = FALSE;
689          *code++ = OP_CLASS;
690          }
691    
692      /* Keep a count of chars so that we can optimize the case of just a single      /* Keep a count of chars so that we can optimize the case of just a single
693      character. */      character. */
# Line 1015  for (;; ptr++) Line 1023  for (;; ptr++)
1023      /* If previous was a character class or a back reference, we put the repeat      /* If previous was a character class or a back reference, we put the repeat
1024      stuff after it. */      stuff after it. */
1025    
1026      else if (*previous == OP_CLASS || *previous == OP_REF)      else if (*previous == OP_CLASS || *previous == OP_NEGCLASS ||
1027                 *previous == OP_REF)
1028        {        {
1029        if (repeat_min == 0 && repeat_max == -1)        if (repeat_min == 0 && repeat_max == -1)
1030          *code++ = OP_CRSTAR + repeat_type;          *code++ = OP_CRSTAR + repeat_type;
# Line 2041  while (code < code_end) Line 2050  while (code < code_end)
2050      case OP_MINUPTO:      case OP_MINUPTO:
2051      if (isprint(c = code[3])) printf("    %c{", c);      if (isprint(c = code[3])) printf("    %c{", c);
2052        else printf("    \\x%02x{", c);        else printf("    \\x%02x{", c);
2053      if (*code != OP_EXACT) printf(",");      if (*code != OP_EXACT) printf("0,");
2054      printf("%d}", (code[1] << 8) + code[2]);      printf("%d}", (code[1] << 8) + code[2]);
2055      if (*code == OP_MINUPTO) printf("?");      if (*code == OP_MINUPTO) printf("?");
2056      code += 3;      code += 3;
# Line 2090  while (code < code_end) Line 2099  while (code < code_end)
2099      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
2100    
2101      case OP_CLASS:      case OP_CLASS:
2102        case OP_NEGCLASS:
2103        {        {
2104        int i, min, max;        int i, min, max;
2105    
2106        code++;        if (*code++ == OP_CLASS) printf("    [");
2107        printf("    [");          else printf("   ^[");
2108    
2109        for (i = 0; i < 256; i++)        for (i = 0; i < 256; i++)
2110          {          {
# Line 2714  for (;;) Line 2724  for (;;)
2724      item to see if there is repeat information following. Then obey similar      item to see if there is repeat information following. Then obey similar
2725      code to character type repeats - written out again for speed. If caseless      code to character type repeats - written out again for speed. If caseless
2726      matching was set at runtime but not at compile time, we have to check both      matching was set at runtime but not at compile time, we have to check both
2727      versions of a character. */      versions of a character, and we have to behave differently for positive and
2728        negative classes. This is the only time where OP_CLASS and OP_NEGCLASS are
2729        treated differently. */
2730    
2731      case OP_CLASS:      case OP_CLASS:
2732        case OP_NEGCLASS:
2733        {        {
2734          BOOL nasty_case = *ecode == OP_NEGCLASS && md->runtime_caseless;
2735        const uschar *data = ecode + 1;  /* Save for matching */        const uschar *data = ecode + 1;  /* Save for matching */
2736        ecode += 33;                     /* Advance past the item */        ecode += 33;                     /* Advance past the item */
2737    
# Line 2746  for (;;) Line 2760  for (;;)
2760          break;          break;
2761    
2762          default:               /* No repeat follows */          default:               /* No repeat follows */
2763          if (eptr >= md->end_subject) return FALSE;          min = max = 1;
2764          c = *eptr++;          break;
         if ((data[c/8] & (1 << (c&7))) != 0) continue;    /* With main loop */  
         if (md->runtime_caseless)  
           {  
           c = pcre_fcc[c];  
           if ((data[c/8] & (1 << (c&7))) != 0) continue;  /* With main loop */  
           }  
         return FALSE;  
2765          }          }
2766    
2767        /* First, ensure the minimum number of matches are present. */        /* First, ensure the minimum number of matches are present. */
# Line 2763  for (;;) Line 2770  for (;;)
2770          {          {
2771          if (eptr >= md->end_subject) return FALSE;          if (eptr >= md->end_subject) return FALSE;
2772          c = *eptr++;          c = *eptr++;
2773          if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2774          if (md->runtime_caseless)          /* Either not runtime caseless, or it was a positive class. For
2775            runtime caseless, continue if either case is in the map. */
2776    
2777            if (!nasty_case)
2778            {            {
2779              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2780              if (md->runtime_caseless)
2781                {
2782                c = pcre_fcc[c];
2783                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2784                }
2785              }
2786    
2787            /* Runtime caseless and it was a negative class. Continue only if
2788            both cases are in the map. */
2789    
2790            else
2791              {
2792              if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2793            c = pcre_fcc[c];            c = pcre_fcc[c];
2794            if ((data[c/8] & (1 << (c&7))) != 0) continue;            if ((data[c/8] & (1 << (c&7))) != 0) continue;
2795            }            }
2796    
2797          return FALSE;          return FALSE;
2798          }          }
2799    
# Line 2787  for (;;) Line 2812  for (;;)
2812            if (match(eptr, ecode, offset_top, md)) return TRUE;            if (match(eptr, ecode, offset_top, md)) return TRUE;
2813            if (i >= max || eptr >= md->end_subject) return FALSE;            if (i >= max || eptr >= md->end_subject) return FALSE;
2814            c = *eptr++;            c = *eptr++;
2815            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2816            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2817              runtime caseless, continue if either case is in the map. */
2818    
2819              if (!nasty_case)
2820              {              {
2821                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2822                if (md->runtime_caseless)
2823                  {
2824                  c = pcre_fcc[c];
2825                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2826                  }
2827                }
2828    
2829              /* Runtime caseless and it was a negative class. Continue only if
2830              both cases are in the map. */
2831    
2832              else
2833                {
2834                if ((data[c/8] & (1 << (c&7))) == 0) return FALSE;
2835              c = pcre_fcc[c];              c = pcre_fcc[c];
2836              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2837              }              }
2838    
2839            return FALSE;            return FALSE;
2840            }            }
2841          /* Control never gets here */          /* Control never gets here */
# Line 2807  for (;;) Line 2850  for (;;)
2850            {            {
2851            if (eptr >= md->end_subject) break;            if (eptr >= md->end_subject) break;
2852            c = *eptr;            c = *eptr;
2853            if ((data[c/8] & (1 << (c&7))) != 0) continue;  
2854            if (md->runtime_caseless)            /* Either not runtime caseless, or it was a positive class. For
2855              runtime caseless, continue if either case is in the map. */
2856    
2857              if (!nasty_case)
2858              {              {
2859                if ((data[c/8] & (1 << (c&7))) != 0) continue;
2860                if (md->runtime_caseless)
2861                  {
2862                  c = pcre_fcc[c];
2863                  if ((data[c/8] & (1 << (c&7))) != 0) continue;
2864                  }
2865                }
2866    
2867              /* Runtime caseless and it was a negative class. Continue only if
2868              both cases are in the map. */
2869    
2870              else
2871                {
2872                if ((data[c/8] & (1 << (c&7))) == 0) break;
2873              c = pcre_fcc[c];              c = pcre_fcc[c];
2874              if ((data[c/8] & (1 << (c&7))) != 0) continue;              if ((data[c/8] & (1 << (c&7))) != 0) continue;
2875              }              }
2876    
2877            break;            break;
2878            }            }
2879    
# Line 3299  hide it in a separate function. This is Line 3360  hide it in a separate function. This is
3360  since it's needed only for the extension \X option, and with any luck, a good  since it's needed only for the extension \X option, and with any luck, a good
3361  compiler will spot the tail recursion and compile it efficiently.  compiler will spot the tail recursion and compile it efficiently.
3362    
3363  Arguments:    The block containing the match data  Arguments:
3364  Returns:      The return from setjump()     eptr        pointer in subject
3365       ecode       position in code
3366       offset_top  current top pointer
3367       md          pointer to "static" info for the match
3368    
3369    Returns:       TRUE if matched
3370  */  */
3371    
3372  static int  static BOOL
3373  my_setjmp(match_data *match_block)  match_with_setjmp(const uschar *eptr, const uschar *ecode, int offset_top,
3374      match_data *match_block)
3375  {  {
3376  return setjmp(match_block->fail_env);  return setjmp(match_block->fail_env) == 0 &&
3377          match(eptr, ecode, offset_top, match_block);
3378  }  }
3379    
3380    
# Line 3338  int Line 3406  int
3406  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,  pcre_exec(const pcre *external_re, const pcre_extra *external_extra,
3407    const char *subject, int length, int options, int *offsets, int offsetcount)    const char *subject, int length, int options, int *offsets, int offsetcount)
3408  {  {
3409  int resetcount;  int resetcount, ocount;
 int ocount = offsetcount;  
3410  int first_char = -1;  int first_char = -1;
3411  match_data match_block;  match_data match_block;
3412  const uschar *start_bits = NULL;  const uschar *start_bits = NULL;
# Line 3347  const uschar *start_match = (const uscha Line 3414  const uschar *start_match = (const uscha
3414  const uschar *end_subject;  const uschar *end_subject;
3415  const real_pcre *re = (const real_pcre *)external_re;  const real_pcre *re = (const real_pcre *)external_re;
3416  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;  const real_pcre_extra *extra = (const real_pcre_extra *)external_extra;
3417    BOOL using_temporary_offsets = FALSE;
3418  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;  BOOL anchored = ((re->options | options) & PCRE_ANCHORED) != 0;
3419  BOOL startline = (re->options & PCRE_STARTLINE) != 0;  BOOL startline = (re->options & PCRE_STARTLINE) != 0;
3420    
# Line 3375  match_block.errorcode = PCRE_ERROR_NOMAT Line 3443  match_block.errorcode = PCRE_ERROR_NOMAT
3443    
3444  /* If the expression has got more back references than the offsets supplied can  /* If the expression has got more back references than the offsets supplied can
3445  hold, we get a temporary bit of working store to use during the matching.  hold, we get a temporary bit of working store to use during the matching.
3446  Otherwise, we can use the vector supplied, rounding down the size of it to a  Otherwise, we can use the vector supplied, rounding down its size to a multiple
3447  multiple of 2. */  of 2. */
3448    
3449  ocount &= (-2);  ocount = offsetcount & (-2);
3450  if (re->top_backref > 0 && re->top_backref + 1 >= ocount/2)  if (re->top_backref > 0 && re->top_backref >= ocount/2)
3451    {    {
3452    ocount = re->top_backref * 2 + 2;    ocount = re->top_backref * 2 + 2;
3453    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));    match_block.offset_vector = (pcre_malloc)(ocount * sizeof(int));
3454    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;    if (match_block.offset_vector == NULL) return PCRE_ERROR_NOMEMORY;
3455      using_temporary_offsets = TRUE;
3456    DPRINTF(("Got memory to hold back references\n"));    DPRINTF(("Got memory to hold back references\n"));
3457    }    }
3458  else match_block.offset_vector = offsets;  else match_block.offset_vector = offsets;
# Line 3498  do Line 3567  do
3567    it unless PCRE_EXTRA is set, since only in that case is the "cut" operation    it unless PCRE_EXTRA is set, since only in that case is the "cut" operation
3568    enabled. */    enabled. */
3569    
3570    if (((re->options & PCRE_EXTRA) != 0 && my_setjmp(&match_block) != 0) ||    if ((re->options & PCRE_EXTRA) != 0)
3571        !match(start_match, re->code, 2, &match_block))      {
3572      continue;      if (!match_with_setjmp(start_match, re->code, 2, &match_block))
3573          continue;
3574        }
3575      else if (!match(start_match, re->code, 2, &match_block)) continue;
3576    
3577    /* Copy the offset information from temporary store if necessary */    /* Copy the offset information from temporary store if necessary */
3578    
3579    if (ocount != offsetcount)    if (using_temporary_offsets)
3580      {      {
3581      if (offsetcount >= 4)      if (offsetcount >= 4)
3582        {        {
3583        memcpy(offsets + 2, match_block.offset_vector + 2,        memcpy(offsets + 2, match_block.offset_vector + 2,
3584          (offsetcount - 2) * sizeof(int));          (offsetcount - 2) * sizeof(int));
3585        DPRINTF(("Copied offsets; freeing temporary memory\n"));        DPRINTF(("Copied offsets from temporary memory\n"));
3586        }        }
3587      if (match_block.end_offset_top > offsetcount)      if (match_block.end_offset_top > offsetcount)
3588        match_block.offset_overflow = TRUE;        match_block.offset_overflow = TRUE;
# Line 3534  while (!anchored && Line 3606  while (!anchored &&
3606         match_block.errorcode == PCRE_ERROR_NOMATCH &&         match_block.errorcode == PCRE_ERROR_NOMATCH &&
3607         start_match++ < end_subject);         start_match++ < end_subject);
3608    
3609    if (using_temporary_offsets)
3610      {
3611      DPRINTF(("Freeing temporary memory\n"));
3612      (pcre_free)(match_block.offset_vector);
3613      }
3614    
3615  DPRINTF((">>>> returning %d\n", match_block.errorcode));  DPRINTF((">>>> returning %d\n", match_block.errorcode));
3616    
3617  return match_block.errorcode;  return match_block.errorcode;

Legend:
Removed from v.9  
changed lines
  Added in v.13

  ViewVC Help
Powered by ViewVC 1.1.5