# Diff of /code/trunk/pcre.c

revision 29 by nigel, Sat Feb 24 21:38:53 2007 UTC revision 33 by nigel, Sat Feb 24 21:39:01 2007 UTC
# Line 1091  for (;; ptr++) Line 1091  for (;; ptr++)
1091      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||      else if ((int)*previous >= OP_BRA || (int)*previous == OP_ONCE ||
1092               (int)*previous == OP_COND)               (int)*previous == OP_COND)
1093        {        {
1094        int i, ketoffset = 0;        register int i;
1095          int ketoffset = 0;
1096        int len = code - previous;        int len = code - previous;
1098
1099        /* If the maximum repeat count is unlimited, find the end of the bracket        /* If the maximum repeat count is unlimited, find the end of the bracket
1100        by scanning through from the start, and compute the offset back to it        by scanning through from the start, and compute the offset back to it
# Line 1107  for (;; ptr++) Line 1109  for (;; ptr++)
1109          ketoffset = code - ket;          ketoffset = code - ket;
1110          }          }
1111
1112          /* The case of a zero minimum is special because of the need to stick
1113          OP_BRAZERO in front of it, and because the group appears once in the
1114          data, whereas in other cases it appears the minimum number of times. For
1115          this reason, it is simplest to treat this case separately, as otherwise
1116          the code gets far too messy. There are several special subcases when the
1117          minimum is zero. */
1118
1119          if (repeat_min == 0)
1120            {
1121            /* If the maximum is also zero, we just omit the group from the output
1122            altogether. */
1123
1124            if (repeat_max == 0)
1125              {
1126              code = previous;
1127              previous = NULL;
1128              break;
1129              }
1130
1131            /* If the maximum is 1 or unlimited, we just have to stick in the
1132            BRAZERO and do no more at this point. */
1133
1134            if (repeat_max <= 1)
1135              {
1136              memmove(previous+1, previous, len);
1137              code++;
1138              *previous++ = OP_BRAZERO + repeat_type;
1139              }
1140
1141            /* If the maximum is greater than 1 and limited, we have to replicate
1142            in a nested fashion, sticking OP_BRAZERO before each set of brackets.
1143            The first one has to be handled carefully because it's the original
1144            copy, which has to be moved up. The remainder can be handled by code
1145            that is common with the non-zero minimum case below. We just have to
1146            adjust the value of repeat_max, since one less copy is required. */
1147
1148            else
1149              {
1150              int offset;
1151              memmove(previous+4, previous, len);
1152              code += 4;
1153              *previous++ = OP_BRAZERO + repeat_type;
1154              *previous++ = OP_BRA;
1155
1156              /* We chain together the bracket offset fields that have to be
1157              filled in later when the ends of the brackets are reached. */
1158
1161              *previous++ = offset >> 8;
1162              *previous++ = offset & 255;
1163              }
1164
1165            repeat_max--;
1166            }
1167
1168          /* If the minimum is greater than zero, replicate the group as many
1169          times as necessary, and adjust the maximum to the number of subsequent
1170          copies that we need. */
1171
1172          else
1173            {
1174            for (i = 1; i < repeat_min; i++)
1175              {
1176              memcpy(code, previous, len);
1177              code += len;
1178              }
1179            if (repeat_max > 0) repeat_max -= repeat_min;
1180            }
1181
1182          /* This code is common to both the zero and non-zero minimum cases. If
1183          the maximum is limited, it replicates the group in a nested fashion,
1184          remembering the bracket starts on a stack. In the case of a zero minimum,
1185          the first one was set up above. In all cases the repeat_max now specifies
1186          the number of additional copies needed. */
1187
1188          if (repeat_max >= 0)
1189            {
1190            for (i = repeat_max - 1; i >= 0; i--)
1191              {
1192              *code++ = OP_BRAZERO + repeat_type;
1193
1194              /* All but the final copy start a new nesting, maintaining the
1195              chain of brackets outstanding. */
1196
1197              if (i != 0)
1198                {
1199                int offset;
1200                *code++ = OP_BRA;
1203                *code++ = offset >> 8;
1204                *code++ = offset & 255;
1205                }
1206
1207              memcpy(code, previous, len);
1208              code += len;
1209              }
1210
1211            /* Now chain through the pending brackets, and fill in their length
1212            fields (which are holding the chain links pro tem). */
1213
1215              {
1217              int offset = code - bralink + 1;
1218              uschar *bra = code - offset;
1219              oldlinkoffset = (bra[1] << 8) + bra[2];
1221              *code++ = OP_KET;
1222              *code++ = bra[1] = offset >> 8;
1223              *code++ = bra[2] = (offset & 255);
1224              }
1225            }
1226
1227          /* If the maximum is unlimited, set a repeater in the final copy. We
1228          can't just offset backwards from the current code point, because we
1229          don't know if there's been an options resetting after the ket. The
1230          correct offset was computed above. */
1231
1232          else code[-ketoffset] = OP_KETRMAX + repeat_type;
1233
1234
1235    #ifdef NEVER
1236        /* If the minimum is greater than zero, and the maximum is unlimited or        /* If the minimum is greater than zero, and the maximum is unlimited or
1237        equal to the minimum, the first copy remains where it is, and is        equal to the minimum, the first copy remains where it is, and is
1238        replicated up to the minimum number of times. This case includes the +        replicated up to the minimum number of times. This case includes the +
# Line 1154  for (;; ptr++) Line 1280  for (;; ptr++)
1280        correct offset was computed above. */        correct offset was computed above. */
1281
1282        if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;        if (repeat_max == -1) code[-ketoffset] = OP_KETRMAX + repeat_type;
1283    #endif
1284
1285
1286        }        }
1287
1288      /* Else there's some kind of shambles */      /* Else there's some kind of shambles */
1817  it's anchored. However, if this is a multiline pattern, then only OP_SOD  it's anchored. However, if this is a multiline pattern, then only OP_SOD
1818  counts, since OP_CIRC can match in the middle.  counts, since OP_CIRC can match in the middle.
1819
1820  A branch is also implicitly anchored if it starts with .* because that will try  A branch is also implicitly anchored if it starts with .* and DOTALL is set,
1821  the rest of the pattern at all possible matching points, so there is no point  because that will try the rest of the pattern at all possible matching points,
1822  trying them again.  so there is no point trying them again.
1823
1824  Arguments:  Arguments:
1825    code       points to start of expression (the bracket)    code       points to start of expression (the bracket)
# Line 1708  do { Line 1837  do {
1837     register int op = *scode;     register int op = *scode;
1838     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1839       { if (!is_anchored(scode, options)) return FALSE; }       { if (!is_anchored(scode, options)) return FALSE; }
1840     else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)     else if ((op == OP_TYPESTAR || op == OP_TYPEMINSTAR) &&
1841                (*options & PCRE_DOTALL) != 0)
1842       { if (scode[1] != OP_ANY) return FALSE; }       { if (scode[1] != OP_ANY) return FALSE; }
1843     else if (op != OP_SOD &&     else if (op != OP_SOD &&
1844             ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))             ((*options & PCRE_MULTILINE) != 0 || op != OP_CIRC))
# Line 1722  return TRUE; Line 1852  return TRUE;
1852
1853
1854  /*************************************************  /*************************************************
1855  *     Check for start with \n line expression    *  *         Check for starting with ^ or .*        *
1856  *************************************************/  *************************************************/
1857
1858  /* This is called for multiline expressions to try to find out if every branch  /* This is called to find out if every branch starts with ^ or .* so that
1859  starts with ^ so that "first char" processing can be done to speed things up.  "first char" processing can be done to speed things up in multiline
1860    matching and for non-DOTALL patterns that start with .* (which must start at
1861    the beginning or after \n).
1862
1863  Argument:  points to start of expression (the bracket)  Argument:  points to start of expression (the bracket)
1864  Returns:   TRUE or FALSE  Returns:   TRUE or FALSE
# Line 1740  do { Line 1872  do {
1872     register int op = *scode;     register int op = *scode;
1873     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)     if (op >= OP_BRA || op == OP_ASSERT || op == OP_ONCE || op == OP_COND)
1874       { if (!is_startline(scode)) return FALSE; }       { if (!is_startline(scode)) return FALSE; }
1875       else if (op == OP_TYPESTAR || op == OP_TYPEMINSTAR)
1876         { if (scode[1] != OP_ANY) return FALSE; }
1877     else if (op != OP_CIRC) return FALSE;     else if (op != OP_CIRC) return FALSE;
1878     code += (code[1] << 8) + code[2];     code += (code[1] << 8) + code[2];
1879     }     }
# Line 2272  while ((c = *(++ptr)) != 0) Line 2406  while ((c = *(++ptr)) != 0)
2406        else if (c == '+') { maxval = -1; ptr++; }        else if (c == '+') { maxval = -1; ptr++; }
2407        else if (c == '?') { minval = 0; ptr++; }        else if (c == '?') { minval = 0; ptr++; }
2408
2409        /* If there is a minimum > 1 we have to replicate up to minval-1 times;        /* If the minimum is zero, we have to allow for an OP_BRAZERO before the
2410        if there is a limited maximum we have to replicate up to maxval-1 times        group, and if the maximum is greater than zero, we have to replicate
2411        and allow for a BRAZERO item before each optional copy, as we also have        maxval-1 times; each replication acquires an OP_BRAZERO plus a nesting
2412        to do before the first copy if the minimum is zero. */        bracket set - hence the 7. */
2413
2414        if (minval == 0) length++;        if (minval == 0)
2415          else if (minval > 1) length += (minval - 1) * duplength;          {
2416        if (maxval > minval) length += (maxval - minval) * (duplength + 1);          length++;
2417            if (maxval > 0) length += (maxval - 1) * (duplength + 7);
2418            }
2419
2420          /* When the minimum is greater than zero, we have to replicate up to
2421          minval-1 times, with no additions required in the copies. Then, if
2422          there is a limited maximum we have to replicate up to maxval-1 times
2423          allowing for a BRAZERO item before each optional copy and nesting
2424          brackets for all but one of the optional copies. */
2425
2426          else
2427            {
2428            length += (minval - 1) * duplength;
2429            if (maxval > minval)   /* Need this test as maxval=-1 means no limit */
2430              length += (maxval - minval) * (duplength + 7) - 6;
2431            }
2432        }        }
2433      continue;      continue;
2434
# Line 2402  if (*errorptr != NULL) Line 2551  if (*errorptr != NULL)
2551    return NULL;    return NULL;
2552    }    }
2553
2554  /* If the anchored option was not passed, set flag if we can determine that it  /* If the anchored option was not passed, set flag if we can determine that the
2555  is anchored by virtue of ^ characters or \A or anything else. Otherwise, see if  pattern is anchored by virtue of ^ characters or \A or anything else (such as
2556  we can determine what the first character has to be, because that speeds up  starting with .* when DOTALL is set).
2557  unanchored matches no end. In the case of multiline matches, an alternative is
2558  to set the PCRE_STARTLINE flag if all branches start with ^. */  Otherwise, see if we can determine what the first character has to be, because
2559    that speeds up unanchored matches no end. If not, see if we can set the
2560    PCRE_STARTLINE flag. This is helpful for multiline matches when all branches
2562    */
2563
2564  if ((options & PCRE_ANCHORED) == 0)  if ((options & PCRE_ANCHORED) == 0)
2565    {    {
# Line 2775  for (;;) Line 2928  for (;;)
2928      int number = op - OP_BRA;      int number = op - OP_BRA;
2929      int offset = number << 1;      int offset = number << 1;
2930
2931      DPRINTF(("start bracket %d\n", number));  #ifdef DEBUG
2932        printf("start bracket %d subject=", number);
2933        pchars(eptr, 16, TRUE, md);
2934        printf("\n");
2935    #endif
2936
2937      if (offset < md->offset_max)      if (offset < md->offset_max)
2938        {        {

Legend:
 Removed from v.29 changed lines Added in v.33