/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 178 by ph10, Wed Jun 13 08:44:34 2007 UTC | revision 231 by ph10, Tue Sep 11 11:15:33 2007 UTC
# Line 44  FSM). This is NOT Perl-compatible, but Line 44  FSM). This is NOT Perl-compatible, but
44  applications. */  applications. */
45    
46    
47    #ifdef HAVE_CONFIG_H
48    #include <config.h>
49    #endif
50    
51  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
52  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
53  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 63  applications. */ Line 67  applications. */
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 20 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. The resulting opcodes don't have to be less than 256 because they are  enough. The resulting opcodes don't have to be less than 256 because they are
71  never stored, so we push them well clear of the normal opcodes. */  never stored, so we push them well clear of the normal opcodes. */
72    
73  #define OP_PROP_EXTRA       300  #define OP_PROP_EXTRA       300
# Line 126  static uschar coptable[] = { Line 130  static uschar coptable[] = {
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131    0,                             /* RREF                                   */    0,                             /* RREF                                   */
132    0,                             /* DEF                                    */    0,                             /* DEF                                    */
133    0, 0                           /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134      0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135      0, 0                           /* FAIL, ACCEPT                           */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
# Line 585  for (;;) Line 591  for (;;)
591            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593            case OP_NOT_HSPACE:            case OP_NOT_HSPACE:
594            case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;            case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595            case OP_NOT_VSPACE:            case OP_NOT_VSPACE:
596            case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;            case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597            default: break;            default: break;
598            }            }
599          }          }
# Line 1074  for (;;) Line 1080  for (;;)
1080          int ncount = 0;          int ncount = 0;
1081          switch (c)          switch (c)
1082            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1083            case 0x000b:            case 0x000b:
1084            case 0x000c:            case 0x000c:
1085            case 0x0085:            case 0x0085:
1086            case 0x2028:            case 0x2028:
1087            case 0x2029:            case 0x2029:
1088              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1089              goto ANYNL01;
1090    
1091              case 0x000d:
1092              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1093              /* Fall through */
1094    
1095              ANYNL01:
1096              case 0x000a:
1097            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1098              {              {
1099              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1091  for (;;) Line 1102  for (;;)
1102            count++;            count++;
1103            ADD_NEW_DATA(-state_offset, count, ncount);            ADD_NEW_DATA(-state_offset, count, ncount);
1104            break;            break;
1105    
1106            default:            default:
1107            break;            break;
1108            }            }
# Line 1105  for (;;) Line 1117  for (;;)
1117        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1118        if (clen > 0)        if (clen > 0)
1119          {          {
1120          BOOL OK;          BOOL OK;
1121          switch (c)          switch (c)
1122            {            {
1123            case 0x000a:            case 0x000a:
# Line 1116  for (;;) Line 1128  for (;;)
1128            case 0x2028:            case 0x2028:
1129            case 0x2029:            case 0x2029:
1130            OK = TRUE;            OK = TRUE;
1131            break;            break;
1132    
1133            default:            default:
1134            OK = FALSE;            OK = FALSE;
1135            break;            break;
1136            }            }
1137    
1138          if (OK == (d == OP_VSPACE))          if (OK == (d == OP_VSPACE))
1139            {            {
1140            if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1141              {              {
1142              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1144  for (;;) Line 1156  for (;;)
1156        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1157        if (clen > 0)        if (clen > 0)
1158          {          {
1159          BOOL OK;          BOOL OK;
1160          switch (c)          switch (c)
1161            {            {
1162            case 0x09:      /* HT */            case 0x09:      /* HT */
# Line 1168  for (;;) Line 1180  for (;;)
1180            case 0x3000:    /* IDEOGRAPHIC SPACE */            case 0x3000:    /* IDEOGRAPHIC SPACE */
1181            OK = TRUE;            OK = TRUE;
1182            break;            break;
1183    
1184            default:            default:
1185            OK = FALSE;            OK = FALSE;
1186            break;            break;
1187            }            }
1188    
1189          if (OK == (d == OP_HSPACE))          if (OK == (d == OP_HSPACE))
1190            {            {
1191            if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1192              {              {
1193              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1307  for (;;) Line 1319  for (;;)
1319          int ncount = 0;          int ncount = 0;
1320          switch (c)          switch (c)
1321            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1322            case 0x000b:            case 0x000b:
1323            case 0x000c:            case 0x000c:
1324            case 0x0085:            case 0x0085:
1325            case 0x2028:            case 0x2028:
1326            case 0x2029:            case 0x2029:
1327              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1328              goto ANYNL02;
1329    
1330              case 0x000d:
1331              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1332              /* Fall through */
1333    
1334              ANYNL02:
1335              case 0x000a:
1336            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1337                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1338              {              {
# Line 1324  for (;;) Line 1341  for (;;)
1341              }              }
1342            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1343            break;            break;
1344    
1345            default:            default:
1346            break;            break;
1347            }            }
# Line 1346  for (;;) Line 1364  for (;;)
1364        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1365        if (clen > 0)        if (clen > 0)
1366          {          {
1367          BOOL OK;          BOOL OK;
1368          switch (c)          switch (c)
1369            {            {
1370            case 0x000a:            case 0x000a:
# Line 1358  for (;;) Line 1376  for (;;)
1376            case 0x2029:            case 0x2029:
1377            OK = TRUE;            OK = TRUE;
1378            break;            break;
1379    
1380            default:            default:
1381            OK = FALSE;            OK = FALSE;
1382            break;            break;
1383            }            }
1384          if (OK == (d == OP_VSPACE))          if (OK == (d == OP_VSPACE))
1385            {            {
1386            if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1387                codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1388              {              {
# Line 1392  for (;;) Line 1410  for (;;)
1410        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1411        if (clen > 0)        if (clen > 0)
1412          {          {
1413          BOOL OK;          BOOL OK;
1414          switch (c)          switch (c)
1415            {            {
1416            case 0x09:      /* HT */            case 0x09:      /* HT */
# Line 1416  for (;;) Line 1434  for (;;)
1434            case 0x3000:    /* IDEOGRAPHIC SPACE */            case 0x3000:    /* IDEOGRAPHIC SPACE */
1435            OK = TRUE;            OK = TRUE;
1436            break;            break;
1437    
1438            default:            default:
1439            OK = FALSE;            OK = FALSE;
1440            break;            break;
1441            }            }
1442    
1443          if (OK == (d == OP_HSPACE))          if (OK == (d == OP_HSPACE))
1444            {            {
1445            if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1446                codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1447              {              {
# Line 1539  for (;;) Line 1557  for (;;)
1557          int ncount = 0;          int ncount = 0;
1558          switch (c)          switch (c)
1559            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1560            case 0x000b:            case 0x000b:
1561            case 0x000c:            case 0x000c:
1562            case 0x0085:            case 0x0085:
1563            case 0x2028:            case 0x2028:
1564            case 0x2029:            case 0x2029:
1565              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1566              goto ANYNL03;
1567    
1568              case 0x000d:
1569              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1570              /* Fall through */
1571    
1572              ANYNL03:
1573              case 0x000a:
1574            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1575              {              {
1576              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1558  for (;;) Line 1581  for (;;)
1581            else            else
1582              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1583            break;            break;
1584    
1585            default:            default:
1586            break;            break;
1587            }            }
# Line 1574  for (;;) Line 1598  for (;;)
1598        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1599        if (clen > 0)        if (clen > 0)
1600          {          {
1601          BOOL OK;          BOOL OK;
1602          switch (c)          switch (c)
1603            {            {
1604            case 0x000a:            case 0x000a:
# Line 1586  for (;;) Line 1610  for (;;)
1610            case 0x2029:            case 0x2029:
1611            OK = TRUE;            OK = TRUE;
1612            break;            break;
1613    
1614            default:            default:
1615            OK = FALSE;            OK = FALSE;
1616            }            }
1617    
1618          if (OK == (d == OP_VSPACE))          if (OK == (d == OP_VSPACE))
1619            {            {
1620            if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1621              {              {
1622              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1616  for (;;) Line 1640  for (;;)
1640        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1641        if (clen > 0)        if (clen > 0)
1642          {          {
1643          BOOL OK;          BOOL OK;
1644          switch (c)          switch (c)
1645            {            {
1646            case 0x09:      /* HT */            case 0x09:      /* HT */
# Line 1640  for (;;) Line 1664  for (;;)
1664            case 0x3000:    /* IDEOGRAPHIC SPACE */            case 0x3000:    /* IDEOGRAPHIC SPACE */
1665            OK = TRUE;            OK = TRUE;
1666            break;            break;
1667    
1668            default:            default:
1669            OK = FALSE;            OK = FALSE;
1670            break;            break;
1671            }            }
1672    
1673          if (OK == (d == OP_HSPACE))          if (OK == (d == OP_HSPACE))
1674            {            {
1675            if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1676              {              {
1677              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1738  for (;;) Line 1762  for (;;)
1762        case OP_ANYNL:        case OP_ANYNL:
1763        if (clen > 0) switch(c)        if (clen > 0) switch(c)
1764          {          {
         case 0x000a:  
1765          case 0x000b:          case 0x000b:
1766          case 0x000c:          case 0x000c:
1767          case 0x0085:          case 0x0085:
1768          case 0x2028:          case 0x2028:
1769          case 0x2029:          case 0x2029:
1770            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1771    
1772            case 0x000a:
1773          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1774          break;          break;
1775    
1776          case 0x000d:          case 0x000d:
1777          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1778            {            {
# Line 1771  for (;;) Line 1798  for (;;)
1798          case 0x2028:          case 0x2028:
1799          case 0x2029:          case 0x2029:
1800          break;          break;
1801    
1802          default:          default:
1803          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1804          break;          break;
1805          }          }
# Line 1791  for (;;) Line 1818  for (;;)
1818          case 0x2029:          case 0x2029:
1819          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1820          break;          break;
1821    
1822          default: break;          default: break;
1823          }          }
1824        break;        break;
# Line 1820  for (;;) Line 1847  for (;;)
1847          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1848          case 0x3000:    /* IDEOGRAPHIC SPACE */          case 0x3000:    /* IDEOGRAPHIC SPACE */
1849          break;          break;
1850    
1851          default:          default:
1852          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1853          break;          break;
1854          }          }
# Line 2568  md->end_subject = end_subject; Line 2595  md->end_subject = end_subject;
2595  md->moptions = options;  md->moptions = options;
2596  md->poptions = re->options;  md->poptions = re->options;
2597    
2598    /* If the BSR option is not set at match time, copy what was set
2599    at compile time. */
2600    
2601    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2602      {
2603      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2604        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2605    #ifdef BSR_ANYCRLF
2606      else md->moptions |= PCRE_BSR_ANYCRLF;
2607    #endif
2608      }
2609    
2610  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2611  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2612    
# Line 2638  if (md->tables == NULL) md->tables = _pc Line 2677  if (md->tables == NULL) md->tables = _pc
2677  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2678    
2679  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2680  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2681  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2682    
2683  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2649  studied, there may be a bitmap of possib Line 2688  studied, there may be a bitmap of possib
2688    
2689  if (!anchored)  if (!anchored)
2690    {    {
2691    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2692      {      {
2693      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2694      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 2666  if (!anchored) Line 2705  if (!anchored)
2705  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2706  character" set. */  character" set. */
2707    
2708  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2709    {    {
2710    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2711    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 2836  for (;;) Line 2875  for (;;)
2875      }      }
2876    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2877    
2878    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
2879    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
2880    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
2881    
2882    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2883         (md->nltype == NLTYPE_ANY ||        current_subject < end_subject &&
2884          md->nltype == NLTYPE_ANYCRLF ||        *current_subject == '\n' &&
2885          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
2886         current_subject < end_subject &&          (md->nltype == NLTYPE_ANY ||
2887         *current_subject == '\n')           md->nltype == NLTYPE_ANYCRLF ||
2888             md->nllen == 2))
2889      current_subject++;      current_subject++;
2890    
2891    }   /* "Bumpalong" loop */    }   /* "Bumpalong" loop */

Legend:
Removed from v.178  
changed lines
  Added in v.231

  ViewVC Help
Powered by ViewVC 1.1.5