/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 178 by ph10, Wed Jun 13 08:44:34 2007 UTC | revision 230 by ph10, Mon Sep 10 13:23:56 2007 UTC
# Line 44  FSM). This is NOT Perl-compatible, but Line 44  FSM). This is NOT Perl-compatible, but
44  applications. */  applications. */
45    
46    
47    #ifdef HAVE_CONFIG_H
48    #include <config.h>
49    #endif
50    
51  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
52  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
53  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 63  applications. */ Line 67  applications. */
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 20 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. The resulting opcodes don't have to be less than 256 because they are  enough. The resulting opcodes don't have to be less than 256 because they are
71  never stored, so we push them well clear of the normal opcodes. */  never stored, so we push them well clear of the normal opcodes. */
72    
73  #define OP_PROP_EXTRA       300  #define OP_PROP_EXTRA       300
# Line 126  static uschar coptable[] = { Line 130  static uschar coptable[] = {
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131    0,                             /* RREF                                   */    0,                             /* RREF                                   */
132    0,                             /* DEF                                    */    0,                             /* DEF                                    */
133    0, 0                           /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134      0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135      0, 0                           /* FAIL, ACCEPT                           */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
# Line 585  for (;;) Line 591  for (;;)
591            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593            case OP_NOT_HSPACE:            case OP_NOT_HSPACE:
594            case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;            case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595            case OP_NOT_VSPACE:            case OP_NOT_VSPACE:
596            case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;            case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597            default: break;            default: break;
598            }            }
599          }          }
# Line 1105  for (;;) Line 1111  for (;;)
1111        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1112        if (clen > 0)        if (clen > 0)
1113          {          {
1114          BOOL OK;          BOOL OK;
1115          switch (c)          switch (c)
1116            {            {
1117            case 0x000a:            case 0x000a:
# Line 1116  for (;;) Line 1122  for (;;)
1122            case 0x2028:            case 0x2028:
1123            case 0x2029:            case 0x2029:
1124            OK = TRUE;            OK = TRUE;
1125            break;            break;
1126    
1127            default:            default:
1128            OK = FALSE;            OK = FALSE;
1129            break;            break;
1130            }            }
1131    
1132          if (OK == (d == OP_VSPACE))          if (OK == (d == OP_VSPACE))
1133            {            {
1134            if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1135              {              {
1136              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1144  for (;;) Line 1150  for (;;)
1150        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1151        if (clen > 0)        if (clen > 0)
1152          {          {
1153          BOOL OK;          BOOL OK;
1154          switch (c)          switch (c)
1155            {            {
1156            case 0x09:      /* HT */            case 0x09:      /* HT */
# Line 1168  for (;;) Line 1174  for (;;)
1174            case 0x3000:    /* IDEOGRAPHIC SPACE */            case 0x3000:    /* IDEOGRAPHIC SPACE */
1175            OK = TRUE;            OK = TRUE;
1176            break;            break;
1177    
1178            default:            default:
1179            OK = FALSE;            OK = FALSE;
1180            break;            break;
1181            }            }
1182    
1183          if (OK == (d == OP_HSPACE))          if (OK == (d == OP_HSPACE))
1184            {            {
1185            if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1186              {              {
1187              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1346  for (;;) Line 1352  for (;;)
1352        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1353        if (clen > 0)        if (clen > 0)
1354          {          {
1355          BOOL OK;          BOOL OK;
1356          switch (c)          switch (c)
1357            {            {
1358            case 0x000a:            case 0x000a:
# Line 1358  for (;;) Line 1364  for (;;)
1364            case 0x2029:            case 0x2029:
1365            OK = TRUE;            OK = TRUE;
1366            break;            break;
1367    
1368            default:            default:
1369            OK = FALSE;            OK = FALSE;
1370            break;            break;
1371            }            }
1372          if (OK == (d == OP_VSPACE))          if (OK == (d == OP_VSPACE))
1373            {            {
1374            if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1375                codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1376              {              {
# Line 1392  for (;;) Line 1398  for (;;)
1398        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1399        if (clen > 0)        if (clen > 0)
1400          {          {
1401          BOOL OK;          BOOL OK;
1402          switch (c)          switch (c)
1403            {            {
1404            case 0x09:      /* HT */            case 0x09:      /* HT */
# Line 1416  for (;;) Line 1422  for (;;)
1422            case 0x3000:    /* IDEOGRAPHIC SPACE */            case 0x3000:    /* IDEOGRAPHIC SPACE */
1423            OK = TRUE;            OK = TRUE;
1424            break;            break;
1425    
1426            default:            default:
1427            OK = FALSE;            OK = FALSE;
1428            break;            break;
1429            }            }
1430    
1431          if (OK == (d == OP_HSPACE))          if (OK == (d == OP_HSPACE))
1432            {            {
1433            if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1434                codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1435              {              {
# Line 1574  for (;;) Line 1580  for (;;)
1580        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1581        if (clen > 0)        if (clen > 0)
1582          {          {
1583          BOOL OK;          BOOL OK;
1584          switch (c)          switch (c)
1585            {            {
1586            case 0x000a:            case 0x000a:
# Line 1586  for (;;) Line 1592  for (;;)
1592            case 0x2029:            case 0x2029:
1593            OK = TRUE;            OK = TRUE;
1594            break;            break;
1595    
1596            default:            default:
1597            OK = FALSE;            OK = FALSE;
1598            }            }
1599    
1600          if (OK == (d == OP_VSPACE))          if (OK == (d == OP_VSPACE))
1601            {            {
1602            if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1603              {              {
1604              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1616  for (;;) Line 1622  for (;;)
1622        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1623        if (clen > 0)        if (clen > 0)
1624          {          {
1625          BOOL OK;          BOOL OK;
1626          switch (c)          switch (c)
1627            {            {
1628            case 0x09:      /* HT */            case 0x09:      /* HT */
# Line 1640  for (;;) Line 1646  for (;;)
1646            case 0x3000:    /* IDEOGRAPHIC SPACE */            case 0x3000:    /* IDEOGRAPHIC SPACE */
1647            OK = TRUE;            OK = TRUE;
1648            break;            break;
1649    
1650            default:            default:
1651            OK = FALSE;            OK = FALSE;
1652            break;            break;
1653            }            }
1654    
1655          if (OK == (d == OP_HSPACE))          if (OK == (d == OP_HSPACE))
1656            {            {
1657            if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1658              {              {
1659              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1771  for (;;) Line 1777  for (;;)
1777          case 0x2028:          case 0x2028:
1778          case 0x2029:          case 0x2029:
1779          break;          break;
1780    
1781          default:          default:
1782          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1783          break;          break;
1784          }          }
# Line 1791  for (;;) Line 1797  for (;;)
1797          case 0x2029:          case 0x2029:
1798          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1799          break;          break;
1800    
1801          default: break;          default: break;
1802          }          }
1803        break;        break;
# Line 1820  for (;;) Line 1826  for (;;)
1826          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */          case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1827          case 0x3000:    /* IDEOGRAPHIC SPACE */          case 0x3000:    /* IDEOGRAPHIC SPACE */
1828          break;          break;
1829    
1830          default:          default:
1831          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1832          break;          break;
1833          }          }
# Line 2638  if (md->tables == NULL) md->tables = _pc Line 2644  if (md->tables == NULL) md->tables = _pc
2644  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2645    
2646  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2647  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2648  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2649    
2650  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2649  studied, there may be a bitmap of possib Line 2655  studied, there may be a bitmap of possib
2655    
2656  if (!anchored)  if (!anchored)
2657    {    {
2658    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2659      {      {
2660      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2661      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 2666  if (!anchored) Line 2672  if (!anchored)
2672  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2673  character" set. */  character" set. */
2674    
2675  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2676    {    {
2677    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2678    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 2836  for (;;) Line 2842  for (;;)
2842      }      }
2843    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2844    
2845    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
2846    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
2847    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
2848    
2849    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2850         (md->nltype == NLTYPE_ANY ||        current_subject < end_subject &&
2851          md->nltype == NLTYPE_ANYCRLF ||        *current_subject == '\n' &&
2852          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
2853         current_subject < end_subject &&          (md->nltype == NLTYPE_ANY ||
2854         *current_subject == '\n')           md->nltype == NLTYPE_ANYCRLF ||
2855             md->nllen == 2))
2856      current_subject++;      current_subject++;
2857    
2858    }   /* "Bumpalong" loop */    }   /* "Bumpalong" loop */

Legend:
Removed from v.178  
changed lines
  Added in v.230

  ViewVC Help
Powered by ViewVC 1.1.5