/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC | revision 335 by ph10, Sat Apr 12 14:36:14 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 45  applications. */ Line 45  applications. */
45    
46    
47  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
48  #include <config.h>  #include "config.h"
49  #endif  #endif
50    
51  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 84  centralize the loading of these characte Line 84  centralize the loading of these characte
84  small value. ***NOTE*** If the start of this table is modified, the two tables  small value. ***NOTE*** If the start of this table is modified, the two tables
85  that follow must also be modified. */  that follow must also be modified. */
86    
87  static uschar coptable[] = {  static const uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 130  static uschar coptable[] = { Line 130  static uschar coptable[] = {
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131    0,                             /* RREF                                   */    0,                             /* RREF                                   */
132    0,                             /* DEF                                    */    0,                             /* DEF                                    */
133    0, 0                           /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134      0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135      0, 0                           /* FAIL, ACCEPT                           */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static const uschar toptable1[] = {
142    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 144  static uschar toptable1[] = { Line 146  static uschar toptable1[] = {
146    0                               /* OP_ANY */    0                               /* OP_ANY */
147  };  };
148    
149  static uschar toptable2[] = {  static const uschar toptable2[] = {
150    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
# Line 692  for (;;) Line 694  for (;;)
694        break;        break;
695    
696        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
697          case OP_SKIPZERO:
698          code += 1 + GET(code, 2);
699          while (*code == OP_ALT) code += GET(code, 1);
700          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
701          break;
702    
703          /*-----------------------------------------------------------------*/
704        case OP_CIRC:        case OP_CIRC:
705        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
706            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
# Line 1078  for (;;) Line 1087  for (;;)
1087          int ncount = 0;          int ncount = 0;
1088          switch (c)          switch (c)
1089            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1090            case 0x000b:            case 0x000b:
1091            case 0x000c:            case 0x000c:
1092            case 0x0085:            case 0x0085:
1093            case 0x2028:            case 0x2028:
1094            case 0x2029:            case 0x2029:
1095              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1096              goto ANYNL01;
1097    
1098              case 0x000d:
1099              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1100              /* Fall through */
1101    
1102              ANYNL01:
1103              case 0x000a:
1104            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1105              {              {
1106              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1095  for (;;) Line 1109  for (;;)
1109            count++;            count++;
1110            ADD_NEW_DATA(-state_offset, count, ncount);            ADD_NEW_DATA(-state_offset, count, ncount);
1111            break;            break;
1112    
1113            default:            default:
1114            break;            break;
1115            }            }
# Line 1311  for (;;) Line 1326  for (;;)
1326          int ncount = 0;          int ncount = 0;
1327          switch (c)          switch (c)
1328            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1329            case 0x000b:            case 0x000b:
1330            case 0x000c:            case 0x000c:
1331            case 0x0085:            case 0x0085:
1332            case 0x2028:            case 0x2028:
1333            case 0x2029:            case 0x2029:
1334              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1335              goto ANYNL02;
1336    
1337              case 0x000d:
1338              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1339              /* Fall through */
1340    
1341              ANYNL02:
1342              case 0x000a:
1343            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1344                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1345              {              {
# Line 1328  for (;;) Line 1348  for (;;)
1348              }              }
1349            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1350            break;            break;
1351    
1352            default:            default:
1353            break;            break;
1354            }            }
# Line 1543  for (;;) Line 1564  for (;;)
1564          int ncount = 0;          int ncount = 0;
1565          switch (c)          switch (c)
1566            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1567            case 0x000b:            case 0x000b:
1568            case 0x000c:            case 0x000c:
1569            case 0x0085:            case 0x0085:
1570            case 0x2028:            case 0x2028:
1571            case 0x2029:            case 0x2029:
1572              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1573              goto ANYNL03;
1574    
1575              case 0x000d:
1576              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1577              /* Fall through */
1578    
1579              ANYNL03:
1580              case 0x000a:
1581            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1582              {              {
1583              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1562  for (;;) Line 1588  for (;;)
1588            else            else
1589              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1590            break;            break;
1591    
1592            default:            default:
1593            break;            break;
1594            }            }
# Line 1742  for (;;) Line 1769  for (;;)
1769        case OP_ANYNL:        case OP_ANYNL:
1770        if (clen > 0) switch(c)        if (clen > 0) switch(c)
1771          {          {
         case 0x000a:  
1772          case 0x000b:          case 0x000b:
1773          case 0x000c:          case 0x000c:
1774          case 0x0085:          case 0x0085:
1775          case 0x2028:          case 0x2028:
1776          case 0x2029:          case 0x2029:
1777            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1778    
1779            case 0x000a:
1780          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1781          break;          break;
1782    
1783          case 0x000d:          case 0x000d:
1784          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1785            {            {
# Line 2572  md->end_subject = end_subject; Line 2602  md->end_subject = end_subject;
2602  md->moptions = options;  md->moptions = options;
2603  md->poptions = re->options;  md->poptions = re->options;
2604    
2605    /* If the BSR option is not set at match time, copy what was set
2606    at compile time. */
2607    
2608    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2609      {
2610      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2611        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2612    #ifdef BSR_ANYCRLF
2613      else md->moptions |= PCRE_BSR_ANYCRLF;
2614    #endif
2615      }
2616    
2617  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2618  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2619    
# Line 2642  if (md->tables == NULL) md->tables = _pc Line 2684  if (md->tables == NULL) md->tables = _pc
2684  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2685    
2686  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2687  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2688  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2689    
2690  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2653  studied, there may be a bitmap of possib Line 2695  studied, there may be a bitmap of possib
2695    
2696  if (!anchored)  if (!anchored)
2697    {    {
2698    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2699      {      {
2700      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2701      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 2670  if (!anchored) Line 2712  if (!anchored)
2712  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2713  character" set. */  character" set. */
2714    
2715  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2716    {    {
2717    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2718    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 2840  for (;;) Line 2882  for (;;)
2882      }      }
2883    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2884    
2885    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
2886    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
2887    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
2888    
2889    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2890         (md->nltype == NLTYPE_ANY ||        current_subject < end_subject &&
2891          md->nltype == NLTYPE_ANYCRLF ||        *current_subject == '\n' &&
2892          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
2893         current_subject < end_subject &&          (md->nltype == NLTYPE_ANY ||
2894         *current_subject == '\n')           md->nltype == NLTYPE_ANYCRLF ||
2895             md->nllen == 2))
2896      current_subject++;      current_subject++;
2897    
2898    }   /* "Bumpalong" loop */    }   /* "Bumpalong" loop */

Legend:
Removed from v.200  
changed lines
  Added in v.335

  ViewVC Help
Powered by ViewVC 1.1.5