/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 226 by ph10, Tue Aug 21 11:46:08 2007 UTC revision 341 by ph10, Sat Apr 19 16:41:04 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 45  applications. */ Line 45  applications. */
45    
46    
47  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
48  #include <config.h>  #include "config.h"
49  #endif  #endif
50    
51  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
# Line 84  centralize the loading of these characte Line 84  centralize the loading of these characte
84  small value. ***NOTE*** If the start of this table is modified, the two tables  small value. ***NOTE*** If the start of this table is modified, the two tables
85  that follow must also be modified. */  that follow must also be modified. */
86    
87  static uschar coptable[] = {  static const uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0,                          /* Any, Anybyte                           */    0, 0, 0,                       /* Any, AllAny, Anybyte                   */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
# Line 132  static uschar coptable[] = { Line 132  static uschar coptable[] = {
132    0,                             /* DEF                                    */    0,                             /* DEF                                    */
133    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135    0, 0                           /* FAIL, ACCEPT                           */    0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static const uschar toptable1[] = {
142    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
146    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
147  };  };
148    
149  static uschar toptable2[] = {  static const uschar toptable2[] = {
150    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
154    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
155  };  };
156    
157    
# Line 223  Arguments: Line 223  Arguments:
223    rlevel            function call recursion level    rlevel            function call recursion level
224    recursing         regex recursive call level    recursing         regex recursive call level
225    
226  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
227                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
228                       -1 => failed to match                       -1 => failed to match
229                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
230    
# Line 694  for (;;) Line 694  for (;;)
694        break;        break;
695    
696        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
697          case OP_SKIPZERO:
698          code += 1 + GET(code, 2);
699          while (*code == OP_ALT) code += GET(code, 1);
700          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
701          break;
702    
703          /*-----------------------------------------------------------------*/
704        case OP_CIRC:        case OP_CIRC:
705        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
706            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
# Line 737  for (;;) Line 744  for (;;)
744        break;        break;
745    
746        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
747          case OP_ALLANY:
748          if (clen > 0)
749            { ADD_NEW(state_offset + 1, 0); }
750          break;
751    
752          /*-----------------------------------------------------------------*/
753        case OP_EODN:        case OP_EODN:
754        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
755          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 852  for (;;) Line 865  for (;;)
865  /* ========================================================================== */  /* ========================================================================== */
866        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
867        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
868        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
869        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
870    
871        case OP_TYPEPLUS:        case OP_TYPEPLUS:
872        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
# Line 1080  for (;;) Line 1093  for (;;)
1093          int ncount = 0;          int ncount = 0;
1094          switch (c)          switch (c)
1095            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1096            case 0x000b:            case 0x000b:
1097            case 0x000c:            case 0x000c:
1098            case 0x0085:            case 0x0085:
1099            case 0x2028:            case 0x2028:
1100            case 0x2029:            case 0x2029:
1101              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1102              goto ANYNL01;
1103    
1104              case 0x000d:
1105              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1106              /* Fall through */
1107    
1108              ANYNL01:
1109              case 0x000a:
1110            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1111              {              {
1112              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1097  for (;;) Line 1115  for (;;)
1115            count++;            count++;
1116            ADD_NEW_DATA(-state_offset, count, ncount);            ADD_NEW_DATA(-state_offset, count, ncount);
1117            break;            break;
1118    
1119            default:            default:
1120            break;            break;
1121            }            }
# Line 1313  for (;;) Line 1332  for (;;)
1332          int ncount = 0;          int ncount = 0;
1333          switch (c)          switch (c)
1334            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1335            case 0x000b:            case 0x000b:
1336            case 0x000c:            case 0x000c:
1337            case 0x0085:            case 0x0085:
1338            case 0x2028:            case 0x2028:
1339            case 0x2029:            case 0x2029:
1340              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1341              goto ANYNL02;
1342    
1343              case 0x000d:
1344              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1345              /* Fall through */
1346    
1347              ANYNL02:
1348              case 0x000a:
1349            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1350                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1351              {              {
# Line 1330  for (;;) Line 1354  for (;;)
1354              }              }
1355            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1356            break;            break;
1357    
1358            default:            default:
1359            break;            break;
1360            }            }
# Line 1545  for (;;) Line 1570  for (;;)
1570          int ncount = 0;          int ncount = 0;
1571          switch (c)          switch (c)
1572            {            {
           case 0x000d:  
           if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;  
           /* Fall through */  
           case 0x000a:  
1573            case 0x000b:            case 0x000b:
1574            case 0x000c:            case 0x000c:
1575            case 0x0085:            case 0x0085:
1576            case 0x2028:            case 0x2028:
1577            case 0x2029:            case 0x2029:
1578              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1579              goto ANYNL03;
1580    
1581              case 0x000d:
1582              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1583              /* Fall through */
1584    
1585              ANYNL03:
1586              case 0x000a:
1587            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1588              {              {
1589              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1564  for (;;) Line 1594  for (;;)
1594            else            else
1595              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1596            break;            break;
1597    
1598            default:            default:
1599            break;            break;
1600            }            }
# Line 1744  for (;;) Line 1775  for (;;)
1775        case OP_ANYNL:        case OP_ANYNL:
1776        if (clen > 0) switch(c)        if (clen > 0) switch(c)
1777          {          {
         case 0x000a:  
1778          case 0x000b:          case 0x000b:
1779          case 0x000c:          case 0x000c:
1780          case 0x0085:          case 0x0085:
1781          case 0x2028:          case 0x2028:
1782          case 0x2029:          case 0x2029:
1783            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1784    
1785            case 0x000a:
1786          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1787          break;          break;
1788    
1789          case 0x000d:          case 0x000d:
1790          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1791            {            {
# Line 2141  for (;;) Line 2175  for (;;)
2175    
2176  /* ========================================================================== */  /* ========================================================================== */
2177        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2178        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assertion
2179          (?!) is optimised when compiling to OP_FAIL, so we have to support that,
2180          though the other "backtracking verbs" are not supported. */
2181    
2182          case OP_FAIL:
2183          break;
2184    
2185        case OP_ASSERT:        case OP_ASSERT:
2186        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 2574  md->end_subject = end_subject; Line 2613  md->end_subject = end_subject;
2613  md->moptions = options;  md->moptions = options;
2614  md->poptions = re->options;  md->poptions = re->options;
2615    
2616    /* If the BSR option is not set at match time, copy what was set
2617    at compile time. */
2618    
2619    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2620      {
2621      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2622        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2623    #ifdef BSR_ANYCRLF
2624      else md->moptions |= PCRE_BSR_ANYCRLF;
2625    #endif
2626      }
2627    
2628  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2629  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2630    
# Line 2644  if (md->tables == NULL) md->tables = _pc Line 2695  if (md->tables == NULL) md->tables = _pc
2695  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2696    
2697  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2698  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2699  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2700    
2701  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2655  studied, there may be a bitmap of possib Line 2706  studied, there may be a bitmap of possib
2706    
2707  if (!anchored)  if (!anchored)
2708    {    {
2709    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2710      {      {
2711      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2712      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 2672  if (!anchored) Line 2723  if (!anchored)
2723  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2724  character" set. */  character" set. */
2725    
2726  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2727    {    {
2728    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2729    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 2842  for (;;) Line 2893  for (;;)
2893      }      }
2894    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2895    
2896    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does
2897    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
2898    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
2899    
2900    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2901        current_subject < end_subject &&        current_subject < end_subject &&
2902        *current_subject == '\n' &&        *current_subject == '\n' &&
2903        (re->options & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
2904          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
2905           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||
2906           md->nllen == 2))           md->nllen == 2))

Legend:
Removed from v.226  
changed lines
  Added in v.341

  ViewVC Help
Powered by ViewVC 1.1.5