/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 87 by nigel, Sat Feb 24 21:41:21 2007 UTC revision 200 by ph10, Wed Aug 1 09:10:40 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47    #ifdef HAVE_CONFIG_H
48    #include <config.h>
49    #endif
50    
51    #define NLBLOCK md             /* Block containing newline information */
52    #define PSSTART start_subject  /* Field containing processed string start */
53    #define PSEND   end_subject    /* Field containing processed string end */
54    
55  #include "pcre_internal.h"  #include "pcre_internal.h"
56    
57    
# Line 57  compatible, but it has advantages in cer Line 66  compatible, but it has advantages in cer
66  *************************************************/  *************************************************/
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  
73  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_PROP_EXTRA       300
74    #define OP_EXTUNI_EXTRA     320
75    #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 82  static uschar coptable[] = { Line 98  static uschar coptable[] = {
98    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
99    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
100    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
101      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
102    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
103    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
104    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
105      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
106    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
107    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
108    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
109      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
110    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
111    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
112    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 106  static uschar coptable[] = { Line 125  static uschar coptable[] = {
125    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
126    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
127    0,                             /* Reverse                                */    0,                             /* Reverse                                */
128    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
129    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0,                             /* RREF                                   */
132    0,                             /* BRANUMBER                              */    0,                             /* DEF                                    */
133    0                              /* BRA                                    */    0, 0                           /* BRAZERO, BRAMINZERO                    */
134  };  };
135    
136  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
137  and \w */  and \w */
138    
139  static uschar toptable1[] = {  static uschar toptable1[] = {
140    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
141    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
142    ctype_space, ctype_space,    ctype_space, ctype_space,
143    ctype_word,  ctype_word,    ctype_word,  ctype_word,
# Line 126  static uschar toptable1[] = { Line 145  static uschar toptable1[] = {
145  };  };
146    
147  static uschar toptable2[] = {  static uschar toptable2[] = {
148    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
149    ctype_digit, 0,    ctype_digit, 0,
150    ctype_space, 0,    ctype_space, 0,
151    ctype_word,  0,    ctype_word,  0,
# Line 277  stateblock *next_active_state, *next_new Line 296  stateblock *next_active_state, *next_new
296    
297  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
298  const uschar *ptr;  const uschar *ptr;
299  const uschar *end_code;  const uschar *end_code, *first_op;
300    
301  int active_count, new_count, match_count;  int active_count, new_count, match_count;
302    
# Line 290  const uschar *start_code = md->start_cod Line 309  const uschar *start_code = md->start_cod
309    
310  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
311  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
312    #else
313    BOOL utf8 = FALSE;
314  #endif  #endif
315    
316  rlevel++;  rlevel++;
# Line 313  active_states = (stateblock *)(workspace Line 334  active_states = (stateblock *)(workspace
334  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
335  new_count = 0;  new_count = 0;
336    
337    first_op = this_start_code + 1 + LINK_SIZE +
338      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
339    
340  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
341  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
342  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 322  If the first opcode in the first alterna Line 346  If the first opcode in the first alterna
346  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
347  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
348    
349  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
350    {    {
351    int max_back = 0;    int max_back = 0;
352    int gone_back;    int gone_back;
# Line 404  else Line 428  else
428    
429    else    else
430      {      {
431        int length = 1 + LINK_SIZE +
432          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
433      do      do
434        {        {
435        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
436        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
437          length = 1 + LINK_SIZE;
438        }        }
439      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
440      }      }
# Line 423  ptr = current_subject; Line 450  ptr = current_subject;
450  for (;;)  for (;;)
451    {    {
452    int i, j;    int i, j;
453    int c, d, clen, dlen;    int clen, dlen;
454      unsigned int c, d;
455    
456    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
457    new state list. */    new state list. */
# Line 459  for (;;) Line 487  for (;;)
487    
488    if (ptr < end_subject)    if (ptr < end_subject)
489      {      {
490      clen = 1;      clen = 1;        /* Number of bytes in the character */
491  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
492      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
493  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 467  for (;;) Line 495  for (;;)
495      }      }
496    else    else
497      {      {
498      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
499      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
500      }      }
501    
502    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 482  for (;;) Line 510  for (;;)
510      const uschar *code;      const uschar *code;
511      int state_offset = current_state->offset;      int state_offset = current_state->offset;
512      int count, codevalue;      int count, codevalue;
513    #ifdef SUPPORT_UCP
514      int chartype, script;      int chartype, script;
515    #endif
516    
517  #ifdef DEBUG  #ifdef DEBUG
518      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
519      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
520        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
521          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
522  #endif  #endif
# Line 530  for (;;) Line 560  for (;;)
560    
561      code = start_code + state_offset;      code = start_code + state_offset;
562      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
563    
564      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
565      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 538  for (;;) Line 567  for (;;)
567      permitted.      permitted.
568    
569      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
570      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
571      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
572      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
573      opcodes. */      */
574    
575      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
576        {        {
# Line 552  for (;;) Line 581  for (;;)
581        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
582        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
583          {          {
584          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
585          if (d >= OP_NOTPROP)            {
586            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
587              case OP_NOTPROP:
588              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
589              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
590              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
591              case OP_NOT_HSPACE:
592              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
593              case OP_NOT_VSPACE:
594              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
595              default: break;
596              }
597          }          }
598        }        }
599      else      else
600        {        {
601        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
602        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
603        }        }
604    
605    
# Line 622  for (;;) Line 661  for (;;)
661    
662        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
663        case OP_BRA:        case OP_BRA:
664          case OP_SBRA:
665        do        do
666          {          {
667          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 631  for (;;) Line 671  for (;;)
671        break;        break;
672    
673        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
674          case OP_CBRA:
675          case OP_SCBRA:
676          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
677          code += GET(code, 1);
678          while (*code == OP_ALT)
679            {
680            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
681            code += GET(code, 1);
682            }
683          break;
684    
685          /*-----------------------------------------------------------------*/
686        case OP_BRAZERO:        case OP_BRAZERO:
687        case OP_BRAMINZERO:        case OP_BRAMINZERO:
688        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 640  for (;;) Line 692  for (;;)
692        break;        break;
693    
694        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
       case OP_BRANUMBER:  
       ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);  
       break;  
   
       /*-----------------------------------------------------------------*/  
695        case OP_CIRC:        case OP_CIRC:
696        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
697            ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))            ((ims & PCRE_MULTILINE) != 0 &&
698                ptr != end_subject &&
699                WAS_NEWLINE(ptr)))
700          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
701        break;        break;
702    
# Line 681  for (;;) Line 730  for (;;)
730    
731        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
732        case OP_ANY:        case OP_ANY:
733        if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
734          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
735        break;        break;
736    
737        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
738        case OP_EODN:        case OP_EODN:
739        if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
740          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
741        break;        break;
742    
# Line 695  for (;;) Line 744  for (;;)
744        case OP_DOLL:        case OP_DOLL:
745        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
746          {          {
747          if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||          if (clen == 0 ||
748                                  (ims & PCRE_MULTILINE) != 0)))              (IS_NEWLINE(ptr) &&
749                   ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
750                ))
751            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
752          }          }
753        else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
754          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
755        break;        break;
756    
# Line 748  for (;;) Line 799  for (;;)
799        break;        break;
800    
801    
 #ifdef SUPPORT_UCP  
   
802        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
803        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
804        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
805        */        */
806    
807    #ifdef SUPPORT_UCP
808        case OP_PROP:        case OP_PROP:
809        case OP_NOTPROP:        case OP_NOTPROP:
810        if (clen > 0)        if (clen > 0)
# Line 805  for (;;) Line 855  for (;;)
855    
856        case OP_TYPEPLUS:        case OP_TYPEPLUS:
857        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
858          case OP_TYPEPOSPLUS:
859        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
860        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
861        if (clen > 0)        if (clen > 0)
862          {          {
863          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
864              (c < 256 &&              (c < 256 &&
865                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
866                   (ims & PCRE_DOTALL) != 0 ||
867                   !IS_NEWLINE(ptr)
868                  ) &&
869                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
870            {            {
871              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
872                {
873                active_count--;            /* Remove non-match possibility */
874                next_active_state--;
875                }
876            count++;            count++;
877            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
878            }            }
# Line 823  for (;;) Line 882  for (;;)
882        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
883        case OP_TYPEQUERY:        case OP_TYPEQUERY:
884        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
885          case OP_TYPEPOSQUERY:
886        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
887        if (clen > 0)        if (clen > 0)
888          {          {
889          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
890              (c < 256 &&              (c < 256 &&
891                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
892                   (ims & PCRE_DOTALL) != 0 ||
893                   !IS_NEWLINE(ptr)
894                  ) &&
895                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
896            {            {
897              if (codevalue == OP_TYPEPOSQUERY)
898                {
899                active_count--;            /* Remove non-match possibility */
900                next_active_state--;
901                }
902            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
903            }            }
904          }          }
# Line 839  for (;;) Line 907  for (;;)
907        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
908        case OP_TYPESTAR:        case OP_TYPESTAR:
909        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
910          case OP_TYPEPOSSTAR:
911        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
912        if (clen > 0)        if (clen > 0)
913          {          {
914          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
915              (c < 256 &&              (c < 256 &&
916                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
917                   (ims & PCRE_DOTALL) != 0 ||
918                   !IS_NEWLINE(ptr)
919                  ) &&
920                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
921            {            {
922              if (codevalue == OP_TYPEPOSSTAR)
923                {
924                active_count--;            /* Remove non-match possibility */
925                next_active_state--;
926                }
927            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
928            }            }
929          }          }
# Line 854  for (;;) Line 931  for (;;)
931    
932        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
933        case OP_TYPEEXACT:        case OP_TYPEEXACT:
934          count = current_state->count;  /* Number already matched */
935          if (clen > 0)
936            {
937            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
938                (c < 256 &&
939                  (d != OP_ANY ||
940                   (ims & PCRE_DOTALL) != 0 ||
941                   !IS_NEWLINE(ptr)
942                  ) &&
943                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
944              {
945              if (++count >= GET2(code, 1))
946                { ADD_NEW(state_offset + 4, 0); }
947              else
948                { ADD_NEW(state_offset, count); }
949              }
950            }
951          break;
952    
953          /*-----------------------------------------------------------------*/
954        case OP_TYPEUPTO:        case OP_TYPEUPTO:
955        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
956        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
957          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
958        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
959        if (clen > 0)        if (clen > 0)
960          {          {
961          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
962              (c < 256 &&              (c < 256 &&
963                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
964                   (ims & PCRE_DOTALL) != 0 ||
965                   !IS_NEWLINE(ptr)
966                  ) &&
967                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
968            {            {
969              if (codevalue == OP_TYPEPOSUPTO)
970                {
971                active_count--;           /* Remove non-match possibility */
972                next_active_state--;
973                }
974            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
975              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
976            else            else
# Line 876  for (;;) Line 981  for (;;)
981    
982  /* ========================================================================== */  /* ========================================================================== */
983        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
984        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
985        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
986        d variable. */        is in the d variable. */
987    
988    #ifdef SUPPORT_UCP
989        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
990        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
991          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
992        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
993        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
994        if (clen > 0)        if (clen > 0)
# Line 917  for (;;) Line 1024  for (;;)
1024            break;            break;
1025            }            }
1026    
1027          if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }          if (OK == (d == OP_PROP))
1028              {
1029              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1030                {
1031                active_count--;           /* Remove non-match possibility */
1032                next_active_state--;
1033                }
1034              count++;
1035              ADD_NEW(state_offset, count);
1036              }
1037          }          }
1038        break;        break;
1039    
1040        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1041        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1042        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1043          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1044        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1045        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1046        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1047          {          {
1048          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1049          int ncount = 0;          int ncount = 0;
1050            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1051              {
1052              active_count--;           /* Remove non-match possibility */
1053              next_active_state--;
1054              }
1055          while (nptr < end_subject)          while (nptr < end_subject)
1056            {            {
1057            int nd;            int nd;
# Line 943  for (;;) Line 1065  for (;;)
1065          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1066          }          }
1067        break;        break;
1068    #endif
1069    
1070        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1071          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1072          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1073          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1074          count = current_state->count;  /* Already matched */
1075          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1076          if (clen > 0)
1077            {
1078            int ncount = 0;
1079            switch (c)
1080              {
1081              case 0x000d:
1082              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1083              /* Fall through */
1084              case 0x000a:
1085              case 0x000b:
1086              case 0x000c:
1087              case 0x0085:
1088              case 0x2028:
1089              case 0x2029:
1090              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1091                {
1092                active_count--;           /* Remove non-match possibility */
1093                next_active_state--;
1094                }
1095              count++;
1096              ADD_NEW_DATA(-state_offset, count, ncount);
1097              break;
1098              default:
1099              break;
1100              }
1101            }
1102          break;
1103    
1104          /*-----------------------------------------------------------------*/
1105          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1106          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1107          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1108          count = current_state->count;  /* Already matched */
1109          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1110          if (clen > 0)
1111            {
1112            BOOL OK;
1113            switch (c)
1114              {
1115              case 0x000a:
1116              case 0x000b:
1117              case 0x000c:
1118              case 0x000d:
1119              case 0x0085:
1120              case 0x2028:
1121              case 0x2029:
1122              OK = TRUE;
1123              break;
1124    
1125              default:
1126              OK = FALSE;
1127              break;
1128              }
1129    
1130            if (OK == (d == OP_VSPACE))
1131              {
1132              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1133                {
1134                active_count--;           /* Remove non-match possibility */
1135                next_active_state--;
1136                }
1137              count++;
1138              ADD_NEW_DATA(-state_offset, count, 0);
1139              }
1140            }
1141          break;
1142    
1143          /*-----------------------------------------------------------------*/
1144          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1145          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1146          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1147          count = current_state->count;  /* Already matched */
1148          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1149          if (clen > 0)
1150            {
1151            BOOL OK;
1152            switch (c)
1153              {
1154              case 0x09:      /* HT */
1155              case 0x20:      /* SPACE */
1156              case 0xa0:      /* NBSP */
1157              case 0x1680:    /* OGHAM SPACE MARK */
1158              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1159              case 0x2000:    /* EN QUAD */
1160              case 0x2001:    /* EM QUAD */
1161              case 0x2002:    /* EN SPACE */
1162              case 0x2003:    /* EM SPACE */
1163              case 0x2004:    /* THREE-PER-EM SPACE */
1164              case 0x2005:    /* FOUR-PER-EM SPACE */
1165              case 0x2006:    /* SIX-PER-EM SPACE */
1166              case 0x2007:    /* FIGURE SPACE */
1167              case 0x2008:    /* PUNCTUATION SPACE */
1168              case 0x2009:    /* THIN SPACE */
1169              case 0x200A:    /* HAIR SPACE */
1170              case 0x202f:    /* NARROW NO-BREAK SPACE */
1171              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1172              case 0x3000:    /* IDEOGRAPHIC SPACE */
1173              OK = TRUE;
1174              break;
1175    
1176              default:
1177              OK = FALSE;
1178              break;
1179              }
1180    
1181            if (OK == (d == OP_HSPACE))
1182              {
1183              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1184                {
1185                active_count--;           /* Remove non-match possibility */
1186                next_active_state--;
1187                }
1188              count++;
1189              ADD_NEW_DATA(-state_offset, count, 0);
1190              }
1191            }
1192          break;
1193    
1194          /*-----------------------------------------------------------------*/
1195    #ifdef SUPPORT_UCP
1196        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1197        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1198          case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1199        count = 4;        count = 4;
1200        goto QS1;        goto QS1;
1201    
1202        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1203        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1204          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1205        count = 0;        count = 0;
1206    
1207        QS1:        QS1:
# Line 990  for (;;) Line 1240  for (;;)
1240            break;            break;
1241            }            }
1242    
1243          if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }          if (OK == (d == OP_PROP))
1244              {
1245              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1246                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1247                {
1248                active_count--;           /* Remove non-match possibility */
1249                next_active_state--;
1250                }
1251              ADD_NEW(state_offset + count, 0);
1252              }
1253          }          }
1254        break;        break;
1255    
1256        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1257        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1258        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1259          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1260        count = 2;        count = 2;
1261        goto QS2;        goto QS2;
1262    
1263        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1264        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1265          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1266        count = 0;        count = 0;
1267    
1268        QS2:        QS2:
# Line 1011  for (;;) Line 1272  for (;;)
1272          {          {
1273          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1274          int ncount = 0;          int ncount = 0;
1275            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1276                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1277              {
1278              active_count--;           /* Remove non-match possibility */
1279              next_active_state--;
1280              }
1281          while (nptr < end_subject)          while (nptr < end_subject)
1282            {            {
1283            int nd;            int nd;
# Line 1023  for (;;) Line 1290  for (;;)
1290          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1291          }          }
1292        break;        break;
1293    #endif
1294    
1295          /*-----------------------------------------------------------------*/
1296          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1297          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1298          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1299          count = 2;
1300          goto QS3;
1301    
1302          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1303          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1304          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1305          count = 0;
1306    
1307          QS3:
1308          ADD_ACTIVE(state_offset + 2, 0);
1309          if (clen > 0)
1310            {
1311            int ncount = 0;
1312            switch (c)
1313              {
1314              case 0x000d:
1315              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1316              /* Fall through */
1317              case 0x000a:
1318              case 0x000b:
1319              case 0x000c:
1320              case 0x0085:
1321              case 0x2028:
1322              case 0x2029:
1323              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1324                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1325                {
1326                active_count--;           /* Remove non-match possibility */
1327                next_active_state--;
1328                }
1329              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1330              break;
1331              default:
1332              break;
1333              }
1334            }
1335          break;
1336    
1337          /*-----------------------------------------------------------------*/
1338          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1339          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1340          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1341          count = 2;
1342          goto QS4;
1343    
1344          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1345          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1346          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1347          count = 0;
1348    
1349          QS4:
1350          ADD_ACTIVE(state_offset + 2, 0);
1351          if (clen > 0)
1352            {
1353            BOOL OK;
1354            switch (c)
1355              {
1356              case 0x000a:
1357              case 0x000b:
1358              case 0x000c:
1359              case 0x000d:
1360              case 0x0085:
1361              case 0x2028:
1362              case 0x2029:
1363              OK = TRUE;
1364              break;
1365    
1366              default:
1367              OK = FALSE;
1368              break;
1369              }
1370            if (OK == (d == OP_VSPACE))
1371              {
1372              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1373                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1374                {
1375                active_count--;           /* Remove non-match possibility */
1376                next_active_state--;
1377                }
1378              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1379              }
1380            }
1381          break;
1382    
1383          /*-----------------------------------------------------------------*/
1384          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1385          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1386          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1387          count = 2;
1388          goto QS5;
1389    
1390          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1391          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1392          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1393          count = 0;
1394    
1395          QS5:
1396          ADD_ACTIVE(state_offset + 2, 0);
1397          if (clen > 0)
1398            {
1399            BOOL OK;
1400            switch (c)
1401              {
1402              case 0x09:      /* HT */
1403              case 0x20:      /* SPACE */
1404              case 0xa0:      /* NBSP */
1405              case 0x1680:    /* OGHAM SPACE MARK */
1406              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1407              case 0x2000:    /* EN QUAD */
1408              case 0x2001:    /* EM QUAD */
1409              case 0x2002:    /* EN SPACE */
1410              case 0x2003:    /* EM SPACE */
1411              case 0x2004:    /* THREE-PER-EM SPACE */
1412              case 0x2005:    /* FOUR-PER-EM SPACE */
1413              case 0x2006:    /* SIX-PER-EM SPACE */
1414              case 0x2007:    /* FIGURE SPACE */
1415              case 0x2008:    /* PUNCTUATION SPACE */
1416              case 0x2009:    /* THIN SPACE */
1417              case 0x200A:    /* HAIR SPACE */
1418              case 0x202f:    /* NARROW NO-BREAK SPACE */
1419              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1420              case 0x3000:    /* IDEOGRAPHIC SPACE */
1421              OK = TRUE;
1422              break;
1423    
1424              default:
1425              OK = FALSE;
1426              break;
1427              }
1428    
1429            if (OK == (d == OP_HSPACE))
1430              {
1431              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1432                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1433                {
1434                active_count--;           /* Remove non-match possibility */
1435                next_active_state--;
1436                }
1437              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1438              }
1439            }
1440          break;
1441    
1442        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1443    #ifdef SUPPORT_UCP
1444        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1445        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1446        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1447          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1448        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1449          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1450        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1066  for (;;) Line 1483  for (;;)
1483    
1484          if (OK == (d == OP_PROP))          if (OK == (d == OP_PROP))
1485            {            {
1486              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1487                {
1488                active_count--;           /* Remove non-match possibility */
1489                next_active_state--;
1490                }
1491            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1492              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 6, 0); }
1493            else            else
# Line 1078  for (;;) Line 1500  for (;;)
1500        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1501        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1502        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1503          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1504        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1505          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1506        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1085  for (;;) Line 1508  for (;;)
1508          {          {
1509          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1510          int ncount = 0;          int ncount = 0;
1511            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1512              {
1513              active_count--;           /* Remove non-match possibility */
1514              next_active_state--;
1515              }
1516          while (nptr < end_subject)          while (nptr < end_subject)
1517            {            {
1518            int nd;            int nd;
# Line 1100  for (;;) Line 1528  for (;;)
1528            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1529          }          }
1530        break;        break;
1531    #endif
1532    
1533          /*-----------------------------------------------------------------*/
1534          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1535          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1536          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1537          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1538          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1539            { ADD_ACTIVE(state_offset + 4, 0); }
1540          count = current_state->count;  /* Number already matched */
1541          if (clen > 0)
1542            {
1543            int ncount = 0;
1544            switch (c)
1545              {
1546              case 0x000d:
1547              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1548              /* Fall through */
1549              case 0x000a:
1550              case 0x000b:
1551              case 0x000c:
1552              case 0x0085:
1553              case 0x2028:
1554              case 0x2029:
1555              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1556                {
1557                active_count--;           /* Remove non-match possibility */
1558                next_active_state--;
1559                }
1560              if (++count >= GET2(code, 1))
1561                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1562              else
1563                { ADD_NEW_DATA(-state_offset, count, ncount); }
1564              break;
1565              default:
1566              break;
1567              }
1568            }
1569          break;
1570    
1571          /*-----------------------------------------------------------------*/
1572          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1573          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1574          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1575          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1576          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1577            { ADD_ACTIVE(state_offset + 4, 0); }
1578          count = current_state->count;  /* Number already matched */
1579          if (clen > 0)
1580            {
1581            BOOL OK;
1582            switch (c)
1583              {
1584              case 0x000a:
1585              case 0x000b:
1586              case 0x000c:
1587              case 0x000d:
1588              case 0x0085:
1589              case 0x2028:
1590              case 0x2029:
1591              OK = TRUE;
1592              break;
1593    
1594              default:
1595              OK = FALSE;
1596              }
1597    
1598            if (OK == (d == OP_VSPACE))
1599              {
1600              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1601                {
1602                active_count--;           /* Remove non-match possibility */
1603                next_active_state--;
1604                }
1605              if (++count >= GET2(code, 1))
1606                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1607              else
1608                { ADD_NEW_DATA(-state_offset, count, 0); }
1609              }
1610            }
1611          break;
1612    
1613          /*-----------------------------------------------------------------*/
1614          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1615          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1616          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1617          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1618          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1619            { ADD_ACTIVE(state_offset + 4, 0); }
1620          count = current_state->count;  /* Number already matched */
1621          if (clen > 0)
1622            {
1623            BOOL OK;
1624            switch (c)
1625              {
1626              case 0x09:      /* HT */
1627              case 0x20:      /* SPACE */
1628              case 0xa0:      /* NBSP */
1629              case 0x1680:    /* OGHAM SPACE MARK */
1630              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1631              case 0x2000:    /* EN QUAD */
1632              case 0x2001:    /* EM QUAD */
1633              case 0x2002:    /* EN SPACE */
1634              case 0x2003:    /* EM SPACE */
1635              case 0x2004:    /* THREE-PER-EM SPACE */
1636              case 0x2005:    /* FOUR-PER-EM SPACE */
1637              case 0x2006:    /* SIX-PER-EM SPACE */
1638              case 0x2007:    /* FIGURE SPACE */
1639              case 0x2008:    /* PUNCTUATION SPACE */
1640              case 0x2009:    /* THIN SPACE */
1641              case 0x200A:    /* HAIR SPACE */
1642              case 0x202f:    /* NARROW NO-BREAK SPACE */
1643              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1644              case 0x3000:    /* IDEOGRAPHIC SPACE */
1645              OK = TRUE;
1646              break;
1647    
1648              default:
1649              OK = FALSE;
1650              break;
1651              }
1652    
1653            if (OK == (d == OP_HSPACE))
1654              {
1655              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1656                {
1657                active_count--;           /* Remove non-match possibility */
1658                next_active_state--;
1659                }
1660              if (++count >= GET2(code, 1))
1661                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1662              else
1663                { ADD_NEW_DATA(-state_offset, count, 0); }
1664              }
1665            }
1666          break;
1667    
1668  /* ========================================================================== */  /* ========================================================================== */
1669        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1121  for (;;) Line 1685  for (;;)
1685          {          {
1686          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1687            {            {
1688            int othercase;            unsigned int othercase;
1689            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1690    
1691            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
# Line 1130  for (;;) Line 1694  for (;;)
1694  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1695            othercase = _pcre_ucp_othercase(c);            othercase = _pcre_ucp_othercase(c);
1696  #else  #else
1697            othercase = -1;            othercase = NOTACHAR;
1698  #endif  #endif
1699    
1700            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
# Line 1171  for (;;) Line 1735  for (;;)
1735  #endif  #endif
1736    
1737        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1738          /* This is tricky like EXTUNI because it too can match more than one
1739          character (when CR is followed by LF). In this case, set up a negative
1740          state to wait for one character to pass before continuing. */
1741    
1742          case OP_ANYNL:
1743          if (clen > 0) switch(c)
1744            {
1745            case 0x000a:
1746            case 0x000b:
1747            case 0x000c:
1748            case 0x0085:
1749            case 0x2028:
1750            case 0x2029:
1751            ADD_NEW(state_offset + 1, 0);
1752            break;
1753            case 0x000d:
1754            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1755              {
1756              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1757              }
1758            else
1759              {
1760              ADD_NEW(state_offset + 1, 0);
1761              }
1762            break;
1763            }
1764          break;
1765    
1766          /*-----------------------------------------------------------------*/
1767          case OP_NOT_VSPACE:
1768          if (clen > 0) switch(c)
1769            {
1770            case 0x000a:
1771            case 0x000b:
1772            case 0x000c:
1773            case 0x000d:
1774            case 0x0085:
1775            case 0x2028:
1776            case 0x2029:
1777            break;
1778    
1779            default:
1780            ADD_NEW(state_offset + 1, 0);
1781            break;
1782            }
1783          break;
1784    
1785          /*-----------------------------------------------------------------*/
1786          case OP_VSPACE:
1787          if (clen > 0) switch(c)
1788            {
1789            case 0x000a:
1790            case 0x000b:
1791            case 0x000c:
1792            case 0x000d:
1793            case 0x0085:
1794            case 0x2028:
1795            case 0x2029:
1796            ADD_NEW(state_offset + 1, 0);
1797            break;
1798    
1799            default: break;
1800            }
1801          break;
1802    
1803          /*-----------------------------------------------------------------*/
1804          case OP_NOT_HSPACE:
1805          if (clen > 0) switch(c)
1806            {
1807            case 0x09:      /* HT */
1808            case 0x20:      /* SPACE */
1809            case 0xa0:      /* NBSP */
1810            case 0x1680:    /* OGHAM SPACE MARK */
1811            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1812            case 0x2000:    /* EN QUAD */
1813            case 0x2001:    /* EM QUAD */
1814            case 0x2002:    /* EN SPACE */
1815            case 0x2003:    /* EM SPACE */
1816            case 0x2004:    /* THREE-PER-EM SPACE */
1817            case 0x2005:    /* FOUR-PER-EM SPACE */
1818            case 0x2006:    /* SIX-PER-EM SPACE */
1819            case 0x2007:    /* FIGURE SPACE */
1820            case 0x2008:    /* PUNCTUATION SPACE */
1821            case 0x2009:    /* THIN SPACE */
1822            case 0x200A:    /* HAIR SPACE */
1823            case 0x202f:    /* NARROW NO-BREAK SPACE */
1824            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1825            case 0x3000:    /* IDEOGRAPHIC SPACE */
1826            break;
1827    
1828            default:
1829            ADD_NEW(state_offset + 1, 0);
1830            break;
1831            }
1832          break;
1833    
1834          /*-----------------------------------------------------------------*/
1835          case OP_HSPACE:
1836          if (clen > 0) switch(c)
1837            {
1838            case 0x09:      /* HT */
1839            case 0x20:      /* SPACE */
1840            case 0xa0:      /* NBSP */
1841            case 0x1680:    /* OGHAM SPACE MARK */
1842            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1843            case 0x2000:    /* EN QUAD */
1844            case 0x2001:    /* EM QUAD */
1845            case 0x2002:    /* EN SPACE */
1846            case 0x2003:    /* EM SPACE */
1847            case 0x2004:    /* THREE-PER-EM SPACE */
1848            case 0x2005:    /* FOUR-PER-EM SPACE */
1849            case 0x2006:    /* SIX-PER-EM SPACE */
1850            case 0x2007:    /* FIGURE SPACE */
1851            case 0x2008:    /* PUNCTUATION SPACE */
1852            case 0x2009:    /* THIN SPACE */
1853            case 0x200A:    /* HAIR SPACE */
1854            case 0x202f:    /* NARROW NO-BREAK SPACE */
1855            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1856            case 0x3000:    /* IDEOGRAPHIC SPACE */
1857            ADD_NEW(state_offset + 1, 0);
1858            break;
1859            }
1860          break;
1861    
1862          /*-----------------------------------------------------------------*/
1863        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1864        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1865        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1178  for (;;) Line 1867  for (;;)
1867        case OP_NOT:        case OP_NOT:
1868        if (clen > 0)        if (clen > 0)
1869          {          {
1870          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1871          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1872          }          }
1873        break;        break;
# Line 1186  for (;;) Line 1875  for (;;)
1875        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1876        case OP_PLUS:        case OP_PLUS:
1877        case OP_MINPLUS:        case OP_MINPLUS:
1878          case OP_POSPLUS:
1879        case OP_NOTPLUS:        case OP_NOTPLUS:
1880        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1881          case OP_NOTPOSPLUS:
1882        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1883        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1884        if (clen > 0)        if (clen > 0)
1885          {          {
1886          int otherd = -1;          unsigned int otherd = NOTACHAR;
1887          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1888            {            {
1889  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1207  for (;;) Line 1898  for (;;)
1898            otherd = fcc[d];            otherd = fcc[d];
1899            }            }
1900          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1901            { count++; ADD_NEW(state_offset, count); }            {
1902              if (count > 0 &&
1903                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1904                {
1905                active_count--;             /* Remove non-match possibility */
1906                next_active_state--;
1907                }
1908              count++;
1909              ADD_NEW(state_offset, count);
1910              }
1911          }          }
1912        break;        break;
1913    
1914        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1915        case OP_QUERY:        case OP_QUERY:
1916        case OP_MINQUERY:        case OP_MINQUERY:
1917          case OP_POSQUERY:
1918        case OP_NOTQUERY:        case OP_NOTQUERY:
1919        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1920          case OP_NOTPOSQUERY:
1921        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1922        if (clen > 0)        if (clen > 0)
1923          {          {
1924          int otherd = -1;          unsigned int otherd = NOTACHAR;
1925          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1926            {            {
1927  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1928            if (utf8 && d >= 128)            if (utf8 && d >= 128)
# Line 1234  for (;;) Line 1936  for (;;)
1936            otherd = fcc[d];            otherd = fcc[d];
1937            }            }
1938          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1939            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1940              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1941                {
1942                active_count--;            /* Remove non-match possibility */
1943                next_active_state--;
1944                }
1945              ADD_NEW(state_offset + dlen + 1, 0);
1946              }
1947          }          }
1948        break;        break;
1949    
1950        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1951        case OP_STAR:        case OP_STAR:
1952        case OP_MINSTAR:        case OP_MINSTAR:
1953          case OP_POSSTAR:
1954        case OP_NOTSTAR:        case OP_NOTSTAR:
1955        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1956          case OP_NOTPOSSTAR:
1957        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1958        if (clen > 0)        if (clen > 0)
1959          {          {
1960          int otherd = -1;          unsigned int otherd = NOTACHAR;
1961          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1962            {            {
1963  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1964            if (utf8 && d >= 128)            if (utf8 && d >= 128)
# Line 1261  for (;;) Line 1972  for (;;)
1972            otherd = fcc[d];            otherd = fcc[d];
1973            }            }
1974          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1975            { ADD_NEW(state_offset, 0); }            {
1976              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
1977                {
1978                active_count--;            /* Remove non-match possibility */
1979                next_active_state--;
1980                }
1981              ADD_NEW(state_offset, 0);
1982              }
1983          }          }
1984        break;        break;
1985    
1986        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1987        case OP_EXACT:        case OP_EXACT:
1988          case OP_NOTEXACT:
1989          count = current_state->count;  /* Number already matched */
1990          if (clen > 0)
1991            {
1992            unsigned int otherd = NOTACHAR;
1993            if ((ims & PCRE_CASELESS) != 0)
1994              {
1995    #ifdef SUPPORT_UTF8
1996              if (utf8 && d >= 128)
1997                {
1998    #ifdef SUPPORT_UCP
1999                otherd = _pcre_ucp_othercase(d);
2000    #endif  /* SUPPORT_UCP */
2001                }
2002              else
2003    #endif  /* SUPPORT_UTF8 */
2004              otherd = fcc[d];
2005              }
2006            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2007              {
2008              if (++count >= GET2(code, 1))
2009                { ADD_NEW(state_offset + dlen + 3, 0); }
2010              else
2011                { ADD_NEW(state_offset, count); }
2012              }
2013            }
2014          break;
2015    
2016          /*-----------------------------------------------------------------*/
2017        case OP_UPTO:        case OP_UPTO:
2018        case OP_MINUPTO:        case OP_MINUPTO:
2019        case OP_NOTEXACT:        case OP_POSUPTO:
2020        case OP_NOTUPTO:        case OP_NOTUPTO:
2021        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2022        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
2023          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
2024        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2025        if (clen > 0)        if (clen > 0)
2026          {          {
2027          int otherd = -1;          unsigned int otherd = NOTACHAR;
2028          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2029            {            {
2030  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1293  for (;;) Line 2040  for (;;)
2040            }            }
2041          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2042            {            {
2043              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2044                {
2045                active_count--;             /* Remove non-match possibility */
2046                next_active_state--;
2047                }
2048            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2049              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
2050            else            else
# Line 1370  for (;;) Line 2122  for (;;)
2122              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 5, 0); }
2123            if (isinclass)            if (isinclass)
2124              {              {
2125              if (++count >= GET2(ecode, 3))              int max = GET2(ecode, 3);
2126                if (++count >= max && max != 0)   /* Max 0 => no limit */
2127                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 5, 0); }
2128              else              else
2129                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
# Line 1420  for (;;) Line 2173  for (;;)
2173    
2174        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2175        case OP_COND:        case OP_COND:
2176          case OP_SCOND:
2177          {          {
2178          int local_offsets[1000];          int local_offsets[1000];
2179          int local_workspace[1000];          int local_workspace[1000];
2180          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
2181    
2182          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
2183          means "test if in a recursion". */  
2184            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
2185    
2186          if (condcode == OP_CREF)          /* The DEFINE condition is always false */
2187    
2188            if (condcode == OP_DEF)
2189              {
2190              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
2191              }
2192    
2193            /* The only supported version of OP_RREF is for the value RREF_ANY,
2194            which means "test if in any recursion". We can't test for specifically
2195            recursed groups. */
2196    
2197            else if (condcode == OP_RREF)
2198            {            {
2199            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2200            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2201            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2202              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
2203            }            }
# Line 1670  for (;;) Line 2436  for (;;)
2436      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
2437        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
2438        rlevel*2-2, SP));        rlevel*2-2, SP));
2439      return match_count;      break;        /* In effect, "return", but see the comment below */
2440      }      }
2441    
2442    /* One or more states are active for the next character. */    /* One or more states are active for the next character. */
# Line 1678  for (;;) Line 2444  for (;;)
2444    ptr += clen;    /* Advance to next subject character */    ptr += clen;    /* Advance to next subject character */
2445    }               /* Loop to move along the subject string */    }               /* Loop to move along the subject string */
2446    
2447  /* Control never gets here, but we must keep the compiler happy. */  /* Control gets here from "break" a few lines above. We do it this way because
2448    if we use "return" above, we have compiler trouble. Some compilers warn if
2449    there's nothing here because they think the function doesn't return a value. On
2450    the other hand, if we put a dummy statement here, some more clever compilers
2451    complain that it can't be reached. Sigh. */
2452    
2453  DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"  return match_count;
   "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));  
 return PCRE_ERROR_NOMATCH;  
2454  }  }
2455    
2456    
# Line 1698  is not anchored. Line 2466  is not anchored.
2466    
2467  Arguments:  Arguments:
2468    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2469    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2470    subject         points to the subject string    subject         points to the subject string
2471    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2472    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1714  Returns:          > 0 => number of match Line 2482  Returns:          > 0 => number of match
2482                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2483  */  */
2484    
2485  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2486  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2487    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2488    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
2489  {  {
2490  real_pcre *re = (real_pcre *)argument_re;  real_pcre *re = (real_pcre *)argument_re;
2491  dfa_match_data match_block;  dfa_match_data match_block;
2492    dfa_match_data *md = &match_block;
2493  BOOL utf8, anchored, startline, firstline;  BOOL utf8, anchored, startline, firstline;
2494  const uschar *current_subject, *end_subject, *lcc;  const uschar *current_subject, *end_subject, *lcc;
2495    
# Line 1735  BOOL req_byte_caseless = FALSE; Line 2504  BOOL req_byte_caseless = FALSE;
2504  int first_byte = -1;  int first_byte = -1;
2505  int req_byte = -1;  int req_byte = -1;
2506  int req_byte2 = -1;  int req_byte2 = -1;
2507    int newline;
2508    
2509  /* Plausibility checks */  /* Plausibility checks */
2510    
# Line 1749  flipping, so we scan the extra_data bloc Line 2519  flipping, so we scan the extra_data bloc
2519  match block, so we must initialize them beforehand. However, the other fields  match block, so we must initialize them beforehand. However, the other fields
2520  in the match block must not be set until after the byte flipping. */  in the match block must not be set until after the byte flipping. */
2521    
2522  match_block.tables = re->tables;  md->tables = re->tables;
2523  match_block.callout_data = NULL;  md->callout_data = NULL;
2524    
2525  if (extra_data != NULL)  if (extra_data != NULL)
2526    {    {
# Line 1761  if (extra_data != NULL) Line 2531  if (extra_data != NULL)
2531    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)    if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
2532      return PCRE_ERROR_DFA_UMLIMIT;      return PCRE_ERROR_DFA_UMLIMIT;
2533    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
2534      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
2535    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
2536      match_block.tables = extra_data->tables;      md->tables = extra_data->tables;
2537    }    }
2538    
2539  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 1784  current_subject = (const unsigned char * Line 2554  current_subject = (const unsigned char *
2554  end_subject = (const unsigned char *)subject + length;  end_subject = (const unsigned char *)subject + length;
2555  req_byte_ptr = current_subject - 1;  req_byte_ptr = current_subject - 1;
2556    
2557    #ifdef SUPPORT_UTF8
2558  utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = (re->options & PCRE_UTF8) != 0;
2559    #else
2560    utf8 = FALSE;
2561    #endif
2562    
2563  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
2564    (re->options & PCRE_ANCHORED) != 0;    (re->options & PCRE_ANCHORED) != 0;
2565    
2566  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
2567    
2568  match_block.start_code = (const uschar *)argument_re +  md->start_code = (const uschar *)argument_re +
2569      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
2570  match_block.start_subject = (const unsigned char *)subject;  md->start_subject = (const unsigned char *)subject;
2571  match_block.end_subject = end_subject;  md->end_subject = end_subject;
2572  match_block.moptions = options;  md->moptions = options;
2573  match_block.poptions = re->options;  md->poptions = re->options;
2574    
2575    /* Handle different types of newline. The three bits give eight cases. If
2576    nothing is set at run time, whatever was used at compile time applies. */
2577    
2578    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2579             PCRE_NEWLINE_BITS)
2580      {
2581      case 0: newline = NEWLINE; break;   /* Compile-time default */
2582      case PCRE_NEWLINE_CR: newline = '\r'; break;
2583      case PCRE_NEWLINE_LF: newline = '\n'; break;
2584      case PCRE_NEWLINE_CR+
2585           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2586      case PCRE_NEWLINE_ANY: newline = -1; break;
2587      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2588      default: return PCRE_ERROR_BADNEWLINE;
2589      }
2590    
2591    if (newline == -2)
2592      {
2593      md->nltype = NLTYPE_ANYCRLF;
2594      }
2595    else if (newline < 0)
2596      {
2597      md->nltype = NLTYPE_ANY;
2598      }
2599    else
2600      {
2601      md->nltype = NLTYPE_FIXED;
2602      if (newline > 255)
2603        {
2604        md->nllen = 2;
2605        md->nl[0] = (newline >> 8) & 255;
2606        md->nl[1] = newline & 255;
2607        }
2608      else
2609        {
2610        md->nllen = 1;
2611        md->nl[0] = newline;
2612        }
2613      }
2614    
2615  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
2616  back the character offset. */  back the character offset. */
# Line 1822  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 2636  if (utf8 && (options & PCRE_NO_UTF8_CHEC
2636  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
2637  in other programs later. */  in other programs later. */
2638    
2639  if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = _pcre_default_tables;
2640    
2641  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
2642  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2643    
2644  lcc = match_block.tables + lcc_offset;  lcc = md->tables + lcc_offset;
2645  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->options & PCRE_STARTLINE) != 0;
2646  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2647    
# Line 1860  if ((re->options & PCRE_REQCHSET) != 0) Line 2674  if ((re->options & PCRE_REQCHSET) != 0)
2674    {    {
2675    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2676    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2677    req_byte2 = (match_block.tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
2678    }    }
2679    
2680  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 1885  for (;;) Line 2699  for (;;)
2699      if (firstline)      if (firstline)
2700        {        {
2701        const uschar *t = current_subject;        const uschar *t = current_subject;
2702        while (t < save_end_subject && *t != '\n') t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2703        end_subject = t;        end_subject = t;
2704        }        }
2705    
# Line 1900  for (;;) Line 2714  for (;;)
2714            current_subject++;            current_subject++;
2715        }        }
2716    
2717      /* Or to just after \n for a multiline match if possible */      /* Or to just after a linebreak for a multiline match if possible */
2718    
2719      else if (startline)      else if (startline)
2720        {        {
2721        if (current_subject > match_block.start_subject + start_offset)        if (current_subject > md->start_subject + start_offset)
2722          {          {
2723          while (current_subject < end_subject && current_subject[-1] != NEWLINE)          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2724              current_subject++;
2725    
2726            /* If we have just passed a CR and the newline option is ANY or
2727            ANYCRLF, and we are now at a LF, advance the match position by one more
2728            character. */
2729    
2730            if (current_subject[-1] == '\r' &&
2731                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2732                 current_subject < end_subject &&
2733                 *current_subject == '\n')
2734            current_subject++;            current_subject++;
2735          }          }
2736        }        }
# Line 1987  for (;;) Line 2811  for (;;)
2811    /* OK, now we can do the business */    /* OK, now we can do the business */
2812    
2813    rc = internal_dfa_exec(    rc = internal_dfa_exec(
2814      &match_block,                              /* fixed match data */      md,                                /* fixed match data */
2815      match_block.start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
2816      current_subject,                           /* where we currently are */      current_subject,                   /* where we currently are */
2817      start_offset,                              /* start offset in subject */      start_offset,                      /* start offset in subject */
2818      offsets,                                   /* offset vector */      offsets,                           /* offset vector */
2819      offsetcount,                               /* size of same */      offsetcount,                       /* size of same */
2820      workspace,                                 /* workspace vector */      workspace,                         /* workspace vector */
2821      wscount,                                   /* size of same */      wscount,                           /* size of same */
2822      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
2823      0,                                         /* function recurse level */      0,                                 /* function recurse level */
2824      0);                                        /* regex recurse level */      0);                                /* regex recurse level */
2825    
2826    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
2827    on only if not anchored. */    on only if not anchored. */
# Line 2007  for (;;) Line 2831  for (;;)
2831    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2832    and firstline is set. */    and firstline is set. */
2833    
2834    if (firstline && *current_subject == NEWLINE) break;    if (firstline && IS_NEWLINE(current_subject)) break;
2835    current_subject++;    current_subject++;
   
 #ifdef SUPPORT_UTF8  
2836    if (utf8)    if (utf8)
2837      {      {
2838      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
2839        current_subject++;        current_subject++;
2840      }      }
 #endif  
   
2841    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2842    }  
2843      /* If we have just passed a CR and the newline option is CRLF or ANY or
2844      ANYCRLF, and we are now at a LF, advance the match position by one more
2845      character. */
2846    
2847      if (current_subject[-1] == '\r' &&
2848           (md->nltype == NLTYPE_ANY ||
2849            md->nltype == NLTYPE_ANYCRLF ||
2850            md->nllen == 2) &&
2851           current_subject < end_subject &&
2852           *current_subject == '\n')
2853        current_subject++;
2854    
2855      }   /* "Bumpalong" loop */
2856    
2857  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2858  }  }

Legend:
Removed from v.87  
changed lines
  Added in v.200

  ViewVC Help
Powered by ViewVC 1.1.5