/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 342 by ph10, Sun Apr 20 17:10:13 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl- compatible, but it has advantages in certain
44    applications. */
45    
46    
47  #define NLBLOCK md           /* The block containing newline information */  #ifdef HAVE_CONFIG_H
48    #include "config.h"
49    #endif
50    
51    #define NLBLOCK md             /* Block containing newline information */
52    #define PSSTART start_subject  /* Field containing processed string start */
53    #define PSEND   end_subject    /* Field containing processed string end */
54    
55  #include "pcre_internal.h"  #include "pcre_internal.h"
56    
57    
# Line 58  compatible, but it has advantages in cer Line 66  compatible, but it has advantages in cer
66  *************************************************/  *************************************************/
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  
73  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_PROP_EXTRA       300
74    #define OP_EXTUNI_EXTRA     320
75    #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static const uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90    0, 0,                          /* Any, Anybyte                           */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91      0, 0, 0,                       /* Any, AllAny, Anybyte                   */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 83  static uschar coptable[] = { Line 98  static uschar coptable[] = {
98    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
99    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
100    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
101      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
102    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
103    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
104    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
105      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
106    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
107    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
108    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
109      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
110    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
111    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
112    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 107  static uschar coptable[] = { Line 125  static uschar coptable[] = {
125    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
126    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
127    0,                             /* Reverse                                */    0,                             /* Reverse                                */
128    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
129    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131      0,                             /* RREF                                   */
132      0,                             /* DEF                                    */
133    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134    0,                             /* BRANUMBER                              */    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135    0                              /* BRA                                    */    0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static const uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
146    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
147  };  };
148    
149  static uschar toptable2[] = {  static const uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
154    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
155  };  };
156    
157    
# Line 203  Arguments: Line 223  Arguments:
223    rlevel            function call recursion level    rlevel            function call recursion level
224    recursing         regex recursive call level    recursing         regex recursive call level
225    
226  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
227                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
228                       -1 => failed to match                       -1 => failed to match
229                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
230    
# Line 278  stateblock *next_active_state, *next_new Line 298  stateblock *next_active_state, *next_new
298    
299  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
300  const uschar *ptr;  const uschar *ptr;
301  const uschar *end_code;  const uschar *end_code, *first_op;
302    
303  int active_count, new_count, match_count;  int active_count, new_count, match_count;
304    
# Line 291  const uschar *start_code = md->start_cod Line 311  const uschar *start_code = md->start_cod
311    
312  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
313  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
314    #else
315    BOOL utf8 = FALSE;
316  #endif  #endif
317    
318  rlevel++;  rlevel++;
# Line 314  active_states = (stateblock *)(workspace Line 336  active_states = (stateblock *)(workspace
336  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
337  new_count = 0;  new_count = 0;
338    
339    first_op = this_start_code + 1 + LINK_SIZE +
340      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
341    
342  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
343  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
344  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 323  If the first opcode in the first alterna Line 348  If the first opcode in the first alterna
348  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
349  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
350    
351  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
352    {    {
353    int max_back = 0;    int max_back = 0;
354    int gone_back;    int gone_back;
# Line 405  else Line 430  else
430    
431    else    else
432      {      {
433        int length = 1 + LINK_SIZE +
434          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
435      do      do
436        {        {
437        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
438        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
439          length = 1 + LINK_SIZE;
440        }        }
441      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
442      }      }
# Line 461  for (;;) Line 489  for (;;)
489    
490    if (ptr < end_subject)    if (ptr < end_subject)
491      {      {
492      clen = 1;      clen = 1;        /* Number of bytes in the character */
493  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
494      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
495  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 469  for (;;) Line 497  for (;;)
497      }      }
498    else    else
499      {      {
500      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
501      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
502      }      }
503    
504    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 484  for (;;) Line 512  for (;;)
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue;
515    #ifdef SUPPORT_UCP
516      int chartype, script;      int chartype, script;
517    #endif
518    
519  #ifdef DEBUG  #ifdef DEBUG
520      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
521      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
522        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
523          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
524  #endif  #endif
# Line 532  for (;;) Line 562  for (;;)
562    
563      code = start_code + state_offset;      code = start_code + state_offset;
564      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
565    
566      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
567      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 540  for (;;) Line 569  for (;;)
569      permitted.      permitted.
570    
571      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
572      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
573      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
574      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
575      opcodes. */      */
576    
577      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
578        {        {
# Line 554  for (;;) Line 583  for (;;)
583        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
584        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
585          {          {
586          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
587          if (d >= OP_NOTPROP)            {
588            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
589              case OP_NOTPROP:
590              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
591              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593              case OP_NOT_HSPACE:
594              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595              case OP_NOT_VSPACE:
596              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597              default: break;
598              }
599          }          }
600        }        }
601      else      else
602        {        {
603        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
604        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
605        }        }
606    
607    
# Line 624  for (;;) Line 663  for (;;)
663    
664        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
665        case OP_BRA:        case OP_BRA:
666          case OP_SBRA:
667        do        do
668          {          {
669          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 633  for (;;) Line 673  for (;;)
673        break;        break;
674    
675        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
676          case OP_CBRA:
677          case OP_SCBRA:
678          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
679          code += GET(code, 1);
680          while (*code == OP_ALT)
681            {
682            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
683            code += GET(code, 1);
684            }
685          break;
686    
687          /*-----------------------------------------------------------------*/
688        case OP_BRAZERO:        case OP_BRAZERO:
689        case OP_BRAMINZERO:        case OP_BRAMINZERO:
690        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 642  for (;;) Line 694  for (;;)
694        break;        break;
695    
696        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
697        case OP_BRANUMBER:        case OP_SKIPZERO:
698        ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);        code += 1 + GET(code, 2);
699          while (*code == OP_ALT) code += GET(code, 1);
700          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
701        break;        break;
702    
703        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
704        case OP_CIRC:        case OP_CIRC:
705        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
706            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
             ptr >= start_subject + md->nllen &&  
707              ptr != end_subject &&              ptr != end_subject &&
708              IS_NEWLINE(ptr - md->nllen)))              WAS_NEWLINE(ptr)))
709          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
710        break;        break;
711    
# Line 686  for (;;) Line 739  for (;;)
739    
740        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
741        case OP_ANY:        case OP_ANY:
742        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 ||        if (clen > 0 && !IS_NEWLINE(ptr))
743                         ptr > end_subject - md->nllen ||          { ADD_NEW(state_offset + 1, 0); }
744                         !IS_NEWLINE(ptr)))        break;
745    
746          /*-----------------------------------------------------------------*/
747          case OP_ALLANY:
748          if (clen > 0)
749          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
750        break;        break;
751    
752        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
753        case OP_EODN:        case OP_EODN:
754        if (clen == 0 ||        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
            (ptr == end_subject - md->nllen && IS_NEWLINE(ptr)))  
755          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
756        break;        break;
757    
# Line 704  for (;;) Line 760  for (;;)
760        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
761          {          {
762          if (clen == 0 ||          if (clen == 0 ||
763              (ptr <= end_subject - md->nllen && IS_NEWLINE(ptr) &&              (IS_NEWLINE(ptr) &&
764                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
765              ))              ))
766            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
767          }          }
768        else if ((ims & PCRE_MULTILINE) != 0 &&        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
                ptr <= end_subject - md->nllen && IS_NEWLINE(ptr))  
769          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
770        break;        break;
771    
# Line 759  for (;;) Line 814  for (;;)
814        break;        break;
815    
816    
 #ifdef SUPPORT_UCP  
   
817        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
818        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
819        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
820        */        */
821    
822    #ifdef SUPPORT_UCP
823        case OP_PROP:        case OP_PROP:
824        case OP_NOTPROP:        case OP_NOTPROP:
825        if (clen > 0)        if (clen > 0)
# Line 811  for (;;) Line 865  for (;;)
865  /* ========================================================================== */  /* ========================================================================== */
866        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
867        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
868        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
869        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
870    
871        case OP_TYPEPLUS:        case OP_TYPEPLUS:
872        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
873          case OP_TYPEPOSPLUS:
874        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
875        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
876        if (clen > 0)        if (clen > 0)
877          {          {
878          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
879              (c < 256 &&              (c < 256 &&
880                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                ptr > end_subject - md->nllen ||  
                !IS_NEWLINE(ptr)  
               ) &&  
881                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
882            {            {
883              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
884                {
885                active_count--;            /* Remove non-match possibility */
886                next_active_state--;
887                }
888            count++;            count++;
889            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
890            }            }
# Line 838  for (;;) Line 894  for (;;)
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_TYPEQUERY:        case OP_TYPEQUERY:
896        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
897          case OP_TYPEPOSQUERY:
898        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
899        if (clen > 0)        if (clen > 0)
900          {          {
901          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
902              (c < 256 &&              (c < 256 &&
903                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                ptr > end_subject - md->nllen ||  
                !IS_NEWLINE(ptr)  
               ) &&  
904                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
905            {            {
906              if (codevalue == OP_TYPEPOSQUERY)
907                {
908                active_count--;            /* Remove non-match possibility */
909                next_active_state--;
910                }
911            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
912            }            }
913          }          }
# Line 858  for (;;) Line 916  for (;;)
916        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
917        case OP_TYPESTAR:        case OP_TYPESTAR:
918        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
919          case OP_TYPEPOSSTAR:
920        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
921        if (clen > 0)        if (clen > 0)
922          {          {
923          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
924              (c < 256 &&              (c < 256 &&
925                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                ptr > end_subject - md->nllen ||  
                !IS_NEWLINE(ptr)  
               ) &&  
926                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
927            {            {
928              if (codevalue == OP_TYPEPOSSTAR)
929                {
930                active_count--;            /* Remove non-match possibility */
931                next_active_state--;
932                }
933            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
934            }            }
935          }          }
# Line 877  for (;;) Line 937  for (;;)
937    
938        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
939        case OP_TYPEEXACT:        case OP_TYPEEXACT:
940          count = current_state->count;  /* Number already matched */
941          if (clen > 0)
942            {
943            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
944                (c < 256 &&
945                  (d != OP_ANY || !IS_NEWLINE(ptr)) &&
946                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
947              {
948              if (++count >= GET2(code, 1))
949                { ADD_NEW(state_offset + 4, 0); }
950              else
951                { ADD_NEW(state_offset, count); }
952              }
953            }
954          break;
955    
956          /*-----------------------------------------------------------------*/
957        case OP_TYPEUPTO:        case OP_TYPEUPTO:
958        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
959        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
960          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
961        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
962        if (clen > 0)        if (clen > 0)
963          {          {
964          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
965              (c < 256 &&              (c < 256 &&
966                (d != OP_ANY ||                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
                (ims & PCRE_DOTALL) != 0 ||  
                ptr > end_subject - md->nllen ||  
                !IS_NEWLINE(ptr)  
               ) &&  
967                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
968            {            {
969              if (codevalue == OP_TYPEPOSUPTO)
970                {
971                active_count--;           /* Remove non-match possibility */
972                next_active_state--;
973                }
974            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
975              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
976            else            else
# Line 903  for (;;) Line 981  for (;;)
981    
982  /* ========================================================================== */  /* ========================================================================== */
983        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
984        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
985        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
986        d variable. */        is in the d variable. */
987    
988    #ifdef SUPPORT_UCP
989        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
990        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
991          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
992        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
993        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
994        if (clen > 0)        if (clen > 0)
# Line 944  for (;;) Line 1024  for (;;)
1024            break;            break;
1025            }            }
1026    
1027          if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }          if (OK == (d == OP_PROP))
1028              {
1029              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1030                {
1031                active_count--;           /* Remove non-match possibility */
1032                next_active_state--;
1033                }
1034              count++;
1035              ADD_NEW(state_offset, count);
1036              }
1037          }          }
1038        break;        break;
1039    
1040        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1041        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1042        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1043          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1044        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1045        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1046        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1047          {          {
1048          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1049          int ncount = 0;          int ncount = 0;
1050            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1051              {
1052              active_count--;           /* Remove non-match possibility */
1053              next_active_state--;
1054              }
1055          while (nptr < end_subject)          while (nptr < end_subject)
1056            {            {
1057            int nd;            int nd;
# Line 970  for (;;) Line 1065  for (;;)
1065          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1066          }          }
1067        break;        break;
1068    #endif
1069    
1070          /*-----------------------------------------------------------------*/
1071          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1072          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1073          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1074          count = current_state->count;  /* Already matched */
1075          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1076          if (clen > 0)
1077            {
1078            int ncount = 0;
1079            switch (c)
1080              {
1081              case 0x000b:
1082              case 0x000c:
1083              case 0x0085:
1084              case 0x2028:
1085              case 0x2029:
1086              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1087              goto ANYNL01;
1088    
1089              case 0x000d:
1090              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1091              /* Fall through */
1092    
1093              ANYNL01:
1094              case 0x000a:
1095              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1096                {
1097                active_count--;           /* Remove non-match possibility */
1098                next_active_state--;
1099                }
1100              count++;
1101              ADD_NEW_DATA(-state_offset, count, ncount);
1102              break;
1103    
1104              default:
1105              break;
1106              }
1107            }
1108          break;
1109    
1110          /*-----------------------------------------------------------------*/
1111          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1112          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1113          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1114          count = current_state->count;  /* Already matched */
1115          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1116          if (clen > 0)
1117            {
1118            BOOL OK;
1119            switch (c)
1120              {
1121              case 0x000a:
1122              case 0x000b:
1123              case 0x000c:
1124              case 0x000d:
1125              case 0x0085:
1126              case 0x2028:
1127              case 0x2029:
1128              OK = TRUE;
1129              break;
1130    
1131              default:
1132              OK = FALSE;
1133              break;
1134              }
1135    
1136            if (OK == (d == OP_VSPACE))
1137              {
1138              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1139                {
1140                active_count--;           /* Remove non-match possibility */
1141                next_active_state--;
1142                }
1143              count++;
1144              ADD_NEW_DATA(-state_offset, count, 0);
1145              }
1146            }
1147          break;
1148    
1149        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1150          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1151          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1152          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1153          count = current_state->count;  /* Already matched */
1154          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1155          if (clen > 0)
1156            {
1157            BOOL OK;
1158            switch (c)
1159              {
1160              case 0x09:      /* HT */
1161              case 0x20:      /* SPACE */
1162              case 0xa0:      /* NBSP */
1163              case 0x1680:    /* OGHAM SPACE MARK */
1164              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1165              case 0x2000:    /* EN QUAD */
1166              case 0x2001:    /* EM QUAD */
1167              case 0x2002:    /* EN SPACE */
1168              case 0x2003:    /* EM SPACE */
1169              case 0x2004:    /* THREE-PER-EM SPACE */
1170              case 0x2005:    /* FOUR-PER-EM SPACE */
1171              case 0x2006:    /* SIX-PER-EM SPACE */
1172              case 0x2007:    /* FIGURE SPACE */
1173              case 0x2008:    /* PUNCTUATION SPACE */
1174              case 0x2009:    /* THIN SPACE */
1175              case 0x200A:    /* HAIR SPACE */
1176              case 0x202f:    /* NARROW NO-BREAK SPACE */
1177              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1178              case 0x3000:    /* IDEOGRAPHIC SPACE */
1179              OK = TRUE;
1180              break;
1181    
1182              default:
1183              OK = FALSE;
1184              break;
1185              }
1186    
1187            if (OK == (d == OP_HSPACE))
1188              {
1189              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1190                {
1191                active_count--;           /* Remove non-match possibility */
1192                next_active_state--;
1193                }
1194              count++;
1195              ADD_NEW_DATA(-state_offset, count, 0);
1196              }
1197            }
1198          break;
1199    
1200          /*-----------------------------------------------------------------*/
1201    #ifdef SUPPORT_UCP
1202        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1203        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1204          case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1205        count = 4;        count = 4;
1206        goto QS1;        goto QS1;
1207    
1208        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1209        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1210          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1211        count = 0;        count = 0;
1212    
1213        QS1:        QS1:
# Line 1017  for (;;) Line 1246  for (;;)
1246            break;            break;
1247            }            }
1248    
1249          if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }          if (OK == (d == OP_PROP))
1250              {
1251              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1252                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1253                {
1254                active_count--;           /* Remove non-match possibility */
1255                next_active_state--;
1256                }
1257              ADD_NEW(state_offset + count, 0);
1258              }
1259          }          }
1260        break;        break;
1261    
1262        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1263        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1264        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1265          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1266        count = 2;        count = 2;
1267        goto QS2;        goto QS2;
1268    
1269        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1270        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1271          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1272        count = 0;        count = 0;
1273    
1274        QS2:        QS2:
# Line 1038  for (;;) Line 1278  for (;;)
1278          {          {
1279          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1280          int ncount = 0;          int ncount = 0;
1281            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1282                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1283              {
1284              active_count--;           /* Remove non-match possibility */
1285              next_active_state--;
1286              }
1287          while (nptr < end_subject)          while (nptr < end_subject)
1288            {            {
1289            int nd;            int nd;
# Line 1050  for (;;) Line 1296  for (;;)
1296          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1297          }          }
1298        break;        break;
1299    #endif
1300    
1301          /*-----------------------------------------------------------------*/
1302          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1303          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1304          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1305          count = 2;
1306          goto QS3;
1307    
1308          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1309          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1310          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1311          count = 0;
1312    
1313          QS3:
1314          ADD_ACTIVE(state_offset + 2, 0);
1315          if (clen > 0)
1316            {
1317            int ncount = 0;
1318            switch (c)
1319              {
1320              case 0x000b:
1321              case 0x000c:
1322              case 0x0085:
1323              case 0x2028:
1324              case 0x2029:
1325              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1326              goto ANYNL02;
1327    
1328              case 0x000d:
1329              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1330              /* Fall through */
1331    
1332              ANYNL02:
1333              case 0x000a:
1334              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1335                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1336                {
1337                active_count--;           /* Remove non-match possibility */
1338                next_active_state--;
1339                }
1340              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1341              break;
1342    
1343              default:
1344              break;
1345              }
1346            }
1347          break;
1348    
1349          /*-----------------------------------------------------------------*/
1350          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1351          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1352          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1353          count = 2;
1354          goto QS4;
1355    
1356          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1357          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1358          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1359          count = 0;
1360    
1361          QS4:
1362          ADD_ACTIVE(state_offset + 2, 0);
1363          if (clen > 0)
1364            {
1365            BOOL OK;
1366            switch (c)
1367              {
1368              case 0x000a:
1369              case 0x000b:
1370              case 0x000c:
1371              case 0x000d:
1372              case 0x0085:
1373              case 0x2028:
1374              case 0x2029:
1375              OK = TRUE;
1376              break;
1377    
1378              default:
1379              OK = FALSE;
1380              break;
1381              }
1382            if (OK == (d == OP_VSPACE))
1383              {
1384              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1385                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1386                {
1387                active_count--;           /* Remove non-match possibility */
1388                next_active_state--;
1389                }
1390              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1391              }
1392            }
1393          break;
1394    
1395        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1396          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1397          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1398          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1399          count = 2;
1400          goto QS5;
1401    
1402          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1403          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1404          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1405          count = 0;
1406    
1407          QS5:
1408          ADD_ACTIVE(state_offset + 2, 0);
1409          if (clen > 0)
1410            {
1411            BOOL OK;
1412            switch (c)
1413              {
1414              case 0x09:      /* HT */
1415              case 0x20:      /* SPACE */
1416              case 0xa0:      /* NBSP */
1417              case 0x1680:    /* OGHAM SPACE MARK */
1418              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1419              case 0x2000:    /* EN QUAD */
1420              case 0x2001:    /* EM QUAD */
1421              case 0x2002:    /* EN SPACE */
1422              case 0x2003:    /* EM SPACE */
1423              case 0x2004:    /* THREE-PER-EM SPACE */
1424              case 0x2005:    /* FOUR-PER-EM SPACE */
1425              case 0x2006:    /* SIX-PER-EM SPACE */
1426              case 0x2007:    /* FIGURE SPACE */
1427              case 0x2008:    /* PUNCTUATION SPACE */
1428              case 0x2009:    /* THIN SPACE */
1429              case 0x200A:    /* HAIR SPACE */
1430              case 0x202f:    /* NARROW NO-BREAK SPACE */
1431              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1432              case 0x3000:    /* IDEOGRAPHIC SPACE */
1433              OK = TRUE;
1434              break;
1435    
1436              default:
1437              OK = FALSE;
1438              break;
1439              }
1440    
1441            if (OK == (d == OP_HSPACE))
1442              {
1443              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1444                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1445                {
1446                active_count--;           /* Remove non-match possibility */
1447                next_active_state--;
1448                }
1449              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1450              }
1451            }
1452          break;
1453    
1454          /*-----------------------------------------------------------------*/
1455    #ifdef SUPPORT_UCP
1456        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1457        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1458        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1459          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1460        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1461          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1462        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1093  for (;;) Line 1495  for (;;)
1495    
1496          if (OK == (d == OP_PROP))          if (OK == (d == OP_PROP))
1497            {            {
1498              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1499                {
1500                active_count--;           /* Remove non-match possibility */
1501                next_active_state--;
1502                }
1503            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1504              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 6, 0); }
1505            else            else
# Line 1105  for (;;) Line 1512  for (;;)
1512        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1513        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1514        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1515          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1516        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1517          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1518        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1112  for (;;) Line 1520  for (;;)
1520          {          {
1521          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1522          int ncount = 0;          int ncount = 0;
1523            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1524              {
1525              active_count--;           /* Remove non-match possibility */
1526              next_active_state--;
1527              }
1528          while (nptr < end_subject)          while (nptr < end_subject)
1529            {            {
1530            int nd;            int nd;
# Line 1127  for (;;) Line 1540  for (;;)
1540            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1541          }          }
1542        break;        break;
1543    #endif
1544    
1545          /*-----------------------------------------------------------------*/
1546          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1547          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1548          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1549          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1550          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1551            { ADD_ACTIVE(state_offset + 4, 0); }
1552          count = current_state->count;  /* Number already matched */
1553          if (clen > 0)
1554            {
1555            int ncount = 0;
1556            switch (c)
1557              {
1558              case 0x000b:
1559              case 0x000c:
1560              case 0x0085:
1561              case 0x2028:
1562              case 0x2029:
1563              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1564              goto ANYNL03;
1565    
1566              case 0x000d:
1567              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1568              /* Fall through */
1569    
1570              ANYNL03:
1571              case 0x000a:
1572              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1573                {
1574                active_count--;           /* Remove non-match possibility */
1575                next_active_state--;
1576                }
1577              if (++count >= GET2(code, 1))
1578                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1579              else
1580                { ADD_NEW_DATA(-state_offset, count, ncount); }
1581              break;
1582    
1583              default:
1584              break;
1585              }
1586            }
1587          break;
1588    
1589          /*-----------------------------------------------------------------*/
1590          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1591          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1592          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1593          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1594          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1595            { ADD_ACTIVE(state_offset + 4, 0); }
1596          count = current_state->count;  /* Number already matched */
1597          if (clen > 0)
1598            {
1599            BOOL OK;
1600            switch (c)
1601              {
1602              case 0x000a:
1603              case 0x000b:
1604              case 0x000c:
1605              case 0x000d:
1606              case 0x0085:
1607              case 0x2028:
1608              case 0x2029:
1609              OK = TRUE;
1610              break;
1611    
1612              default:
1613              OK = FALSE;
1614              }
1615    
1616            if (OK == (d == OP_VSPACE))
1617              {
1618              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1619                {
1620                active_count--;           /* Remove non-match possibility */
1621                next_active_state--;
1622                }
1623              if (++count >= GET2(code, 1))
1624                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1625              else
1626                { ADD_NEW_DATA(-state_offset, count, 0); }
1627              }
1628            }
1629          break;
1630    
1631          /*-----------------------------------------------------------------*/
1632          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1633          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1634          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1635          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1636          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1637            { ADD_ACTIVE(state_offset + 4, 0); }
1638          count = current_state->count;  /* Number already matched */
1639          if (clen > 0)
1640            {
1641            BOOL OK;
1642            switch (c)
1643              {
1644              case 0x09:      /* HT */
1645              case 0x20:      /* SPACE */
1646              case 0xa0:      /* NBSP */
1647              case 0x1680:    /* OGHAM SPACE MARK */
1648              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1649              case 0x2000:    /* EN QUAD */
1650              case 0x2001:    /* EM QUAD */
1651              case 0x2002:    /* EN SPACE */
1652              case 0x2003:    /* EM SPACE */
1653              case 0x2004:    /* THREE-PER-EM SPACE */
1654              case 0x2005:    /* FOUR-PER-EM SPACE */
1655              case 0x2006:    /* SIX-PER-EM SPACE */
1656              case 0x2007:    /* FIGURE SPACE */
1657              case 0x2008:    /* PUNCTUATION SPACE */
1658              case 0x2009:    /* THIN SPACE */
1659              case 0x200A:    /* HAIR SPACE */
1660              case 0x202f:    /* NARROW NO-BREAK SPACE */
1661              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1662              case 0x3000:    /* IDEOGRAPHIC SPACE */
1663              OK = TRUE;
1664              break;
1665    
1666              default:
1667              OK = FALSE;
1668              break;
1669              }
1670    
1671            if (OK == (d == OP_HSPACE))
1672              {
1673              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1674                {
1675                active_count--;           /* Remove non-match possibility */
1676                next_active_state--;
1677                }
1678              if (++count >= GET2(code, 1))
1679                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1680              else
1681                { ADD_NEW_DATA(-state_offset, count, 0); }
1682              }
1683            }
1684          break;
1685    
1686  /* ========================================================================== */  /* ========================================================================== */
1687        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1148  for (;;) Line 1703  for (;;)
1703          {          {
1704          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1705            {            {
1706            int othercase;            unsigned int othercase;
1707            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1708    
1709            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
# Line 1157  for (;;) Line 1712  for (;;)
1712  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1713            othercase = _pcre_ucp_othercase(c);            othercase = _pcre_ucp_othercase(c);
1714  #else  #else
1715            othercase = -1;            othercase = NOTACHAR;
1716  #endif  #endif
1717    
1718            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
# Line 1198  for (;;) Line 1753  for (;;)
1753  #endif  #endif
1754    
1755        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1756          /* This is tricky, like EXTUNI, because it too can match more than one
1757          character (when CR is followed by LF). In this case, set up a negative
1758          state to wait for one character to pass before continuing. */
1759    
1760          case OP_ANYNL:
1761          if (clen > 0) switch(c)
1762            {
1763            case 0x000b:
1764            case 0x000c:
1765            case 0x0085:
1766            case 0x2028:
1767            case 0x2029:
1768            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1769    
1770            case 0x000a:
1771            ADD_NEW(state_offset + 1, 0);
1772            break;
1773    
1774            case 0x000d:
1775            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1776              {
1777              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1778              }
1779            else
1780              {
1781              ADD_NEW(state_offset + 1, 0);
1782              }
1783            break;
1784            }
1785          break;
1786    
1787          /*-----------------------------------------------------------------*/
1788          case OP_NOT_VSPACE:
1789          if (clen > 0) switch(c)
1790            {
1791            case 0x000a:
1792            case 0x000b:
1793            case 0x000c:
1794            case 0x000d:
1795            case 0x0085:
1796            case 0x2028:
1797            case 0x2029:
1798            break;
1799    
1800            default:
1801            ADD_NEW(state_offset + 1, 0);
1802            break;
1803            }
1804          break;
1805    
1806          /*-----------------------------------------------------------------*/
1807          case OP_VSPACE:
1808          if (clen > 0) switch(c)
1809            {
1810            case 0x000a:
1811            case 0x000b:
1812            case 0x000c:
1813            case 0x000d:
1814            case 0x0085:
1815            case 0x2028:
1816            case 0x2029:
1817            ADD_NEW(state_offset + 1, 0);
1818            break;
1819    
1820            default: break;
1821            }
1822          break;
1823    
1824          /*-----------------------------------------------------------------*/
1825          case OP_NOT_HSPACE:
1826          if (clen > 0) switch(c)
1827            {
1828            case 0x09:      /* HT */
1829            case 0x20:      /* SPACE */
1830            case 0xa0:      /* NBSP */
1831            case 0x1680:    /* OGHAM SPACE MARK */
1832            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1833            case 0x2000:    /* EN QUAD */
1834            case 0x2001:    /* EM QUAD */
1835            case 0x2002:    /* EN SPACE */
1836            case 0x2003:    /* EM SPACE */
1837            case 0x2004:    /* THREE-PER-EM SPACE */
1838            case 0x2005:    /* FOUR-PER-EM SPACE */
1839            case 0x2006:    /* SIX-PER-EM SPACE */
1840            case 0x2007:    /* FIGURE SPACE */
1841            case 0x2008:    /* PUNCTUATION SPACE */
1842            case 0x2009:    /* THIN SPACE */
1843            case 0x200A:    /* HAIR SPACE */
1844            case 0x202f:    /* NARROW NO-BREAK SPACE */
1845            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1846            case 0x3000:    /* IDEOGRAPHIC SPACE */
1847            break;
1848    
1849            default:
1850            ADD_NEW(state_offset + 1, 0);
1851            break;
1852            }
1853          break;
1854    
1855          /*-----------------------------------------------------------------*/
1856          case OP_HSPACE:
1857          if (clen > 0) switch(c)
1858            {
1859            case 0x09:      /* HT */
1860            case 0x20:      /* SPACE */
1861            case 0xa0:      /* NBSP */
1862            case 0x1680:    /* OGHAM SPACE MARK */
1863            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1864            case 0x2000:    /* EN QUAD */
1865            case 0x2001:    /* EM QUAD */
1866            case 0x2002:    /* EN SPACE */
1867            case 0x2003:    /* EM SPACE */
1868            case 0x2004:    /* THREE-PER-EM SPACE */
1869            case 0x2005:    /* FOUR-PER-EM SPACE */
1870            case 0x2006:    /* SIX-PER-EM SPACE */
1871            case 0x2007:    /* FIGURE SPACE */
1872            case 0x2008:    /* PUNCTUATION SPACE */
1873            case 0x2009:    /* THIN SPACE */
1874            case 0x200A:    /* HAIR SPACE */
1875            case 0x202f:    /* NARROW NO-BREAK SPACE */
1876            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1877            case 0x3000:    /* IDEOGRAPHIC SPACE */
1878            ADD_NEW(state_offset + 1, 0);
1879            break;
1880            }
1881          break;
1882    
1883          /*-----------------------------------------------------------------*/
1884        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1885        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1886        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1205  for (;;) Line 1888  for (;;)
1888        case OP_NOT:        case OP_NOT:
1889        if (clen > 0)        if (clen > 0)
1890          {          {
1891          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1892          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1893          }          }
1894        break;        break;
# Line 1213  for (;;) Line 1896  for (;;)
1896        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1897        case OP_PLUS:        case OP_PLUS:
1898        case OP_MINPLUS:        case OP_MINPLUS:
1899          case OP_POSPLUS:
1900        case OP_NOTPLUS:        case OP_NOTPLUS:
1901        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1902          case OP_NOTPOSPLUS:
1903        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1904        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1905        if (clen > 0)        if (clen > 0)
1906          {          {
1907          int otherd = -1;          unsigned int otherd = NOTACHAR;
1908          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1909            {            {
1910  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1234  for (;;) Line 1919  for (;;)
1919            otherd = fcc[d];            otherd = fcc[d];
1920            }            }
1921          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1922            { count++; ADD_NEW(state_offset, count); }            {
1923              if (count > 0 &&
1924                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1925                {
1926                active_count--;             /* Remove non-match possibility */
1927                next_active_state--;
1928                }
1929              count++;
1930              ADD_NEW(state_offset, count);
1931              }
1932          }          }
1933        break;        break;
1934    
1935        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1936        case OP_QUERY:        case OP_QUERY:
1937        case OP_MINQUERY:        case OP_MINQUERY:
1938          case OP_POSQUERY:
1939        case OP_NOTQUERY:        case OP_NOTQUERY:
1940        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1941          case OP_NOTPOSQUERY:
1942        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1943        if (clen > 0)        if (clen > 0)
1944          {          {
1945          int otherd = -1;          unsigned int otherd = NOTACHAR;
1946          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1947            {            {
1948  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1261  for (;;) Line 1957  for (;;)
1957            otherd = fcc[d];            otherd = fcc[d];
1958            }            }
1959          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1960            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1961              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1962                {
1963                active_count--;            /* Remove non-match possibility */
1964                next_active_state--;
1965                }
1966              ADD_NEW(state_offset + dlen + 1, 0);
1967              }
1968          }          }
1969        break;        break;
1970    
1971        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1972        case OP_STAR:        case OP_STAR:
1973        case OP_MINSTAR:        case OP_MINSTAR:
1974          case OP_POSSTAR:
1975        case OP_NOTSTAR:        case OP_NOTSTAR:
1976        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1977          case OP_NOTPOSSTAR:
1978        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1979        if (clen > 0)        if (clen > 0)
1980          {          {
1981          int otherd = -1;          unsigned int otherd = NOTACHAR;
1982          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1983            {            {
1984  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1288  for (;;) Line 1993  for (;;)
1993            otherd = fcc[d];            otherd = fcc[d];
1994            }            }
1995          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1996            { ADD_NEW(state_offset, 0); }            {
1997              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
1998                {
1999                active_count--;            /* Remove non-match possibility */
2000                next_active_state--;
2001                }
2002              ADD_NEW(state_offset, 0);
2003              }
2004          }          }
2005        break;        break;
2006    
2007        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2008        case OP_EXACT:        case OP_EXACT:
2009          case OP_NOTEXACT:
2010          count = current_state->count;  /* Number already matched */
2011          if (clen > 0)
2012            {
2013            unsigned int otherd = NOTACHAR;
2014            if ((ims & PCRE_CASELESS) != 0)
2015              {
2016    #ifdef SUPPORT_UTF8
2017              if (utf8 && d >= 128)
2018                {
2019    #ifdef SUPPORT_UCP
2020                otherd = _pcre_ucp_othercase(d);
2021    #endif  /* SUPPORT_UCP */
2022                }
2023              else
2024    #endif  /* SUPPORT_UTF8 */
2025              otherd = fcc[d];
2026              }
2027            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2028              {
2029              if (++count >= GET2(code, 1))
2030                { ADD_NEW(state_offset + dlen + 3, 0); }
2031              else
2032                { ADD_NEW(state_offset, count); }
2033              }
2034            }
2035          break;
2036    
2037          /*-----------------------------------------------------------------*/
2038        case OP_UPTO:        case OP_UPTO:
2039        case OP_MINUPTO:        case OP_MINUPTO:
2040        case OP_NOTEXACT:        case OP_POSUPTO:
2041        case OP_NOTUPTO:        case OP_NOTUPTO:
2042        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2043        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
2044          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
2045        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2046        if (clen > 0)        if (clen > 0)
2047          {          {
2048          int otherd = -1;          unsigned int otherd = NOTACHAR;
2049          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2050            {            {
2051  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1320  for (;;) Line 2061  for (;;)
2061            }            }
2062          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2063            {            {
2064              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2065                {
2066                active_count--;             /* Remove non-match possibility */
2067                next_active_state--;
2068                }
2069            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2070              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
2071            else            else
# Line 1414  for (;;) Line 2160  for (;;)
2160    
2161  /* ========================================================================== */  /* ========================================================================== */
2162        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2163        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assertion
2164          (?!) is optimised when compiling to OP_FAIL, so we have to support that,
2165          though the other "backtracking verbs" are not supported. */
2166    
2167          case OP_FAIL:
2168          break;
2169    
2170        case OP_ASSERT:        case OP_ASSERT:
2171        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 1448  for (;;) Line 2199  for (;;)
2199    
2200        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2201        case OP_COND:        case OP_COND:
2202          case OP_SCOND:
2203          {          {
2204          int local_offsets[1000];          int local_offsets[1000];
2205          int local_workspace[1000];          int local_workspace[1000];
2206          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
2207    
2208          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
2209          means "test if in a recursion". */  
2210            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
2211    
2212          if (condcode == OP_CREF)          /* The DEFINE condition is always false */
2213    
2214            if (condcode == OP_DEF)
2215              {
2216              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
2217              }
2218    
2219            /* The only supported version of OP_RREF is for the value RREF_ANY,
2220            which means "test if in any recursion". We can't test for specifically
2221            recursed groups. */
2222    
2223            else if (condcode == OP_RREF)
2224            {            {
2225            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2226            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2227            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2228              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
2229            }            }
# Line 1728  is not anchored. Line 2492  is not anchored.
2492    
2493  Arguments:  Arguments:
2494    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2495    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2496    subject         points to the subject string    subject         points to the subject string
2497    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2498    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1744  Returns:          > 0 => number of match Line 2508  Returns:          > 0 => number of match
2508                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2509  */  */
2510    
2511  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2512  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2513    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2514    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 1834  md->end_subject = end_subject; Line 2598  md->end_subject = end_subject;
2598  md->moptions = options;  md->moptions = options;
2599  md->poptions = re->options;  md->poptions = re->options;
2600    
2601  /* Handle different types of newline. The two bits give four cases. If nothing  /* If the BSR option is not set at match time, copy what was set
2602  is set at run time, whatever was used at compile time applies. */  at compile time. */
2603    
2604  switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &  if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
          PCRE_NEWLINE_CRLF)  
2605    {    {
2606    default:              newline = NEWLINE; break;   /* Compile-time default */    if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2607        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2608    #ifdef BSR_ANYCRLF
2609      else md->moptions |= PCRE_BSR_ANYCRLF;
2610    #endif
2611      }
2612    
2613    /* Handle different types of newline. The three bits give eight cases. If
2614    nothing is set at run time, whatever was used at compile time applies. */
2615    
2616    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2617             PCRE_NEWLINE_BITS)
2618      {
2619      case 0: newline = NEWLINE; break;   /* Compile-time default */
2620    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
2621    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = '\n'; break;
2622    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2623         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2624      case PCRE_NEWLINE_ANY: newline = -1; break;
2625      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2626      default: return PCRE_ERROR_BADNEWLINE;
2627    }    }
2628    
2629  if (newline > 255)  if (newline == -2)
2630      {
2631      md->nltype = NLTYPE_ANYCRLF;
2632      }
2633    else if (newline < 0)
2634    {    {
2635    md->nllen = 2;    md->nltype = NLTYPE_ANY;
   md->nl[0] = (newline >> 8) & 255;  
   md->nl[1] = newline & 255;  
2636    }    }
2637  else  else
2638    {    {
2639    md->nllen = 1;    md->nltype = NLTYPE_FIXED;
2640    md->nl[0] = newline;    if (newline > 255)
2641        {
2642        md->nllen = 2;
2643        md->nl[0] = (newline >> 8) & 255;
2644        md->nl[1] = newline & 255;
2645        }
2646      else
2647        {
2648        md->nllen = 1;
2649        md->nl[0] = newline;
2650        }
2651    }    }
2652    
2653  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 1889  if (md->tables == NULL) md->tables = _pc Line 2680  if (md->tables == NULL) md->tables = _pc
2680  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2681    
2682  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2683  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2684  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2685    
2686  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 1900  studied, there may be a bitmap of possib Line 2691  studied, there may be a bitmap of possib
2691    
2692  if (!anchored)  if (!anchored)
2693    {    {
2694    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2695      {      {
2696      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2697      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 1917  if (!anchored) Line 2708  if (!anchored)
2708  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2709  character" set. */  character" set. */
2710    
2711  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2712    {    {
2713    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2714    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 1946  for (;;) Line 2737  for (;;)
2737      if (firstline)      if (firstline)
2738        {        {
2739        const uschar *t = current_subject;        const uschar *t = current_subject;
2740        while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2741        end_subject = t;        end_subject = t;
2742        }        }
2743    
# Line 1965  for (;;) Line 2756  for (;;)
2756    
2757      else if (startline)      else if (startline)
2758        {        {
2759        if (current_subject > md->start_subject + md->nllen +        if (current_subject > md->start_subject + start_offset)
           start_offset)  
2760          {          {
2761          while (current_subject <= end_subject &&          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2762                 !IS_NEWLINE(current_subject - md->nllen))            current_subject++;
2763    
2764            /* If we have just passed a CR and the newline option is ANY or
2765            ANYCRLF, and we are now at a LF, advance the match position by one more
2766            character. */
2767    
2768            if (current_subject[-1] == '\r' &&
2769                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2770                 current_subject < end_subject &&
2771                 *current_subject == '\n')
2772            current_subject++;            current_subject++;
2773          }          }
2774        }        }
# Line 2070  for (;;) Line 2869  for (;;)
2869    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2870    and firstline is set. */    and firstline is set. */
2871    
2872    if (firstline &&    if (firstline && IS_NEWLINE(current_subject)) break;
       current_subject <= end_subject - md->nllen &&  
       IS_NEWLINE(current_subject)) break;  
2873    current_subject++;    current_subject++;
2874    if (utf8)    if (utf8)
2875      {      {
# Line 2080  for (;;) Line 2877  for (;;)
2877        current_subject++;        current_subject++;
2878      }      }
2879    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2880    }  
2881      /* If we have just passed a CR and we are now at a LF, and the pattern does
2882      not contain any explicit matches for \r or \n, and the newline option is CRLF
2883      or ANY or ANYCRLF, advance the match position by one more character. */
2884    
2885      if (current_subject[-1] == '\r' &&
2886          current_subject < end_subject &&
2887          *current_subject == '\n' &&
2888          (re->flags & PCRE_HASCRORLF) == 0 &&
2889            (md->nltype == NLTYPE_ANY ||
2890             md->nltype == NLTYPE_ANYCRLF ||
2891             md->nllen == 2))
2892        current_subject++;
2893    
2894      }   /* "Bumpalong" loop */
2895    
2896  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2897  }  }

Legend:
Removed from v.91  
changed lines
  Added in v.342

  ViewVC Help
Powered by ViewVC 1.1.5