/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 178 by ph10, Wed Jun 13 08:44:34 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47  #define NLBLOCK md           /* The block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
48    #define PSSTART start_subject  /* Field containing processed string start */
49    #define PSEND   end_subject    /* Field containing processed string end */
50    
51  #include "pcre_internal.h"  #include "pcre_internal.h"
52    
53    
# Line 58  compatible, but it has advantages in cer Line 62  compatible, but it has advantages in cer
62  *************************************************/  *************************************************/
63    
64  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
65  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
66  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
67    never stored, so we push them well clear of the normal opcodes. */
68  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  
69  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_PROP_EXTRA       300
70    #define OP_EXTUNI_EXTRA     320
71    #define OP_ANYNL_EXTRA      340
72    #define OP_HSPACE_EXTRA     360
73    #define OP_VSPACE_EXTRA     380
74    
75    
76  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
77  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
78  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
79  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
80  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
81    that follow must also be modified. */
82    
83  static uschar coptable[] = {  static uschar coptable[] = {
84    0,                             /* End                                    */    0,                             /* End                                    */
85    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
86      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
87    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
88    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
89      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
90    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
91    1,                             /* Char                                   */    1,                             /* Char                                   */
92    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 83  static uschar coptable[] = { Line 94  static uschar coptable[] = {
94    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
95    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
96    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
97      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
98    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
99    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
100    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
101      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
102    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
103    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
104    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
105      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
106    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
107    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
108    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 107  static uschar coptable[] = { Line 121  static uschar coptable[] = {
121    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
122    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
123    0,                             /* Reverse                                */    0,                             /* Reverse                                */
124    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
125    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
126    0,                             /* CREF                                   */    0,                             /* CREF                                   */
127    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0,                             /* RREF                                   */
128    0,                             /* BRANUMBER                              */    0,                             /* DEF                                    */
129    0                              /* BRA                                    */    0, 0                           /* BRAZERO, BRAMINZERO                    */
130  };  };
131    
132  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
133  and \w */  and \w */
134    
135  static uschar toptable1[] = {  static uschar toptable1[] = {
136    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
137    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
138    ctype_space, ctype_space,    ctype_space, ctype_space,
139    ctype_word,  ctype_word,    ctype_word,  ctype_word,
# Line 127  static uschar toptable1[] = { Line 141  static uschar toptable1[] = {
141  };  };
142    
143  static uschar toptable2[] = {  static uschar toptable2[] = {
144    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
145    ctype_digit, 0,    ctype_digit, 0,
146    ctype_space, 0,    ctype_space, 0,
147    ctype_word,  0,    ctype_word,  0,
# Line 278  stateblock *next_active_state, *next_new Line 292  stateblock *next_active_state, *next_new
292    
293  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
294  const uschar *ptr;  const uschar *ptr;
295  const uschar *end_code;  const uschar *end_code, *first_op;
296    
297  int active_count, new_count, match_count;  int active_count, new_count, match_count;
298    
# Line 291  const uschar *start_code = md->start_cod Line 305  const uschar *start_code = md->start_cod
305    
306  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
307  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
308    #else
309    BOOL utf8 = FALSE;
310  #endif  #endif
311    
312  rlevel++;  rlevel++;
# Line 314  active_states = (stateblock *)(workspace Line 330  active_states = (stateblock *)(workspace
330  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
331  new_count = 0;  new_count = 0;
332    
333    first_op = this_start_code + 1 + LINK_SIZE +
334      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
335    
336  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
337  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
338  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 323  If the first opcode in the first alterna Line 342  If the first opcode in the first alterna
342  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
343  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
344    
345  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
346    {    {
347    int max_back = 0;    int max_back = 0;
348    int gone_back;    int gone_back;
# Line 405  else Line 424  else
424    
425    else    else
426      {      {
427        int length = 1 + LINK_SIZE +
428          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
429      do      do
430        {        {
431        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
432        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
433          length = 1 + LINK_SIZE;
434        }        }
435      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
436      }      }
# Line 461  for (;;) Line 483  for (;;)
483    
484    if (ptr < end_subject)    if (ptr < end_subject)
485      {      {
486      clen = 1;      clen = 1;        /* Number of bytes in the character */
487  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
488      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
489  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 469  for (;;) Line 491  for (;;)
491      }      }
492    else    else
493      {      {
494      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
495      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
496      }      }
497    
498    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 484  for (;;) Line 506  for (;;)
506      const uschar *code;      const uschar *code;
507      int state_offset = current_state->offset;      int state_offset = current_state->offset;
508      int count, codevalue;      int count, codevalue;
509    #ifdef SUPPORT_UCP
510      int chartype, script;      int chartype, script;
511    #endif
512    
513  #ifdef DEBUG  #ifdef DEBUG
514      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
515      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
516        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
517          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
518  #endif  #endif
# Line 532  for (;;) Line 556  for (;;)
556    
557      code = start_code + state_offset;      code = start_code + state_offset;
558      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
559    
560      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
561      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 540  for (;;) Line 563  for (;;)
563      permitted.      permitted.
564    
565      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
566      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
567      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
568      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
569      opcodes. */      */
570    
571      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
572        {        {
# Line 554  for (;;) Line 577  for (;;)
577        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
578        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
579          {          {
580          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
581          if (d >= OP_NOTPROP)            {
582            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
583              case OP_NOTPROP:
584              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
585              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
586              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
587              case OP_NOT_HSPACE:
588              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
589              case OP_NOT_VSPACE:
590              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
591              default: break;
592              }
593          }          }
594        }        }
595      else      else
596        {        {
597        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
598        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
599        }        }
600    
601    
# Line 624  for (;;) Line 657  for (;;)
657    
658        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
659        case OP_BRA:        case OP_BRA:
660          case OP_SBRA:
661        do        do
662          {          {
663          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 633  for (;;) Line 667  for (;;)
667        break;        break;
668    
669        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
670          case OP_CBRA:
671          case OP_SCBRA:
672          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
673          code += GET(code, 1);
674          while (*code == OP_ALT)
675            {
676            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
677            code += GET(code, 1);
678            }
679          break;
680    
681          /*-----------------------------------------------------------------*/
682        case OP_BRAZERO:        case OP_BRAZERO:
683        case OP_BRAMINZERO:        case OP_BRAMINZERO:
684        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 642  for (;;) Line 688  for (;;)
688        break;        break;
689    
690        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
       case OP_BRANUMBER:  
       ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);  
       break;  
   
       /*-----------------------------------------------------------------*/  
691        case OP_CIRC:        case OP_CIRC:
692        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
693            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
             ptr >= start_subject + md->nllen &&  
694              ptr != end_subject &&              ptr != end_subject &&
695              IS_NEWLINE(ptr - md->nllen)))              WAS_NEWLINE(ptr)))
696          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
697        break;        break;
698    
# Line 686  for (;;) Line 726  for (;;)
726    
727        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
728        case OP_ANY:        case OP_ANY:
729        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 ||        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
                        ptr > end_subject - md->nllen ||  
                        !IS_NEWLINE(ptr)))  
730          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
731        break;        break;
732    
733        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
734        case OP_EODN:        case OP_EODN:
735        if (clen == 0 ||        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
            (ptr == end_subject - md->nllen && IS_NEWLINE(ptr)))  
736          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
737        break;        break;
738    
# Line 704  for (;;) Line 741  for (;;)
741        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
742          {          {
743          if (clen == 0 ||          if (clen == 0 ||
744              (ptr <= end_subject - md->nllen && IS_NEWLINE(ptr) &&              (IS_NEWLINE(ptr) &&
745                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
746              ))              ))
747            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
748          }          }
749        else if ((ims & PCRE_MULTILINE) != 0 &&        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
                ptr <= end_subject - md->nllen && IS_NEWLINE(ptr))  
750          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
751        break;        break;
752    
# Line 759  for (;;) Line 795  for (;;)
795        break;        break;
796    
797    
 #ifdef SUPPORT_UCP  
   
798        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
799        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
800        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
801        */        */
802    
803    #ifdef SUPPORT_UCP
804        case OP_PROP:        case OP_PROP:
805        case OP_NOTPROP:        case OP_NOTPROP:
806        if (clen > 0)        if (clen > 0)
# Line 816  for (;;) Line 851  for (;;)
851    
852        case OP_TYPEPLUS:        case OP_TYPEPLUS:
853        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
854          case OP_TYPEPOSPLUS:
855        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
856        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
857        if (clen > 0)        if (clen > 0)
# Line 824  for (;;) Line 860  for (;;)
860              (c < 256 &&              (c < 256 &&
861                (d != OP_ANY ||                (d != OP_ANY ||
862                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
863                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
864                ) &&                ) &&
865                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
866            {            {
867              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
868                {
869                active_count--;            /* Remove non-match possibility */
870                next_active_state--;
871                }
872            count++;            count++;
873            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
874            }            }
# Line 838  for (;;) Line 878  for (;;)
878        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
879        case OP_TYPEQUERY:        case OP_TYPEQUERY:
880        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
881          case OP_TYPEPOSQUERY:
882        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
883        if (clen > 0)        if (clen > 0)
884          {          {
# Line 845  for (;;) Line 886  for (;;)
886              (c < 256 &&              (c < 256 &&
887                (d != OP_ANY ||                (d != OP_ANY ||
888                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
889                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
890                ) &&                ) &&
891                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
892            {            {
893              if (codevalue == OP_TYPEPOSQUERY)
894                {
895                active_count--;            /* Remove non-match possibility */
896                next_active_state--;
897                }
898            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
899            }            }
900          }          }
# Line 858  for (;;) Line 903  for (;;)
903        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
904        case OP_TYPESTAR:        case OP_TYPESTAR:
905        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
906          case OP_TYPEPOSSTAR:
907        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
908        if (clen > 0)        if (clen > 0)
909          {          {
# Line 865  for (;;) Line 911  for (;;)
911              (c < 256 &&              (c < 256 &&
912                (d != OP_ANY ||                (d != OP_ANY ||
913                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
914                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
915                ) &&                ) &&
916                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
917            {            {
918              if (codevalue == OP_TYPEPOSSTAR)
919                {
920                active_count--;            /* Remove non-match possibility */
921                next_active_state--;
922                }
923            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
924            }            }
925          }          }
# Line 877  for (;;) Line 927  for (;;)
927    
928        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
929        case OP_TYPEEXACT:        case OP_TYPEEXACT:
930          count = current_state->count;  /* Number already matched */
931          if (clen > 0)
932            {
933            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
934                (c < 256 &&
935                  (d != OP_ANY ||
936                   (ims & PCRE_DOTALL) != 0 ||
937                   !IS_NEWLINE(ptr)
938                  ) &&
939                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
940              {
941              if (++count >= GET2(code, 1))
942                { ADD_NEW(state_offset + 4, 0); }
943              else
944                { ADD_NEW(state_offset, count); }
945              }
946            }
947          break;
948    
949          /*-----------------------------------------------------------------*/
950        case OP_TYPEUPTO:        case OP_TYPEUPTO:
951        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
952        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
953          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
954        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
955        if (clen > 0)        if (clen > 0)
956          {          {
# Line 888  for (;;) Line 958  for (;;)
958              (c < 256 &&              (c < 256 &&
959                (d != OP_ANY ||                (d != OP_ANY ||
960                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
961                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
962                ) &&                ) &&
963                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
964            {            {
965              if (codevalue == OP_TYPEPOSUPTO)
966                {
967                active_count--;           /* Remove non-match possibility */
968                next_active_state--;
969                }
970            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
971              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
972            else            else
# Line 903  for (;;) Line 977  for (;;)
977    
978  /* ========================================================================== */  /* ========================================================================== */
979        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
980        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
981        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
982        d variable. */        is in the d variable. */
983    
984    #ifdef SUPPORT_UCP
985        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
986        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
987          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
988        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
989        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
990        if (clen > 0)        if (clen > 0)
# Line 944  for (;;) Line 1020  for (;;)
1020            break;            break;
1021            }            }
1022    
1023          if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }          if (OK == (d == OP_PROP))
1024              {
1025              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1026                {
1027                active_count--;           /* Remove non-match possibility */
1028                next_active_state--;
1029                }
1030              count++;
1031              ADD_NEW(state_offset, count);
1032              }
1033          }          }
1034        break;        break;
1035    
1036        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1037        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1038        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1039          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1040        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1041        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1042        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1043          {          {
1044          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1045          int ncount = 0;          int ncount = 0;
1046            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1047              {
1048              active_count--;           /* Remove non-match possibility */
1049              next_active_state--;
1050              }
1051          while (nptr < end_subject)          while (nptr < end_subject)
1052            {            {
1053            int nd;            int nd;
# Line 970  for (;;) Line 1061  for (;;)
1061          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1062          }          }
1063        break;        break;
1064    #endif
1065    
1066          /*-----------------------------------------------------------------*/
1067          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1068          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1069          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1070          count = current_state->count;  /* Already matched */
1071          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1072          if (clen > 0)
1073            {
1074            int ncount = 0;
1075            switch (c)
1076              {
1077              case 0x000d:
1078              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1079              /* Fall through */
1080              case 0x000a:
1081              case 0x000b:
1082              case 0x000c:
1083              case 0x0085:
1084              case 0x2028:
1085              case 0x2029:
1086              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1087                {
1088                active_count--;           /* Remove non-match possibility */
1089                next_active_state--;
1090                }
1091              count++;
1092              ADD_NEW_DATA(-state_offset, count, ncount);
1093              break;
1094              default:
1095              break;
1096              }
1097            }
1098          break;
1099    
1100          /*-----------------------------------------------------------------*/
1101          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1102          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1103          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1104          count = current_state->count;  /* Already matched */
1105          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1106          if (clen > 0)
1107            {
1108            BOOL OK;
1109            switch (c)
1110              {
1111              case 0x000a:
1112              case 0x000b:
1113              case 0x000c:
1114              case 0x000d:
1115              case 0x0085:
1116              case 0x2028:
1117              case 0x2029:
1118              OK = TRUE;
1119              break;
1120    
1121              default:
1122              OK = FALSE;
1123              break;
1124              }
1125    
1126            if (OK == (d == OP_VSPACE))
1127              {
1128              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1129                {
1130                active_count--;           /* Remove non-match possibility */
1131                next_active_state--;
1132                }
1133              count++;
1134              ADD_NEW_DATA(-state_offset, count, 0);
1135              }
1136            }
1137          break;
1138    
1139          /*-----------------------------------------------------------------*/
1140          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1141          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1142          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1143          count = current_state->count;  /* Already matched */
1144          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1145          if (clen > 0)
1146            {
1147            BOOL OK;
1148            switch (c)
1149              {
1150              case 0x09:      /* HT */
1151              case 0x20:      /* SPACE */
1152              case 0xa0:      /* NBSP */
1153              case 0x1680:    /* OGHAM SPACE MARK */
1154              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1155              case 0x2000:    /* EN QUAD */
1156              case 0x2001:    /* EM QUAD */
1157              case 0x2002:    /* EN SPACE */
1158              case 0x2003:    /* EM SPACE */
1159              case 0x2004:    /* THREE-PER-EM SPACE */
1160              case 0x2005:    /* FOUR-PER-EM SPACE */
1161              case 0x2006:    /* SIX-PER-EM SPACE */
1162              case 0x2007:    /* FIGURE SPACE */
1163              case 0x2008:    /* PUNCTUATION SPACE */
1164              case 0x2009:    /* THIN SPACE */
1165              case 0x200A:    /* HAIR SPACE */
1166              case 0x202f:    /* NARROW NO-BREAK SPACE */
1167              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1168              case 0x3000:    /* IDEOGRAPHIC SPACE */
1169              OK = TRUE;
1170              break;
1171    
1172              default:
1173              OK = FALSE;
1174              break;
1175              }
1176    
1177            if (OK == (d == OP_HSPACE))
1178              {
1179              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1180                {
1181                active_count--;           /* Remove non-match possibility */
1182                next_active_state--;
1183                }
1184              count++;
1185              ADD_NEW_DATA(-state_offset, count, 0);
1186              }
1187            }
1188          break;
1189    
1190        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1191    #ifdef SUPPORT_UCP
1192        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1193        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1194          case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1195        count = 4;        count = 4;
1196        goto QS1;        goto QS1;
1197    
1198        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1199        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1200          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1201        count = 0;        count = 0;
1202    
1203        QS1:        QS1:
# Line 1017  for (;;) Line 1236  for (;;)
1236            break;            break;
1237            }            }
1238    
1239          if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }          if (OK == (d == OP_PROP))
1240              {
1241              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1242                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1243                {
1244                active_count--;           /* Remove non-match possibility */
1245                next_active_state--;
1246                }
1247              ADD_NEW(state_offset + count, 0);
1248              }
1249          }          }
1250        break;        break;
1251    
1252        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1253        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1254        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1255          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1256        count = 2;        count = 2;
1257        goto QS2;        goto QS2;
1258    
1259        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1260        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1261          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1262        count = 0;        count = 0;
1263    
1264        QS2:        QS2:
# Line 1038  for (;;) Line 1268  for (;;)
1268          {          {
1269          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1270          int ncount = 0;          int ncount = 0;
1271            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1272                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1273              {
1274              active_count--;           /* Remove non-match possibility */
1275              next_active_state--;
1276              }
1277          while (nptr < end_subject)          while (nptr < end_subject)
1278            {            {
1279            int nd;            int nd;
# Line 1050  for (;;) Line 1286  for (;;)
1286          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1287          }          }
1288        break;        break;
1289    #endif
1290    
1291          /*-----------------------------------------------------------------*/
1292          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1293          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1294          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1295          count = 2;
1296          goto QS3;
1297    
1298          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1299          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1300          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1301          count = 0;
1302    
1303          QS3:
1304          ADD_ACTIVE(state_offset + 2, 0);
1305          if (clen > 0)
1306            {
1307            int ncount = 0;
1308            switch (c)
1309              {
1310              case 0x000d:
1311              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1312              /* Fall through */
1313              case 0x000a:
1314              case 0x000b:
1315              case 0x000c:
1316              case 0x0085:
1317              case 0x2028:
1318              case 0x2029:
1319              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1320                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1321                {
1322                active_count--;           /* Remove non-match possibility */
1323                next_active_state--;
1324                }
1325              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1326              break;
1327              default:
1328              break;
1329              }
1330            }
1331          break;
1332    
1333          /*-----------------------------------------------------------------*/
1334          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1335          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1336          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1337          count = 2;
1338          goto QS4;
1339    
1340          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1341          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1342          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1343          count = 0;
1344    
1345          QS4:
1346          ADD_ACTIVE(state_offset + 2, 0);
1347          if (clen > 0)
1348            {
1349            BOOL OK;
1350            switch (c)
1351              {
1352              case 0x000a:
1353              case 0x000b:
1354              case 0x000c:
1355              case 0x000d:
1356              case 0x0085:
1357              case 0x2028:
1358              case 0x2029:
1359              OK = TRUE;
1360              break;
1361    
1362              default:
1363              OK = FALSE;
1364              break;
1365              }
1366            if (OK == (d == OP_VSPACE))
1367              {
1368              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1369                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1370                {
1371                active_count--;           /* Remove non-match possibility */
1372                next_active_state--;
1373                }
1374              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1375              }
1376            }
1377          break;
1378    
1379          /*-----------------------------------------------------------------*/
1380          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1381          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1382          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1383          count = 2;
1384          goto QS5;
1385    
1386          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1387          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1388          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1389          count = 0;
1390    
1391          QS5:
1392          ADD_ACTIVE(state_offset + 2, 0);
1393          if (clen > 0)
1394            {
1395            BOOL OK;
1396            switch (c)
1397              {
1398              case 0x09:      /* HT */
1399              case 0x20:      /* SPACE */
1400              case 0xa0:      /* NBSP */
1401              case 0x1680:    /* OGHAM SPACE MARK */
1402              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1403              case 0x2000:    /* EN QUAD */
1404              case 0x2001:    /* EM QUAD */
1405              case 0x2002:    /* EN SPACE */
1406              case 0x2003:    /* EM SPACE */
1407              case 0x2004:    /* THREE-PER-EM SPACE */
1408              case 0x2005:    /* FOUR-PER-EM SPACE */
1409              case 0x2006:    /* SIX-PER-EM SPACE */
1410              case 0x2007:    /* FIGURE SPACE */
1411              case 0x2008:    /* PUNCTUATION SPACE */
1412              case 0x2009:    /* THIN SPACE */
1413              case 0x200A:    /* HAIR SPACE */
1414              case 0x202f:    /* NARROW NO-BREAK SPACE */
1415              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1416              case 0x3000:    /* IDEOGRAPHIC SPACE */
1417              OK = TRUE;
1418              break;
1419    
1420              default:
1421              OK = FALSE;
1422              break;
1423              }
1424    
1425            if (OK == (d == OP_HSPACE))
1426              {
1427              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1428                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1429                {
1430                active_count--;           /* Remove non-match possibility */
1431                next_active_state--;
1432                }
1433              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1434              }
1435            }
1436          break;
1437    
1438        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1439    #ifdef SUPPORT_UCP
1440        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1441        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1442        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1443          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1444        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1445          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1446        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1093  for (;;) Line 1479  for (;;)
1479    
1480          if (OK == (d == OP_PROP))          if (OK == (d == OP_PROP))
1481            {            {
1482              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1483                {
1484                active_count--;           /* Remove non-match possibility */
1485                next_active_state--;
1486                }
1487            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1488              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 6, 0); }
1489            else            else
# Line 1105  for (;;) Line 1496  for (;;)
1496        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1497        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1498        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1499          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1500        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1501          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1502        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1112  for (;;) Line 1504  for (;;)
1504          {          {
1505          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1506          int ncount = 0;          int ncount = 0;
1507            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1508              {
1509              active_count--;           /* Remove non-match possibility */
1510              next_active_state--;
1511              }
1512          while (nptr < end_subject)          while (nptr < end_subject)
1513            {            {
1514            int nd;            int nd;
# Line 1127  for (;;) Line 1524  for (;;)
1524            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1525          }          }
1526        break;        break;
1527    #endif
1528    
1529          /*-----------------------------------------------------------------*/
1530          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1531          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1532          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1533          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1534          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1535            { ADD_ACTIVE(state_offset + 4, 0); }
1536          count = current_state->count;  /* Number already matched */
1537          if (clen > 0)
1538            {
1539            int ncount = 0;
1540            switch (c)
1541              {
1542              case 0x000d:
1543              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1544              /* Fall through */
1545              case 0x000a:
1546              case 0x000b:
1547              case 0x000c:
1548              case 0x0085:
1549              case 0x2028:
1550              case 0x2029:
1551              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1552                {
1553                active_count--;           /* Remove non-match possibility */
1554                next_active_state--;
1555                }
1556              if (++count >= GET2(code, 1))
1557                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1558              else
1559                { ADD_NEW_DATA(-state_offset, count, ncount); }
1560              break;
1561              default:
1562              break;
1563              }
1564            }
1565          break;
1566    
1567          /*-----------------------------------------------------------------*/
1568          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1569          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1570          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1571          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1572          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1573            { ADD_ACTIVE(state_offset + 4, 0); }
1574          count = current_state->count;  /* Number already matched */
1575          if (clen > 0)
1576            {
1577            BOOL OK;
1578            switch (c)
1579              {
1580              case 0x000a:
1581              case 0x000b:
1582              case 0x000c:
1583              case 0x000d:
1584              case 0x0085:
1585              case 0x2028:
1586              case 0x2029:
1587              OK = TRUE;
1588              break;
1589    
1590              default:
1591              OK = FALSE;
1592              }
1593    
1594            if (OK == (d == OP_VSPACE))
1595              {
1596              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1597                {
1598                active_count--;           /* Remove non-match possibility */
1599                next_active_state--;
1600                }
1601              if (++count >= GET2(code, 1))
1602                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1603              else
1604                { ADD_NEW_DATA(-state_offset, count, 0); }
1605              }
1606            }
1607          break;
1608    
1609          /*-----------------------------------------------------------------*/
1610          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1611          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1612          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1613          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1614          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1615            { ADD_ACTIVE(state_offset + 4, 0); }
1616          count = current_state->count;  /* Number already matched */
1617          if (clen > 0)
1618            {
1619            BOOL OK;
1620            switch (c)
1621              {
1622              case 0x09:      /* HT */
1623              case 0x20:      /* SPACE */
1624              case 0xa0:      /* NBSP */
1625              case 0x1680:    /* OGHAM SPACE MARK */
1626              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1627              case 0x2000:    /* EN QUAD */
1628              case 0x2001:    /* EM QUAD */
1629              case 0x2002:    /* EN SPACE */
1630              case 0x2003:    /* EM SPACE */
1631              case 0x2004:    /* THREE-PER-EM SPACE */
1632              case 0x2005:    /* FOUR-PER-EM SPACE */
1633              case 0x2006:    /* SIX-PER-EM SPACE */
1634              case 0x2007:    /* FIGURE SPACE */
1635              case 0x2008:    /* PUNCTUATION SPACE */
1636              case 0x2009:    /* THIN SPACE */
1637              case 0x200A:    /* HAIR SPACE */
1638              case 0x202f:    /* NARROW NO-BREAK SPACE */
1639              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1640              case 0x3000:    /* IDEOGRAPHIC SPACE */
1641              OK = TRUE;
1642              break;
1643    
1644              default:
1645              OK = FALSE;
1646              break;
1647              }
1648    
1649            if (OK == (d == OP_HSPACE))
1650              {
1651              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1652                {
1653                active_count--;           /* Remove non-match possibility */
1654                next_active_state--;
1655                }
1656              if (++count >= GET2(code, 1))
1657                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1658              else
1659                { ADD_NEW_DATA(-state_offset, count, 0); }
1660              }
1661            }
1662          break;
1663    
1664  /* ========================================================================== */  /* ========================================================================== */
1665        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1148  for (;;) Line 1681  for (;;)
1681          {          {
1682          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1683            {            {
1684            int othercase;            unsigned int othercase;
1685            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1686    
1687            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
# Line 1157  for (;;) Line 1690  for (;;)
1690  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1691            othercase = _pcre_ucp_othercase(c);            othercase = _pcre_ucp_othercase(c);
1692  #else  #else
1693            othercase = -1;            othercase = NOTACHAR;
1694  #endif  #endif
1695    
1696            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
# Line 1198  for (;;) Line 1731  for (;;)
1731  #endif  #endif
1732    
1733        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1734          /* This is tricky like EXTUNI because it too can match more than one
1735          character (when CR is followed by LF). In this case, set up a negative
1736          state to wait for one character to pass before continuing. */
1737    
1738          case OP_ANYNL:
1739          if (clen > 0) switch(c)
1740            {
1741            case 0x000a:
1742            case 0x000b:
1743            case 0x000c:
1744            case 0x0085:
1745            case 0x2028:
1746            case 0x2029:
1747            ADD_NEW(state_offset + 1, 0);
1748            break;
1749            case 0x000d:
1750            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1751              {
1752              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1753              }
1754            else
1755              {
1756              ADD_NEW(state_offset + 1, 0);
1757              }
1758            break;
1759            }
1760          break;
1761    
1762          /*-----------------------------------------------------------------*/
1763          case OP_NOT_VSPACE:
1764          if (clen > 0) switch(c)
1765            {
1766            case 0x000a:
1767            case 0x000b:
1768            case 0x000c:
1769            case 0x000d:
1770            case 0x0085:
1771            case 0x2028:
1772            case 0x2029:
1773            break;
1774    
1775            default:
1776            ADD_NEW(state_offset + 1, 0);
1777            break;
1778            }
1779          break;
1780    
1781          /*-----------------------------------------------------------------*/
1782          case OP_VSPACE:
1783          if (clen > 0) switch(c)
1784            {
1785            case 0x000a:
1786            case 0x000b:
1787            case 0x000c:
1788            case 0x000d:
1789            case 0x0085:
1790            case 0x2028:
1791            case 0x2029:
1792            ADD_NEW(state_offset + 1, 0);
1793            break;
1794    
1795            default: break;
1796            }
1797          break;
1798    
1799          /*-----------------------------------------------------------------*/
1800          case OP_NOT_HSPACE:
1801          if (clen > 0) switch(c)
1802            {
1803            case 0x09:      /* HT */
1804            case 0x20:      /* SPACE */
1805            case 0xa0:      /* NBSP */
1806            case 0x1680:    /* OGHAM SPACE MARK */
1807            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1808            case 0x2000:    /* EN QUAD */
1809            case 0x2001:    /* EM QUAD */
1810            case 0x2002:    /* EN SPACE */
1811            case 0x2003:    /* EM SPACE */
1812            case 0x2004:    /* THREE-PER-EM SPACE */
1813            case 0x2005:    /* FOUR-PER-EM SPACE */
1814            case 0x2006:    /* SIX-PER-EM SPACE */
1815            case 0x2007:    /* FIGURE SPACE */
1816            case 0x2008:    /* PUNCTUATION SPACE */
1817            case 0x2009:    /* THIN SPACE */
1818            case 0x200A:    /* HAIR SPACE */
1819            case 0x202f:    /* NARROW NO-BREAK SPACE */
1820            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1821            case 0x3000:    /* IDEOGRAPHIC SPACE */
1822            break;
1823    
1824            default:
1825            ADD_NEW(state_offset + 1, 0);
1826            break;
1827            }
1828          break;
1829    
1830          /*-----------------------------------------------------------------*/
1831          case OP_HSPACE:
1832          if (clen > 0) switch(c)
1833            {
1834            case 0x09:      /* HT */
1835            case 0x20:      /* SPACE */
1836            case 0xa0:      /* NBSP */
1837            case 0x1680:    /* OGHAM SPACE MARK */
1838            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1839            case 0x2000:    /* EN QUAD */
1840            case 0x2001:    /* EM QUAD */
1841            case 0x2002:    /* EN SPACE */
1842            case 0x2003:    /* EM SPACE */
1843            case 0x2004:    /* THREE-PER-EM SPACE */
1844            case 0x2005:    /* FOUR-PER-EM SPACE */
1845            case 0x2006:    /* SIX-PER-EM SPACE */
1846            case 0x2007:    /* FIGURE SPACE */
1847            case 0x2008:    /* PUNCTUATION SPACE */
1848            case 0x2009:    /* THIN SPACE */
1849            case 0x200A:    /* HAIR SPACE */
1850            case 0x202f:    /* NARROW NO-BREAK SPACE */
1851            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1852            case 0x3000:    /* IDEOGRAPHIC SPACE */
1853            ADD_NEW(state_offset + 1, 0);
1854            break;
1855            }
1856          break;
1857    
1858          /*-----------------------------------------------------------------*/
1859        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1860        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1861        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1205  for (;;) Line 1863  for (;;)
1863        case OP_NOT:        case OP_NOT:
1864        if (clen > 0)        if (clen > 0)
1865          {          {
1866          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1867          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1868          }          }
1869        break;        break;
# Line 1213  for (;;) Line 1871  for (;;)
1871        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1872        case OP_PLUS:        case OP_PLUS:
1873        case OP_MINPLUS:        case OP_MINPLUS:
1874          case OP_POSPLUS:
1875        case OP_NOTPLUS:        case OP_NOTPLUS:
1876        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1877          case OP_NOTPOSPLUS:
1878        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1879        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1880        if (clen > 0)        if (clen > 0)
1881          {          {
1882          int otherd = -1;          unsigned int otherd = NOTACHAR;
1883          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1884            {            {
1885  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1234  for (;;) Line 1894  for (;;)
1894            otherd = fcc[d];            otherd = fcc[d];
1895            }            }
1896          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1897            { count++; ADD_NEW(state_offset, count); }            {
1898              if (count > 0 &&
1899                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1900                {
1901                active_count--;             /* Remove non-match possibility */
1902                next_active_state--;
1903                }
1904              count++;
1905              ADD_NEW(state_offset, count);
1906              }
1907          }          }
1908        break;        break;
1909    
1910        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1911        case OP_QUERY:        case OP_QUERY:
1912        case OP_MINQUERY:        case OP_MINQUERY:
1913          case OP_POSQUERY:
1914        case OP_NOTQUERY:        case OP_NOTQUERY:
1915        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1916          case OP_NOTPOSQUERY:
1917        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1918        if (clen > 0)        if (clen > 0)
1919          {          {
1920          int otherd = -1;          unsigned int otherd = NOTACHAR;
1921          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1922            {            {
1923  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1261  for (;;) Line 1932  for (;;)
1932            otherd = fcc[d];            otherd = fcc[d];
1933            }            }
1934          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1935            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1936              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1937                {
1938                active_count--;            /* Remove non-match possibility */
1939                next_active_state--;
1940                }
1941              ADD_NEW(state_offset + dlen + 1, 0);
1942              }
1943          }          }
1944        break;        break;
1945    
1946        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1947        case OP_STAR:        case OP_STAR:
1948        case OP_MINSTAR:        case OP_MINSTAR:
1949          case OP_POSSTAR:
1950        case OP_NOTSTAR:        case OP_NOTSTAR:
1951        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1952          case OP_NOTPOSSTAR:
1953        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1954        if (clen > 0)        if (clen > 0)
1955          {          {
1956          int otherd = -1;          unsigned int otherd = NOTACHAR;
1957          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1958            {            {
1959  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1288  for (;;) Line 1968  for (;;)
1968            otherd = fcc[d];            otherd = fcc[d];
1969            }            }
1970          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1971            { ADD_NEW(state_offset, 0); }            {
1972              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
1973                {
1974                active_count--;            /* Remove non-match possibility */
1975                next_active_state--;
1976                }
1977              ADD_NEW(state_offset, 0);
1978              }
1979          }          }
1980        break;        break;
1981    
1982        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1983        case OP_EXACT:        case OP_EXACT:
1984          case OP_NOTEXACT:
1985          count = current_state->count;  /* Number already matched */
1986          if (clen > 0)
1987            {
1988            unsigned int otherd = NOTACHAR;
1989            if ((ims & PCRE_CASELESS) != 0)
1990              {
1991    #ifdef SUPPORT_UTF8
1992              if (utf8 && d >= 128)
1993                {
1994    #ifdef SUPPORT_UCP
1995                otherd = _pcre_ucp_othercase(d);
1996    #endif  /* SUPPORT_UCP */
1997                }
1998              else
1999    #endif  /* SUPPORT_UTF8 */
2000              otherd = fcc[d];
2001              }
2002            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2003              {
2004              if (++count >= GET2(code, 1))
2005                { ADD_NEW(state_offset + dlen + 3, 0); }
2006              else
2007                { ADD_NEW(state_offset, count); }
2008              }
2009            }
2010          break;
2011    
2012          /*-----------------------------------------------------------------*/
2013        case OP_UPTO:        case OP_UPTO:
2014        case OP_MINUPTO:        case OP_MINUPTO:
2015        case OP_NOTEXACT:        case OP_POSUPTO:
2016        case OP_NOTUPTO:        case OP_NOTUPTO:
2017        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2018        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
2019          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
2020        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2021        if (clen > 0)        if (clen > 0)
2022          {          {
2023          int otherd = -1;          unsigned int otherd = NOTACHAR;
2024          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2025            {            {
2026  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1320  for (;;) Line 2036  for (;;)
2036            }            }
2037          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2038            {            {
2039              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2040                {
2041                active_count--;             /* Remove non-match possibility */
2042                next_active_state--;
2043                }
2044            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2045              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
2046            else            else
# Line 1448  for (;;) Line 2169  for (;;)
2169    
2170        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2171        case OP_COND:        case OP_COND:
2172          case OP_SCOND:
2173          {          {
2174          int local_offsets[1000];          int local_offsets[1000];
2175          int local_workspace[1000];          int local_workspace[1000];
2176          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
2177    
2178          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
2179          means "test if in a recursion". */  
2180            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
2181    
2182            /* The DEFINE condition is always false */
2183    
2184            if (condcode == OP_DEF)
2185              {
2186              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
2187              }
2188    
2189            /* The only supported version of OP_RREF is for the value RREF_ANY,
2190            which means "test if in any recursion". We can't test for specifically
2191            recursed groups. */
2192    
2193          if (condcode == OP_CREF)          else if (condcode == OP_RREF)
2194            {            {
2195            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2196            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2197            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2198              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
2199            }            }
# Line 1728  is not anchored. Line 2462  is not anchored.
2462    
2463  Arguments:  Arguments:
2464    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2465    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2466    subject         points to the subject string    subject         points to the subject string
2467    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2468    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1744  Returns:          > 0 => number of match Line 2478  Returns:          > 0 => number of match
2478                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2479  */  */
2480    
2481  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2482  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2483    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2484    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 1834  md->end_subject = end_subject; Line 2568  md->end_subject = end_subject;
2568  md->moptions = options;  md->moptions = options;
2569  md->poptions = re->options;  md->poptions = re->options;
2570    
2571  /* Handle different types of newline. The two bits give four cases. If nothing  /* Handle different types of newline. The three bits give eight cases. If
2572  is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2573    
2574  switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &  switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2575           PCRE_NEWLINE_CRLF)           PCRE_NEWLINE_BITS)
2576    {    {
2577    default:              newline = NEWLINE; break;   /* Compile-time default */    case 0: newline = NEWLINE; break;   /* Compile-time default */
2578    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
2579    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = '\n'; break;
2580    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2581         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2582      case PCRE_NEWLINE_ANY: newline = -1; break;
2583      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2584      default: return PCRE_ERROR_BADNEWLINE;
2585    }    }
2586    
2587  if (newline > 255)  if (newline == -2)
2588      {
2589      md->nltype = NLTYPE_ANYCRLF;
2590      }
2591    else if (newline < 0)
2592    {    {
2593    md->nllen = 2;    md->nltype = NLTYPE_ANY;
   md->nl[0] = (newline >> 8) & 255;  
   md->nl[1] = newline & 255;  
2594    }    }
2595  else  else
2596    {    {
2597    md->nllen = 1;    md->nltype = NLTYPE_FIXED;
2598    md->nl[0] = newline;    if (newline > 255)
2599        {
2600        md->nllen = 2;
2601        md->nl[0] = (newline >> 8) & 255;
2602        md->nl[1] = newline & 255;
2603        }
2604      else
2605        {
2606        md->nllen = 1;
2607        md->nl[0] = newline;
2608        }
2609    }    }
2610    
2611  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 1946  for (;;) Line 2695  for (;;)
2695      if (firstline)      if (firstline)
2696        {        {
2697        const uschar *t = current_subject;        const uschar *t = current_subject;
2698        while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2699        end_subject = t;        end_subject = t;
2700        }        }
2701    
# Line 1965  for (;;) Line 2714  for (;;)
2714    
2715      else if (startline)      else if (startline)
2716        {        {
2717        if (current_subject > md->start_subject + md->nllen +        if (current_subject > md->start_subject + start_offset)
           start_offset)  
2718          {          {
2719          while (current_subject <= end_subject &&          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2720                 !IS_NEWLINE(current_subject - md->nllen))            current_subject++;
2721    
2722            /* If we have just passed a CR and the newline option is ANY or
2723            ANYCRLF, and we are now at a LF, advance the match position by one more
2724            character. */
2725    
2726            if (current_subject[-1] == '\r' &&
2727                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2728                 current_subject < end_subject &&
2729                 *current_subject == '\n')
2730            current_subject++;            current_subject++;
2731          }          }
2732        }        }
# Line 2070  for (;;) Line 2827  for (;;)
2827    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2828    and firstline is set. */    and firstline is set. */
2829    
2830    if (firstline &&    if (firstline && IS_NEWLINE(current_subject)) break;
       current_subject <= end_subject - md->nllen &&  
       IS_NEWLINE(current_subject)) break;  
2831    current_subject++;    current_subject++;
2832    if (utf8)    if (utf8)
2833      {      {
# Line 2080  for (;;) Line 2835  for (;;)
2835        current_subject++;        current_subject++;
2836      }      }
2837    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2838    }  
2839      /* If we have just passed a CR and the newline option is CRLF or ANY or
2840      ANYCRLF, and we are now at a LF, advance the match position by one more
2841      character. */
2842    
2843      if (current_subject[-1] == '\r' &&
2844           (md->nltype == NLTYPE_ANY ||
2845            md->nltype == NLTYPE_ANYCRLF ||
2846            md->nllen == 2) &&
2847           current_subject < end_subject &&
2848           *current_subject == '\n')
2849        current_subject++;
2850    
2851      }   /* "Bumpalong" loop */
2852    
2853  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2854  }  }

Legend:
Removed from v.91  
changed lines
  Added in v.178

  ViewVC Help
Powered by ViewVC 1.1.5