/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 305 by ph10, Sun Jan 20 20:07:32 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47  #define NLBLOCK md           /* The block containing newline information */  #ifdef HAVE_CONFIG_H
48    #include "config.h"
49    #endif
50    
51    #define NLBLOCK md             /* Block containing newline information */
52    #define PSSTART start_subject  /* Field containing processed string start */
53    #define PSEND   end_subject    /* Field containing processed string end */
54    
55  #include "pcre_internal.h"  #include "pcre_internal.h"
56    
57    
# Line 58  compatible, but it has advantages in cer Line 66  compatible, but it has advantages in cer
66  *************************************************/  *************************************************/
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  
73  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_PROP_EXTRA       300
74    #define OP_EXTUNI_EXTRA     320
75    #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 83  static uschar coptable[] = { Line 98  static uschar coptable[] = {
98    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
99    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
100    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
101      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
102    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
103    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
104    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
105      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
106    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
107    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
108    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
109      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
110    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
111    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
112    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 107  static uschar coptable[] = { Line 125  static uschar coptable[] = {
125    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
126    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
127    0,                             /* Reverse                                */    0,                             /* Reverse                                */
128    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
129    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131      0,                             /* RREF                                   */
132      0,                             /* DEF                                    */
133    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134    0,                             /* BRANUMBER                              */    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135    0                              /* BRA                                    */    0, 0                           /* FAIL, ACCEPT                           */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
# Line 127  static uschar toptable1[] = { Line 147  static uschar toptable1[] = {
147  };  };
148    
149  static uschar toptable2[] = {  static uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
# Line 278  stateblock *next_active_state, *next_new Line 298  stateblock *next_active_state, *next_new
298    
299  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
300  const uschar *ptr;  const uschar *ptr;
301  const uschar *end_code;  const uschar *end_code, *first_op;
302    
303  int active_count, new_count, match_count;  int active_count, new_count, match_count;
304    
# Line 291  const uschar *start_code = md->start_cod Line 311  const uschar *start_code = md->start_cod
311    
312  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
313  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
314    #else
315    BOOL utf8 = FALSE;
316  #endif  #endif
317    
318  rlevel++;  rlevel++;
# Line 314  active_states = (stateblock *)(workspace Line 336  active_states = (stateblock *)(workspace
336  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
337  new_count = 0;  new_count = 0;
338    
339    first_op = this_start_code + 1 + LINK_SIZE +
340      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
341    
342  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
343  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
344  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 323  If the first opcode in the first alterna Line 348  If the first opcode in the first alterna
348  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
349  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
350    
351  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
352    {    {
353    int max_back = 0;    int max_back = 0;
354    int gone_back;    int gone_back;
# Line 405  else Line 430  else
430    
431    else    else
432      {      {
433        int length = 1 + LINK_SIZE +
434          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
435      do      do
436        {        {
437        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
438        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
439          length = 1 + LINK_SIZE;
440        }        }
441      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
442      }      }
# Line 461  for (;;) Line 489  for (;;)
489    
490    if (ptr < end_subject)    if (ptr < end_subject)
491      {      {
492      clen = 1;      clen = 1;        /* Number of bytes in the character */
493  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
494      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
495  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 469  for (;;) Line 497  for (;;)
497      }      }
498    else    else
499      {      {
500      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
501      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
502      }      }
503    
504    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 484  for (;;) Line 512  for (;;)
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue;
515    #ifdef SUPPORT_UCP
516      int chartype, script;      int chartype, script;
517    #endif
518    
519  #ifdef DEBUG  #ifdef DEBUG
520      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
521      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
522        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
523          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
524  #endif  #endif
# Line 532  for (;;) Line 562  for (;;)
562    
563      code = start_code + state_offset;      code = start_code + state_offset;
564      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
565    
566      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
567      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 540  for (;;) Line 569  for (;;)
569      permitted.      permitted.
570    
571      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
572      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
573      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
574      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
575      opcodes. */      */
576    
577      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
578        {        {
# Line 554  for (;;) Line 583  for (;;)
583        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
584        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
585          {          {
586          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
587          if (d >= OP_NOTPROP)            {
588            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
589              case OP_NOTPROP:
590              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
591              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593              case OP_NOT_HSPACE:
594              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595              case OP_NOT_VSPACE:
596              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597              default: break;
598              }
599          }          }
600        }        }
601      else      else
602        {        {
603        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
604        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
605        }        }
606    
607    
# Line 624  for (;;) Line 663  for (;;)
663    
664        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
665        case OP_BRA:        case OP_BRA:
666          case OP_SBRA:
667        do        do
668          {          {
669          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 633  for (;;) Line 673  for (;;)
673        break;        break;
674    
675        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
676          case OP_CBRA:
677          case OP_SCBRA:
678          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
679          code += GET(code, 1);
680          while (*code == OP_ALT)
681            {
682            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
683            code += GET(code, 1);
684            }
685          break;
686    
687          /*-----------------------------------------------------------------*/
688        case OP_BRAZERO:        case OP_BRAZERO:
689        case OP_BRAMINZERO:        case OP_BRAMINZERO:
690        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 642  for (;;) Line 694  for (;;)
694        break;        break;
695    
696        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
       case OP_BRANUMBER:  
       ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);  
       break;  
   
       /*-----------------------------------------------------------------*/  
697        case OP_CIRC:        case OP_CIRC:
698        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
699            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
             ptr >= start_subject + md->nllen &&  
700              ptr != end_subject &&              ptr != end_subject &&
701              IS_NEWLINE(ptr - md->nllen)))              WAS_NEWLINE(ptr)))
702          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
703        break;        break;
704    
# Line 686  for (;;) Line 732  for (;;)
732    
733        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
734        case OP_ANY:        case OP_ANY:
735        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 ||        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
                        ptr > end_subject - md->nllen ||  
                        !IS_NEWLINE(ptr)))  
736          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
737        break;        break;
738    
739        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
740        case OP_EODN:        case OP_EODN:
741        if (clen == 0 ||        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
            (ptr == end_subject - md->nllen && IS_NEWLINE(ptr)))  
742          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
743        break;        break;
744    
# Line 704  for (;;) Line 747  for (;;)
747        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
748          {          {
749          if (clen == 0 ||          if (clen == 0 ||
750              (ptr <= end_subject - md->nllen && IS_NEWLINE(ptr) &&              (IS_NEWLINE(ptr) &&
751                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
752              ))              ))
753            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
754          }          }
755        else if ((ims & PCRE_MULTILINE) != 0 &&        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
                ptr <= end_subject - md->nllen && IS_NEWLINE(ptr))  
756          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
757        break;        break;
758    
# Line 759  for (;;) Line 801  for (;;)
801        break;        break;
802    
803    
 #ifdef SUPPORT_UCP  
   
804        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
805        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
806        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
807        */        */
808    
809    #ifdef SUPPORT_UCP
810        case OP_PROP:        case OP_PROP:
811        case OP_NOTPROP:        case OP_NOTPROP:
812        if (clen > 0)        if (clen > 0)
# Line 816  for (;;) Line 857  for (;;)
857    
858        case OP_TYPEPLUS:        case OP_TYPEPLUS:
859        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
860          case OP_TYPEPOSPLUS:
861        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
862        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
863        if (clen > 0)        if (clen > 0)
# Line 824  for (;;) Line 866  for (;;)
866              (c < 256 &&              (c < 256 &&
867                (d != OP_ANY ||                (d != OP_ANY ||
868                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
869                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
870                ) &&                ) &&
871                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
872            {            {
873              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
874                {
875                active_count--;            /* Remove non-match possibility */
876                next_active_state--;
877                }
878            count++;            count++;
879            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
880            }            }
# Line 838  for (;;) Line 884  for (;;)
884        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
885        case OP_TYPEQUERY:        case OP_TYPEQUERY:
886        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
887          case OP_TYPEPOSQUERY:
888        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
889        if (clen > 0)        if (clen > 0)
890          {          {
# Line 845  for (;;) Line 892  for (;;)
892              (c < 256 &&              (c < 256 &&
893                (d != OP_ANY ||                (d != OP_ANY ||
894                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
895                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
896                ) &&                ) &&
897                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
898            {            {
899              if (codevalue == OP_TYPEPOSQUERY)
900                {
901                active_count--;            /* Remove non-match possibility */
902                next_active_state--;
903                }
904            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
905            }            }
906          }          }
# Line 858  for (;;) Line 909  for (;;)
909        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
910        case OP_TYPESTAR:        case OP_TYPESTAR:
911        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
912          case OP_TYPEPOSSTAR:
913        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
914        if (clen > 0)        if (clen > 0)
915          {          {
# Line 865  for (;;) Line 917  for (;;)
917              (c < 256 &&              (c < 256 &&
918                (d != OP_ANY ||                (d != OP_ANY ||
919                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
920                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
921                ) &&                ) &&
922                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
923            {            {
924              if (codevalue == OP_TYPEPOSSTAR)
925                {
926                active_count--;            /* Remove non-match possibility */
927                next_active_state--;
928                }
929            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
930            }            }
931          }          }
# Line 877  for (;;) Line 933  for (;;)
933    
934        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
935        case OP_TYPEEXACT:        case OP_TYPEEXACT:
936          count = current_state->count;  /* Number already matched */
937          if (clen > 0)
938            {
939            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
940                (c < 256 &&
941                  (d != OP_ANY ||
942                   (ims & PCRE_DOTALL) != 0 ||
943                   !IS_NEWLINE(ptr)
944                  ) &&
945                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
946              {
947              if (++count >= GET2(code, 1))
948                { ADD_NEW(state_offset + 4, 0); }
949              else
950                { ADD_NEW(state_offset, count); }
951              }
952            }
953          break;
954    
955          /*-----------------------------------------------------------------*/
956        case OP_TYPEUPTO:        case OP_TYPEUPTO:
957        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
958        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
959          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
960        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
961        if (clen > 0)        if (clen > 0)
962          {          {
# Line 888  for (;;) Line 964  for (;;)
964              (c < 256 &&              (c < 256 &&
965                (d != OP_ANY ||                (d != OP_ANY ||
966                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
967                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
968                ) &&                ) &&
969                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
970            {            {
971              if (codevalue == OP_TYPEPOSUPTO)
972                {
973                active_count--;           /* Remove non-match possibility */
974                next_active_state--;
975                }
976            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
977              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
978            else            else
# Line 903  for (;;) Line 983  for (;;)
983    
984  /* ========================================================================== */  /* ========================================================================== */
985        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
986        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
987        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
988        d variable. */        is in the d variable. */
989    
990    #ifdef SUPPORT_UCP
991        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
992        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
993          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
994        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
995        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
996        if (clen > 0)        if (clen > 0)
# Line 944  for (;;) Line 1026  for (;;)
1026            break;            break;
1027            }            }
1028    
1029          if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }          if (OK == (d == OP_PROP))
1030              {
1031              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1032                {
1033                active_count--;           /* Remove non-match possibility */
1034                next_active_state--;
1035                }
1036              count++;
1037              ADD_NEW(state_offset, count);
1038              }
1039          }          }
1040        break;        break;
1041    
1042        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1043        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1044        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1045          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1046        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1047        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1048        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1049          {          {
1050          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1051          int ncount = 0;          int ncount = 0;
1052            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1053              {
1054              active_count--;           /* Remove non-match possibility */
1055              next_active_state--;
1056              }
1057          while (nptr < end_subject)          while (nptr < end_subject)
1058            {            {
1059            int nd;            int nd;
# Line 970  for (;;) Line 1067  for (;;)
1067          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1068          }          }
1069        break;        break;
1070    #endif
1071    
1072          /*-----------------------------------------------------------------*/
1073          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1074          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1075          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1076          count = current_state->count;  /* Already matched */
1077          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1078          if (clen > 0)
1079            {
1080            int ncount = 0;
1081            switch (c)
1082              {
1083              case 0x000b:
1084              case 0x000c:
1085              case 0x0085:
1086              case 0x2028:
1087              case 0x2029:
1088              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1089              goto ANYNL01;
1090    
1091              case 0x000d:
1092              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1093              /* Fall through */
1094    
1095              ANYNL01:
1096              case 0x000a:
1097              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1098                {
1099                active_count--;           /* Remove non-match possibility */
1100                next_active_state--;
1101                }
1102              count++;
1103              ADD_NEW_DATA(-state_offset, count, ncount);
1104              break;
1105    
1106              default:
1107              break;
1108              }
1109            }
1110          break;
1111    
1112        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1113          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1114          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1115          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1116          count = current_state->count;  /* Already matched */
1117          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1118          if (clen > 0)
1119            {
1120            BOOL OK;
1121            switch (c)
1122              {
1123              case 0x000a:
1124              case 0x000b:
1125              case 0x000c:
1126              case 0x000d:
1127              case 0x0085:
1128              case 0x2028:
1129              case 0x2029:
1130              OK = TRUE;
1131              break;
1132    
1133              default:
1134              OK = FALSE;
1135              break;
1136              }
1137    
1138            if (OK == (d == OP_VSPACE))
1139              {
1140              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1141                {
1142                active_count--;           /* Remove non-match possibility */
1143                next_active_state--;
1144                }
1145              count++;
1146              ADD_NEW_DATA(-state_offset, count, 0);
1147              }
1148            }
1149          break;
1150    
1151          /*-----------------------------------------------------------------*/
1152          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1153          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1154          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1155          count = current_state->count;  /* Already matched */
1156          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1157          if (clen > 0)
1158            {
1159            BOOL OK;
1160            switch (c)
1161              {
1162              case 0x09:      /* HT */
1163              case 0x20:      /* SPACE */
1164              case 0xa0:      /* NBSP */
1165              case 0x1680:    /* OGHAM SPACE MARK */
1166              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1167              case 0x2000:    /* EN QUAD */
1168              case 0x2001:    /* EM QUAD */
1169              case 0x2002:    /* EN SPACE */
1170              case 0x2003:    /* EM SPACE */
1171              case 0x2004:    /* THREE-PER-EM SPACE */
1172              case 0x2005:    /* FOUR-PER-EM SPACE */
1173              case 0x2006:    /* SIX-PER-EM SPACE */
1174              case 0x2007:    /* FIGURE SPACE */
1175              case 0x2008:    /* PUNCTUATION SPACE */
1176              case 0x2009:    /* THIN SPACE */
1177              case 0x200A:    /* HAIR SPACE */
1178              case 0x202f:    /* NARROW NO-BREAK SPACE */
1179              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1180              case 0x3000:    /* IDEOGRAPHIC SPACE */
1181              OK = TRUE;
1182              break;
1183    
1184              default:
1185              OK = FALSE;
1186              break;
1187              }
1188    
1189            if (OK == (d == OP_HSPACE))
1190              {
1191              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1192                {
1193                active_count--;           /* Remove non-match possibility */
1194                next_active_state--;
1195                }
1196              count++;
1197              ADD_NEW_DATA(-state_offset, count, 0);
1198              }
1199            }
1200          break;
1201    
1202          /*-----------------------------------------------------------------*/
1203    #ifdef SUPPORT_UCP
1204        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1205        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1206          case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1207        count = 4;        count = 4;
1208        goto QS1;        goto QS1;
1209    
1210        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1211        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1212          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1213        count = 0;        count = 0;
1214    
1215        QS1:        QS1:
# Line 1017  for (;;) Line 1248  for (;;)
1248            break;            break;
1249            }            }
1250    
1251          if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }          if (OK == (d == OP_PROP))
1252              {
1253              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1254                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1255                {
1256                active_count--;           /* Remove non-match possibility */
1257                next_active_state--;
1258                }
1259              ADD_NEW(state_offset + count, 0);
1260              }
1261          }          }
1262        break;        break;
1263    
1264        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1265        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1266        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1267          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1268        count = 2;        count = 2;
1269        goto QS2;        goto QS2;
1270    
1271        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1272        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1273          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1274        count = 0;        count = 0;
1275    
1276        QS2:        QS2:
# Line 1038  for (;;) Line 1280  for (;;)
1280          {          {
1281          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1282          int ncount = 0;          int ncount = 0;
1283            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1284                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1285              {
1286              active_count--;           /* Remove non-match possibility */
1287              next_active_state--;
1288              }
1289          while (nptr < end_subject)          while (nptr < end_subject)
1290            {            {
1291            int nd;            int nd;
# Line 1050  for (;;) Line 1298  for (;;)
1298          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1299          }          }
1300        break;        break;
1301    #endif
1302    
1303          /*-----------------------------------------------------------------*/
1304          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1305          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1306          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1307          count = 2;
1308          goto QS3;
1309    
1310          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1311          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1312          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1313          count = 0;
1314    
1315          QS3:
1316          ADD_ACTIVE(state_offset + 2, 0);
1317          if (clen > 0)
1318            {
1319            int ncount = 0;
1320            switch (c)
1321              {
1322              case 0x000b:
1323              case 0x000c:
1324              case 0x0085:
1325              case 0x2028:
1326              case 0x2029:
1327              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1328              goto ANYNL02;
1329    
1330              case 0x000d:
1331              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1332              /* Fall through */
1333    
1334              ANYNL02:
1335              case 0x000a:
1336              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1337                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1338                {
1339                active_count--;           /* Remove non-match possibility */
1340                next_active_state--;
1341                }
1342              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1343              break;
1344    
1345              default:
1346              break;
1347              }
1348            }
1349          break;
1350    
1351          /*-----------------------------------------------------------------*/
1352          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1353          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1354          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1355          count = 2;
1356          goto QS4;
1357    
1358          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1359          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1360          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1361          count = 0;
1362    
1363          QS4:
1364          ADD_ACTIVE(state_offset + 2, 0);
1365          if (clen > 0)
1366            {
1367            BOOL OK;
1368            switch (c)
1369              {
1370              case 0x000a:
1371              case 0x000b:
1372              case 0x000c:
1373              case 0x000d:
1374              case 0x0085:
1375              case 0x2028:
1376              case 0x2029:
1377              OK = TRUE;
1378              break;
1379    
1380              default:
1381              OK = FALSE;
1382              break;
1383              }
1384            if (OK == (d == OP_VSPACE))
1385              {
1386              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1387                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1388                {
1389                active_count--;           /* Remove non-match possibility */
1390                next_active_state--;
1391                }
1392              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1393              }
1394            }
1395          break;
1396    
1397        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1398          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1399          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1400          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1401          count = 2;
1402          goto QS5;
1403    
1404          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1405          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1406          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1407          count = 0;
1408    
1409          QS5:
1410          ADD_ACTIVE(state_offset + 2, 0);
1411          if (clen > 0)
1412            {
1413            BOOL OK;
1414            switch (c)
1415              {
1416              case 0x09:      /* HT */
1417              case 0x20:      /* SPACE */
1418              case 0xa0:      /* NBSP */
1419              case 0x1680:    /* OGHAM SPACE MARK */
1420              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1421              case 0x2000:    /* EN QUAD */
1422              case 0x2001:    /* EM QUAD */
1423              case 0x2002:    /* EN SPACE */
1424              case 0x2003:    /* EM SPACE */
1425              case 0x2004:    /* THREE-PER-EM SPACE */
1426              case 0x2005:    /* FOUR-PER-EM SPACE */
1427              case 0x2006:    /* SIX-PER-EM SPACE */
1428              case 0x2007:    /* FIGURE SPACE */
1429              case 0x2008:    /* PUNCTUATION SPACE */
1430              case 0x2009:    /* THIN SPACE */
1431              case 0x200A:    /* HAIR SPACE */
1432              case 0x202f:    /* NARROW NO-BREAK SPACE */
1433              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1434              case 0x3000:    /* IDEOGRAPHIC SPACE */
1435              OK = TRUE;
1436              break;
1437    
1438              default:
1439              OK = FALSE;
1440              break;
1441              }
1442    
1443            if (OK == (d == OP_HSPACE))
1444              {
1445              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1446                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1447                {
1448                active_count--;           /* Remove non-match possibility */
1449                next_active_state--;
1450                }
1451              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1452              }
1453            }
1454          break;
1455    
1456          /*-----------------------------------------------------------------*/
1457    #ifdef SUPPORT_UCP
1458        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1459        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1460        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1461          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1462        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1463          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1464        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1093  for (;;) Line 1497  for (;;)
1497    
1498          if (OK == (d == OP_PROP))          if (OK == (d == OP_PROP))
1499            {            {
1500              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1501                {
1502                active_count--;           /* Remove non-match possibility */
1503                next_active_state--;
1504                }
1505            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1506              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 6, 0); }
1507            else            else
# Line 1105  for (;;) Line 1514  for (;;)
1514        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1515        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1516        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1517          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1518        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1519          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1520        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1112  for (;;) Line 1522  for (;;)
1522          {          {
1523          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1524          int ncount = 0;          int ncount = 0;
1525            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1526              {
1527              active_count--;           /* Remove non-match possibility */
1528              next_active_state--;
1529              }
1530          while (nptr < end_subject)          while (nptr < end_subject)
1531            {            {
1532            int nd;            int nd;
# Line 1127  for (;;) Line 1542  for (;;)
1542            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1543          }          }
1544        break;        break;
1545    #endif
1546    
1547          /*-----------------------------------------------------------------*/
1548          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1549          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1550          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1551          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1552          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1553            { ADD_ACTIVE(state_offset + 4, 0); }
1554          count = current_state->count;  /* Number already matched */
1555          if (clen > 0)
1556            {
1557            int ncount = 0;
1558            switch (c)
1559              {
1560              case 0x000b:
1561              case 0x000c:
1562              case 0x0085:
1563              case 0x2028:
1564              case 0x2029:
1565              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1566              goto ANYNL03;
1567    
1568              case 0x000d:
1569              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1570              /* Fall through */
1571    
1572              ANYNL03:
1573              case 0x000a:
1574              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1575                {
1576                active_count--;           /* Remove non-match possibility */
1577                next_active_state--;
1578                }
1579              if (++count >= GET2(code, 1))
1580                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1581              else
1582                { ADD_NEW_DATA(-state_offset, count, ncount); }
1583              break;
1584    
1585              default:
1586              break;
1587              }
1588            }
1589          break;
1590    
1591          /*-----------------------------------------------------------------*/
1592          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1593          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1594          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1595          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1596          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1597            { ADD_ACTIVE(state_offset + 4, 0); }
1598          count = current_state->count;  /* Number already matched */
1599          if (clen > 0)
1600            {
1601            BOOL OK;
1602            switch (c)
1603              {
1604              case 0x000a:
1605              case 0x000b:
1606              case 0x000c:
1607              case 0x000d:
1608              case 0x0085:
1609              case 0x2028:
1610              case 0x2029:
1611              OK = TRUE;
1612              break;
1613    
1614              default:
1615              OK = FALSE;
1616              }
1617    
1618            if (OK == (d == OP_VSPACE))
1619              {
1620              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1621                {
1622                active_count--;           /* Remove non-match possibility */
1623                next_active_state--;
1624                }
1625              if (++count >= GET2(code, 1))
1626                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1627              else
1628                { ADD_NEW_DATA(-state_offset, count, 0); }
1629              }
1630            }
1631          break;
1632    
1633          /*-----------------------------------------------------------------*/
1634          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1635          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1636          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1637          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1638          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1639            { ADD_ACTIVE(state_offset + 4, 0); }
1640          count = current_state->count;  /* Number already matched */
1641          if (clen > 0)
1642            {
1643            BOOL OK;
1644            switch (c)
1645              {
1646              case 0x09:      /* HT */
1647              case 0x20:      /* SPACE */
1648              case 0xa0:      /* NBSP */
1649              case 0x1680:    /* OGHAM SPACE MARK */
1650              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1651              case 0x2000:    /* EN QUAD */
1652              case 0x2001:    /* EM QUAD */
1653              case 0x2002:    /* EN SPACE */
1654              case 0x2003:    /* EM SPACE */
1655              case 0x2004:    /* THREE-PER-EM SPACE */
1656              case 0x2005:    /* FOUR-PER-EM SPACE */
1657              case 0x2006:    /* SIX-PER-EM SPACE */
1658              case 0x2007:    /* FIGURE SPACE */
1659              case 0x2008:    /* PUNCTUATION SPACE */
1660              case 0x2009:    /* THIN SPACE */
1661              case 0x200A:    /* HAIR SPACE */
1662              case 0x202f:    /* NARROW NO-BREAK SPACE */
1663              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1664              case 0x3000:    /* IDEOGRAPHIC SPACE */
1665              OK = TRUE;
1666              break;
1667    
1668              default:
1669              OK = FALSE;
1670              break;
1671              }
1672    
1673            if (OK == (d == OP_HSPACE))
1674              {
1675              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1676                {
1677                active_count--;           /* Remove non-match possibility */
1678                next_active_state--;
1679                }
1680              if (++count >= GET2(code, 1))
1681                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1682              else
1683                { ADD_NEW_DATA(-state_offset, count, 0); }
1684              }
1685            }
1686          break;
1687    
1688  /* ========================================================================== */  /* ========================================================================== */
1689        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1148  for (;;) Line 1705  for (;;)
1705          {          {
1706          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1707            {            {
1708            int othercase;            unsigned int othercase;
1709            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1710    
1711            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
# Line 1157  for (;;) Line 1714  for (;;)
1714  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1715            othercase = _pcre_ucp_othercase(c);            othercase = _pcre_ucp_othercase(c);
1716  #else  #else
1717            othercase = -1;            othercase = NOTACHAR;
1718  #endif  #endif
1719    
1720            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
# Line 1198  for (;;) Line 1755  for (;;)
1755  #endif  #endif
1756    
1757        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1758          /* This is tricky like EXTUNI because it too can match more than one
1759          character (when CR is followed by LF). In this case, set up a negative
1760          state to wait for one character to pass before continuing. */
1761    
1762          case OP_ANYNL:
1763          if (clen > 0) switch(c)
1764            {
1765            case 0x000b:
1766            case 0x000c:
1767            case 0x0085:
1768            case 0x2028:
1769            case 0x2029:
1770            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1771    
1772            case 0x000a:
1773            ADD_NEW(state_offset + 1, 0);
1774            break;
1775    
1776            case 0x000d:
1777            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1778              {
1779              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1780              }
1781            else
1782              {
1783              ADD_NEW(state_offset + 1, 0);
1784              }
1785            break;
1786            }
1787          break;
1788    
1789          /*-----------------------------------------------------------------*/
1790          case OP_NOT_VSPACE:
1791          if (clen > 0) switch(c)
1792            {
1793            case 0x000a:
1794            case 0x000b:
1795            case 0x000c:
1796            case 0x000d:
1797            case 0x0085:
1798            case 0x2028:
1799            case 0x2029:
1800            break;
1801    
1802            default:
1803            ADD_NEW(state_offset + 1, 0);
1804            break;
1805            }
1806          break;
1807    
1808          /*-----------------------------------------------------------------*/
1809          case OP_VSPACE:
1810          if (clen > 0) switch(c)
1811            {
1812            case 0x000a:
1813            case 0x000b:
1814            case 0x000c:
1815            case 0x000d:
1816            case 0x0085:
1817            case 0x2028:
1818            case 0x2029:
1819            ADD_NEW(state_offset + 1, 0);
1820            break;
1821    
1822            default: break;
1823            }
1824          break;
1825    
1826          /*-----------------------------------------------------------------*/
1827          case OP_NOT_HSPACE:
1828          if (clen > 0) switch(c)
1829            {
1830            case 0x09:      /* HT */
1831            case 0x20:      /* SPACE */
1832            case 0xa0:      /* NBSP */
1833            case 0x1680:    /* OGHAM SPACE MARK */
1834            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1835            case 0x2000:    /* EN QUAD */
1836            case 0x2001:    /* EM QUAD */
1837            case 0x2002:    /* EN SPACE */
1838            case 0x2003:    /* EM SPACE */
1839            case 0x2004:    /* THREE-PER-EM SPACE */
1840            case 0x2005:    /* FOUR-PER-EM SPACE */
1841            case 0x2006:    /* SIX-PER-EM SPACE */
1842            case 0x2007:    /* FIGURE SPACE */
1843            case 0x2008:    /* PUNCTUATION SPACE */
1844            case 0x2009:    /* THIN SPACE */
1845            case 0x200A:    /* HAIR SPACE */
1846            case 0x202f:    /* NARROW NO-BREAK SPACE */
1847            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1848            case 0x3000:    /* IDEOGRAPHIC SPACE */
1849            break;
1850    
1851            default:
1852            ADD_NEW(state_offset + 1, 0);
1853            break;
1854            }
1855          break;
1856    
1857          /*-----------------------------------------------------------------*/
1858          case OP_HSPACE:
1859          if (clen > 0) switch(c)
1860            {
1861            case 0x09:      /* HT */
1862            case 0x20:      /* SPACE */
1863            case 0xa0:      /* NBSP */
1864            case 0x1680:    /* OGHAM SPACE MARK */
1865            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1866            case 0x2000:    /* EN QUAD */
1867            case 0x2001:    /* EM QUAD */
1868            case 0x2002:    /* EN SPACE */
1869            case 0x2003:    /* EM SPACE */
1870            case 0x2004:    /* THREE-PER-EM SPACE */
1871            case 0x2005:    /* FOUR-PER-EM SPACE */
1872            case 0x2006:    /* SIX-PER-EM SPACE */
1873            case 0x2007:    /* FIGURE SPACE */
1874            case 0x2008:    /* PUNCTUATION SPACE */
1875            case 0x2009:    /* THIN SPACE */
1876            case 0x200A:    /* HAIR SPACE */
1877            case 0x202f:    /* NARROW NO-BREAK SPACE */
1878            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1879            case 0x3000:    /* IDEOGRAPHIC SPACE */
1880            ADD_NEW(state_offset + 1, 0);
1881            break;
1882            }
1883          break;
1884    
1885          /*-----------------------------------------------------------------*/
1886        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1887        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1888        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1205  for (;;) Line 1890  for (;;)
1890        case OP_NOT:        case OP_NOT:
1891        if (clen > 0)        if (clen > 0)
1892          {          {
1893          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1894          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1895          }          }
1896        break;        break;
# Line 1213  for (;;) Line 1898  for (;;)
1898        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1899        case OP_PLUS:        case OP_PLUS:
1900        case OP_MINPLUS:        case OP_MINPLUS:
1901          case OP_POSPLUS:
1902        case OP_NOTPLUS:        case OP_NOTPLUS:
1903        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1904          case OP_NOTPOSPLUS:
1905        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1906        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1907        if (clen > 0)        if (clen > 0)
1908          {          {
1909          int otherd = -1;          unsigned int otherd = NOTACHAR;
1910          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1911            {            {
1912  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1234  for (;;) Line 1921  for (;;)
1921            otherd = fcc[d];            otherd = fcc[d];
1922            }            }
1923          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1924            { count++; ADD_NEW(state_offset, count); }            {
1925              if (count > 0 &&
1926                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1927                {
1928                active_count--;             /* Remove non-match possibility */
1929                next_active_state--;
1930                }
1931              count++;
1932              ADD_NEW(state_offset, count);
1933              }
1934          }          }
1935        break;        break;
1936    
1937        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1938        case OP_QUERY:        case OP_QUERY:
1939        case OP_MINQUERY:        case OP_MINQUERY:
1940          case OP_POSQUERY:
1941        case OP_NOTQUERY:        case OP_NOTQUERY:
1942        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1943          case OP_NOTPOSQUERY:
1944        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1945        if (clen > 0)        if (clen > 0)
1946          {          {
1947          int otherd = -1;          unsigned int otherd = NOTACHAR;
1948          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1949            {            {
1950  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1261  for (;;) Line 1959  for (;;)
1959            otherd = fcc[d];            otherd = fcc[d];
1960            }            }
1961          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1962            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1963              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1964                {
1965                active_count--;            /* Remove non-match possibility */
1966                next_active_state--;
1967                }
1968              ADD_NEW(state_offset + dlen + 1, 0);
1969              }
1970          }          }
1971        break;        break;
1972    
1973        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1974        case OP_STAR:        case OP_STAR:
1975        case OP_MINSTAR:        case OP_MINSTAR:
1976          case OP_POSSTAR:
1977        case OP_NOTSTAR:        case OP_NOTSTAR:
1978        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1979          case OP_NOTPOSSTAR:
1980        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1981        if (clen > 0)        if (clen > 0)
1982          {          {
1983          int otherd = -1;          unsigned int otherd = NOTACHAR;
1984          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1985            {            {
1986  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1288  for (;;) Line 1995  for (;;)
1995            otherd = fcc[d];            otherd = fcc[d];
1996            }            }
1997          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1998            { ADD_NEW(state_offset, 0); }            {
1999              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2000                {
2001                active_count--;            /* Remove non-match possibility */
2002                next_active_state--;
2003                }
2004              ADD_NEW(state_offset, 0);
2005              }
2006          }          }
2007        break;        break;
2008    
2009        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2010        case OP_EXACT:        case OP_EXACT:
2011          case OP_NOTEXACT:
2012          count = current_state->count;  /* Number already matched */
2013          if (clen > 0)
2014            {
2015            unsigned int otherd = NOTACHAR;
2016            if ((ims & PCRE_CASELESS) != 0)
2017              {
2018    #ifdef SUPPORT_UTF8
2019              if (utf8 && d >= 128)
2020                {
2021    #ifdef SUPPORT_UCP
2022                otherd = _pcre_ucp_othercase(d);
2023    #endif  /* SUPPORT_UCP */
2024                }
2025              else
2026    #endif  /* SUPPORT_UTF8 */
2027              otherd = fcc[d];
2028              }
2029            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2030              {
2031              if (++count >= GET2(code, 1))
2032                { ADD_NEW(state_offset + dlen + 3, 0); }
2033              else
2034                { ADD_NEW(state_offset, count); }
2035              }
2036            }
2037          break;
2038    
2039          /*-----------------------------------------------------------------*/
2040        case OP_UPTO:        case OP_UPTO:
2041        case OP_MINUPTO:        case OP_MINUPTO:
2042        case OP_NOTEXACT:        case OP_POSUPTO:
2043        case OP_NOTUPTO:        case OP_NOTUPTO:
2044        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2045        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
2046          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
2047        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2048        if (clen > 0)        if (clen > 0)
2049          {          {
2050          int otherd = -1;          unsigned int otherd = NOTACHAR;
2051          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2052            {            {
2053  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1320  for (;;) Line 2063  for (;;)
2063            }            }
2064          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2065            {            {
2066              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2067                {
2068                active_count--;             /* Remove non-match possibility */
2069                next_active_state--;
2070                }
2071            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2072              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
2073            else            else
# Line 1448  for (;;) Line 2196  for (;;)
2196    
2197        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2198        case OP_COND:        case OP_COND:
2199          case OP_SCOND:
2200          {          {
2201          int local_offsets[1000];          int local_offsets[1000];
2202          int local_workspace[1000];          int local_workspace[1000];
2203          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
2204    
2205          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
2206          means "test if in a recursion". */  
2207            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
2208    
2209          if (condcode == OP_CREF)          /* The DEFINE condition is always false */
2210    
2211            if (condcode == OP_DEF)
2212              {
2213              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
2214              }
2215    
2216            /* The only supported version of OP_RREF is for the value RREF_ANY,
2217            which means "test if in any recursion". We can't test for specifically
2218            recursed groups. */
2219    
2220            else if (condcode == OP_RREF)
2221            {            {
2222            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2223            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2224            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2225              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
2226            }            }
# Line 1728  is not anchored. Line 2489  is not anchored.
2489    
2490  Arguments:  Arguments:
2491    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2492    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2493    subject         points to the subject string    subject         points to the subject string
2494    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2495    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1744  Returns:          > 0 => number of match Line 2505  Returns:          > 0 => number of match
2505                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2506  */  */
2507    
2508  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2509  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2510    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2511    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 1834  md->end_subject = end_subject; Line 2595  md->end_subject = end_subject;
2595  md->moptions = options;  md->moptions = options;
2596  md->poptions = re->options;  md->poptions = re->options;
2597    
2598  /* Handle different types of newline. The two bits give four cases. If nothing  /* If the BSR option is not set at match time, copy what was set
2599  is set at run time, whatever was used at compile time applies. */  at compile time. */
2600    
2601  switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &  if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
          PCRE_NEWLINE_CRLF)  
2602    {    {
2603    default:              newline = NEWLINE; break;   /* Compile-time default */    if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2604        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2605    #ifdef BSR_ANYCRLF
2606      else md->moptions |= PCRE_BSR_ANYCRLF;
2607    #endif
2608      }
2609    
2610    /* Handle different types of newline. The three bits give eight cases. If
2611    nothing is set at run time, whatever was used at compile time applies. */
2612    
2613    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2614             PCRE_NEWLINE_BITS)
2615      {
2616      case 0: newline = NEWLINE; break;   /* Compile-time default */
2617    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
2618    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = '\n'; break;
2619    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2620         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2621      case PCRE_NEWLINE_ANY: newline = -1; break;
2622      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2623      default: return PCRE_ERROR_BADNEWLINE;
2624    }    }
2625    
2626  if (newline > 255)  if (newline == -2)
2627      {
2628      md->nltype = NLTYPE_ANYCRLF;
2629      }
2630    else if (newline < 0)
2631    {    {
2632    md->nllen = 2;    md->nltype = NLTYPE_ANY;
   md->nl[0] = (newline >> 8) & 255;  
   md->nl[1] = newline & 255;  
2633    }    }
2634  else  else
2635    {    {
2636    md->nllen = 1;    md->nltype = NLTYPE_FIXED;
2637    md->nl[0] = newline;    if (newline > 255)
2638        {
2639        md->nllen = 2;
2640        md->nl[0] = (newline >> 8) & 255;
2641        md->nl[1] = newline & 255;
2642        }
2643      else
2644        {
2645        md->nllen = 1;
2646        md->nl[0] = newline;
2647        }
2648    }    }
2649    
2650  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 1889  if (md->tables == NULL) md->tables = _pc Line 2677  if (md->tables == NULL) md->tables = _pc
2677  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2678    
2679  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2680  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2681  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2682    
2683  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 1900  studied, there may be a bitmap of possib Line 2688  studied, there may be a bitmap of possib
2688    
2689  if (!anchored)  if (!anchored)
2690    {    {
2691    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2692      {      {
2693      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2694      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 1917  if (!anchored) Line 2705  if (!anchored)
2705  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2706  character" set. */  character" set. */
2707    
2708  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2709    {    {
2710    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2711    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 1946  for (;;) Line 2734  for (;;)
2734      if (firstline)      if (firstline)
2735        {        {
2736        const uschar *t = current_subject;        const uschar *t = current_subject;
2737        while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2738        end_subject = t;        end_subject = t;
2739        }        }
2740    
# Line 1965  for (;;) Line 2753  for (;;)
2753    
2754      else if (startline)      else if (startline)
2755        {        {
2756        if (current_subject > md->start_subject + md->nllen +        if (current_subject > md->start_subject + start_offset)
           start_offset)  
2757          {          {
2758          while (current_subject <= end_subject &&          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2759                 !IS_NEWLINE(current_subject - md->nllen))            current_subject++;
2760    
2761            /* If we have just passed a CR and the newline option is ANY or
2762            ANYCRLF, and we are now at a LF, advance the match position by one more
2763            character. */
2764    
2765            if (current_subject[-1] == '\r' &&
2766                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2767                 current_subject < end_subject &&
2768                 *current_subject == '\n')
2769            current_subject++;            current_subject++;
2770          }          }
2771        }        }
# Line 2070  for (;;) Line 2866  for (;;)
2866    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2867    and firstline is set. */    and firstline is set. */
2868    
2869    if (firstline &&    if (firstline && IS_NEWLINE(current_subject)) break;
       current_subject <= end_subject - md->nllen &&  
       IS_NEWLINE(current_subject)) break;  
2870    current_subject++;    current_subject++;
2871    if (utf8)    if (utf8)
2872      {      {
# Line 2080  for (;;) Line 2874  for (;;)
2874        current_subject++;        current_subject++;
2875      }      }
2876    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2877    }  
2878      /* If we have just passed a CR and we are now at a LF, and the pattern does
2879      not contain any explicit matches for \r or \n, and the newline option is CRLF
2880      or ANY or ANYCRLF, advance the match position by one more character. */
2881    
2882      if (current_subject[-1] == '\r' &&
2883          current_subject < end_subject &&
2884          *current_subject == '\n' &&
2885          (re->flags & PCRE_HASCRORLF) == 0 &&
2886            (md->nltype == NLTYPE_ANY ||
2887             md->nltype == NLTYPE_ANYCRLF ||
2888             md->nllen == 2))
2889        current_subject++;
2890    
2891      }   /* "Bumpalong" loop */
2892    
2893  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2894  }  }

Legend:
Removed from v.91  
changed lines
  Added in v.305

  ViewVC Help
Powered by ViewVC 1.1.5