/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 91 by nigel, Sat Feb 24 21:41:34 2007 UTC revision 335 by ph10, Sat Apr 12 14:36:14 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47  #define NLBLOCK md           /* The block containing newline information */  #ifdef HAVE_CONFIG_H
48    #include "config.h"
49    #endif
50    
51    #define NLBLOCK md             /* Block containing newline information */
52    #define PSSTART start_subject  /* Field containing processed string start */
53    #define PSEND   end_subject    /* Field containing processed string end */
54    
55  #include "pcre_internal.h"  #include "pcre_internal.h"
56    
57    
# Line 58  compatible, but it has advantages in cer Line 66  compatible, but it has advantages in cer
66  *************************************************/  *************************************************/
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  
73  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_PROP_EXTRA       300
74    #define OP_EXTUNI_EXTRA     320
75    #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static const uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 83  static uschar coptable[] = { Line 98  static uschar coptable[] = {
98    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
99    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
100    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
101      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
102    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
103    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
104    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
105      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
106    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
107    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
108    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
109      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
110    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
111    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
112    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 107  static uschar coptable[] = { Line 125  static uschar coptable[] = {
125    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
126    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
127    0,                             /* Reverse                                */    0,                             /* Reverse                                */
128    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
129    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131      0,                             /* RREF                                   */
132      0,                             /* DEF                                    */
133    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134    0,                             /* BRANUMBER                              */    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135    0                              /* BRA                                    */    0, 0                           /* FAIL, ACCEPT                           */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static const uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
146    0                               /* OP_ANY */    0                               /* OP_ANY */
147  };  };
148    
149  static uschar toptable2[] = {  static const uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
# Line 278  stateblock *next_active_state, *next_new Line 298  stateblock *next_active_state, *next_new
298    
299  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
300  const uschar *ptr;  const uschar *ptr;
301  const uschar *end_code;  const uschar *end_code, *first_op;
302    
303  int active_count, new_count, match_count;  int active_count, new_count, match_count;
304    
# Line 291  const uschar *start_code = md->start_cod Line 311  const uschar *start_code = md->start_cod
311    
312  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
313  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
314    #else
315    BOOL utf8 = FALSE;
316  #endif  #endif
317    
318  rlevel++;  rlevel++;
# Line 314  active_states = (stateblock *)(workspace Line 336  active_states = (stateblock *)(workspace
336  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
337  new_count = 0;  new_count = 0;
338    
339    first_op = this_start_code + 1 + LINK_SIZE +
340      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
341    
342  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
343  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
344  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 323  If the first opcode in the first alterna Line 348  If the first opcode in the first alterna
348  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
349  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
350    
351  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
352    {    {
353    int max_back = 0;    int max_back = 0;
354    int gone_back;    int gone_back;
# Line 405  else Line 430  else
430    
431    else    else
432      {      {
433        int length = 1 + LINK_SIZE +
434          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
435      do      do
436        {        {
437        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
438        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
439          length = 1 + LINK_SIZE;
440        }        }
441      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
442      }      }
# Line 461  for (;;) Line 489  for (;;)
489    
490    if (ptr < end_subject)    if (ptr < end_subject)
491      {      {
492      clen = 1;      clen = 1;        /* Number of bytes in the character */
493  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
494      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
495  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 469  for (;;) Line 497  for (;;)
497      }      }
498    else    else
499      {      {
500      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
501      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
502      }      }
503    
504    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 484  for (;;) Line 512  for (;;)
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue;
515    #ifdef SUPPORT_UCP
516      int chartype, script;      int chartype, script;
517    #endif
518    
519  #ifdef DEBUG  #ifdef DEBUG
520      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
521      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
522        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
523          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
524  #endif  #endif
# Line 532  for (;;) Line 562  for (;;)
562    
563      code = start_code + state_offset;      code = start_code + state_offset;
564      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
565    
566      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
567      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 540  for (;;) Line 569  for (;;)
569      permitted.      permitted.
570    
571      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
572      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
573      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
574      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
575      opcodes. */      */
576    
577      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
578        {        {
# Line 554  for (;;) Line 583  for (;;)
583        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
584        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
585          {          {
586          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
587          if (d >= OP_NOTPROP)            {
588            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
589              case OP_NOTPROP:
590              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
591              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593              case OP_NOT_HSPACE:
594              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595              case OP_NOT_VSPACE:
596              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597              default: break;
598              }
599          }          }
600        }        }
601      else      else
602        {        {
603        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
604        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
605        }        }
606    
607    
# Line 624  for (;;) Line 663  for (;;)
663    
664        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
665        case OP_BRA:        case OP_BRA:
666          case OP_SBRA:
667        do        do
668          {          {
669          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 633  for (;;) Line 673  for (;;)
673        break;        break;
674    
675        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
676          case OP_CBRA:
677          case OP_SCBRA:
678          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
679          code += GET(code, 1);
680          while (*code == OP_ALT)
681            {
682            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
683            code += GET(code, 1);
684            }
685          break;
686    
687          /*-----------------------------------------------------------------*/
688        case OP_BRAZERO:        case OP_BRAZERO:
689        case OP_BRAMINZERO:        case OP_BRAMINZERO:
690        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 642  for (;;) Line 694  for (;;)
694        break;        break;
695    
696        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
697        case OP_BRANUMBER:        case OP_SKIPZERO:
698        ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);        code += 1 + GET(code, 2);
699          while (*code == OP_ALT) code += GET(code, 1);
700          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
701        break;        break;
702    
703        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
704        case OP_CIRC:        case OP_CIRC:
705        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
706            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
             ptr >= start_subject + md->nllen &&  
707              ptr != end_subject &&              ptr != end_subject &&
708              IS_NEWLINE(ptr - md->nllen)))              WAS_NEWLINE(ptr)))
709          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
710        break;        break;
711    
# Line 686  for (;;) Line 739  for (;;)
739    
740        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
741        case OP_ANY:        case OP_ANY:
742        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 ||        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
                        ptr > end_subject - md->nllen ||  
                        !IS_NEWLINE(ptr)))  
743          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
744        break;        break;
745    
746        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
747        case OP_EODN:        case OP_EODN:
748        if (clen == 0 ||        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
            (ptr == end_subject - md->nllen && IS_NEWLINE(ptr)))  
749          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
750        break;        break;
751    
# Line 704  for (;;) Line 754  for (;;)
754        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
755          {          {
756          if (clen == 0 ||          if (clen == 0 ||
757              (ptr <= end_subject - md->nllen && IS_NEWLINE(ptr) &&              (IS_NEWLINE(ptr) &&
758                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
759              ))              ))
760            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
761          }          }
762        else if ((ims & PCRE_MULTILINE) != 0 &&        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
                ptr <= end_subject - md->nllen && IS_NEWLINE(ptr))  
763          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
764        break;        break;
765    
# Line 759  for (;;) Line 808  for (;;)
808        break;        break;
809    
810    
 #ifdef SUPPORT_UCP  
   
811        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
812        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
813        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
814        */        */
815    
816    #ifdef SUPPORT_UCP
817        case OP_PROP:        case OP_PROP:
818        case OP_NOTPROP:        case OP_NOTPROP:
819        if (clen > 0)        if (clen > 0)
# Line 816  for (;;) Line 864  for (;;)
864    
865        case OP_TYPEPLUS:        case OP_TYPEPLUS:
866        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
867          case OP_TYPEPOSPLUS:
868        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
869        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
870        if (clen > 0)        if (clen > 0)
# Line 824  for (;;) Line 873  for (;;)
873              (c < 256 &&              (c < 256 &&
874                (d != OP_ANY ||                (d != OP_ANY ||
875                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
876                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
877                ) &&                ) &&
878                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
879            {            {
880              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
881                {
882                active_count--;            /* Remove non-match possibility */
883                next_active_state--;
884                }
885            count++;            count++;
886            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
887            }            }
# Line 838  for (;;) Line 891  for (;;)
891        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
892        case OP_TYPEQUERY:        case OP_TYPEQUERY:
893        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
894          case OP_TYPEPOSQUERY:
895        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
896        if (clen > 0)        if (clen > 0)
897          {          {
# Line 845  for (;;) Line 899  for (;;)
899              (c < 256 &&              (c < 256 &&
900                (d != OP_ANY ||                (d != OP_ANY ||
901                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
902                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
903                ) &&                ) &&
904                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
905            {            {
906              if (codevalue == OP_TYPEPOSQUERY)
907                {
908                active_count--;            /* Remove non-match possibility */
909                next_active_state--;
910                }
911            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
912            }            }
913          }          }
# Line 858  for (;;) Line 916  for (;;)
916        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
917        case OP_TYPESTAR:        case OP_TYPESTAR:
918        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
919          case OP_TYPEPOSSTAR:
920        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
921        if (clen > 0)        if (clen > 0)
922          {          {
# Line 865  for (;;) Line 924  for (;;)
924              (c < 256 &&              (c < 256 &&
925                (d != OP_ANY ||                (d != OP_ANY ||
926                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
927                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
928                ) &&                ) &&
929                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
930            {            {
931              if (codevalue == OP_TYPEPOSSTAR)
932                {
933                active_count--;            /* Remove non-match possibility */
934                next_active_state--;
935                }
936            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
937            }            }
938          }          }
# Line 877  for (;;) Line 940  for (;;)
940    
941        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
942        case OP_TYPEEXACT:        case OP_TYPEEXACT:
943          count = current_state->count;  /* Number already matched */
944          if (clen > 0)
945            {
946            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
947                (c < 256 &&
948                  (d != OP_ANY ||
949                   (ims & PCRE_DOTALL) != 0 ||
950                   !IS_NEWLINE(ptr)
951                  ) &&
952                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
953              {
954              if (++count >= GET2(code, 1))
955                { ADD_NEW(state_offset + 4, 0); }
956              else
957                { ADD_NEW(state_offset, count); }
958              }
959            }
960          break;
961    
962          /*-----------------------------------------------------------------*/
963        case OP_TYPEUPTO:        case OP_TYPEUPTO:
964        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
965        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
966          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
967        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
968        if (clen > 0)        if (clen > 0)
969          {          {
# Line 888  for (;;) Line 971  for (;;)
971              (c < 256 &&              (c < 256 &&
972                (d != OP_ANY ||                (d != OP_ANY ||
973                 (ims & PCRE_DOTALL) != 0 ||                 (ims & PCRE_DOTALL) != 0 ||
                ptr > end_subject - md->nllen ||  
974                 !IS_NEWLINE(ptr)                 !IS_NEWLINE(ptr)
975                ) &&                ) &&
976                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
977            {            {
978              if (codevalue == OP_TYPEPOSUPTO)
979                {
980                active_count--;           /* Remove non-match possibility */
981                next_active_state--;
982                }
983            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
984              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
985            else            else
# Line 903  for (;;) Line 990  for (;;)
990    
991  /* ========================================================================== */  /* ========================================================================== */
992        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
993        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
994        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
995        d variable. */        is in the d variable. */
996    
997    #ifdef SUPPORT_UCP
998        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
999        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1000          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1001        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
1002        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
1003        if (clen > 0)        if (clen > 0)
# Line 944  for (;;) Line 1033  for (;;)
1033            break;            break;
1034            }            }
1035    
1036          if (OK == (d == OP_PROP)) { count++; ADD_NEW(state_offset, count); }          if (OK == (d == OP_PROP))
1037              {
1038              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1039                {
1040                active_count--;           /* Remove non-match possibility */
1041                next_active_state--;
1042                }
1043              count++;
1044              ADD_NEW(state_offset, count);
1045              }
1046          }          }
1047        break;        break;
1048    
1049        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1050        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1051        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1052          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1053        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1054        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1055        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1056          {          {
1057          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1058          int ncount = 0;          int ncount = 0;
1059            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1060              {
1061              active_count--;           /* Remove non-match possibility */
1062              next_active_state--;
1063              }
1064          while (nptr < end_subject)          while (nptr < end_subject)
1065            {            {
1066            int nd;            int nd;
# Line 970  for (;;) Line 1074  for (;;)
1074          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1075          }          }
1076        break;        break;
1077    #endif
1078    
1079          /*-----------------------------------------------------------------*/
1080          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1081          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1082          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1083          count = current_state->count;  /* Already matched */
1084          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1085          if (clen > 0)
1086            {
1087            int ncount = 0;
1088            switch (c)
1089              {
1090              case 0x000b:
1091              case 0x000c:
1092              case 0x0085:
1093              case 0x2028:
1094              case 0x2029:
1095              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1096              goto ANYNL01;
1097    
1098              case 0x000d:
1099              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1100              /* Fall through */
1101    
1102              ANYNL01:
1103              case 0x000a:
1104              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1105                {
1106                active_count--;           /* Remove non-match possibility */
1107                next_active_state--;
1108                }
1109              count++;
1110              ADD_NEW_DATA(-state_offset, count, ncount);
1111              break;
1112    
1113              default:
1114              break;
1115              }
1116            }
1117          break;
1118    
1119        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1120          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1121          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1122          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1123          count = current_state->count;  /* Already matched */
1124          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1125          if (clen > 0)
1126            {
1127            BOOL OK;
1128            switch (c)
1129              {
1130              case 0x000a:
1131              case 0x000b:
1132              case 0x000c:
1133              case 0x000d:
1134              case 0x0085:
1135              case 0x2028:
1136              case 0x2029:
1137              OK = TRUE;
1138              break;
1139    
1140              default:
1141              OK = FALSE;
1142              break;
1143              }
1144    
1145            if (OK == (d == OP_VSPACE))
1146              {
1147              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1148                {
1149                active_count--;           /* Remove non-match possibility */
1150                next_active_state--;
1151                }
1152              count++;
1153              ADD_NEW_DATA(-state_offset, count, 0);
1154              }
1155            }
1156          break;
1157    
1158          /*-----------------------------------------------------------------*/
1159          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1160          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1161          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1162          count = current_state->count;  /* Already matched */
1163          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1164          if (clen > 0)
1165            {
1166            BOOL OK;
1167            switch (c)
1168              {
1169              case 0x09:      /* HT */
1170              case 0x20:      /* SPACE */
1171              case 0xa0:      /* NBSP */
1172              case 0x1680:    /* OGHAM SPACE MARK */
1173              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1174              case 0x2000:    /* EN QUAD */
1175              case 0x2001:    /* EM QUAD */
1176              case 0x2002:    /* EN SPACE */
1177              case 0x2003:    /* EM SPACE */
1178              case 0x2004:    /* THREE-PER-EM SPACE */
1179              case 0x2005:    /* FOUR-PER-EM SPACE */
1180              case 0x2006:    /* SIX-PER-EM SPACE */
1181              case 0x2007:    /* FIGURE SPACE */
1182              case 0x2008:    /* PUNCTUATION SPACE */
1183              case 0x2009:    /* THIN SPACE */
1184              case 0x200A:    /* HAIR SPACE */
1185              case 0x202f:    /* NARROW NO-BREAK SPACE */
1186              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1187              case 0x3000:    /* IDEOGRAPHIC SPACE */
1188              OK = TRUE;
1189              break;
1190    
1191              default:
1192              OK = FALSE;
1193              break;
1194              }
1195    
1196            if (OK == (d == OP_HSPACE))
1197              {
1198              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1199                {
1200                active_count--;           /* Remove non-match possibility */
1201                next_active_state--;
1202                }
1203              count++;
1204              ADD_NEW_DATA(-state_offset, count, 0);
1205              }
1206            }
1207          break;
1208    
1209          /*-----------------------------------------------------------------*/
1210    #ifdef SUPPORT_UCP
1211        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1212        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1213          case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1214        count = 4;        count = 4;
1215        goto QS1;        goto QS1;
1216    
1217        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1218        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1219          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1220        count = 0;        count = 0;
1221    
1222        QS1:        QS1:
# Line 1017  for (;;) Line 1255  for (;;)
1255            break;            break;
1256            }            }
1257    
1258          if (OK == (d == OP_PROP)) { ADD_NEW(state_offset + count, 0); }          if (OK == (d == OP_PROP))
1259              {
1260              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1261                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1262                {
1263                active_count--;           /* Remove non-match possibility */
1264                next_active_state--;
1265                }
1266              ADD_NEW(state_offset + count, 0);
1267              }
1268          }          }
1269        break;        break;
1270    
1271        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1272        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1273        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1274          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1275        count = 2;        count = 2;
1276        goto QS2;        goto QS2;
1277    
1278        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1279        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1280          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1281        count = 0;        count = 0;
1282    
1283        QS2:        QS2:
# Line 1038  for (;;) Line 1287  for (;;)
1287          {          {
1288          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1289          int ncount = 0;          int ncount = 0;
1290            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1291                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1292              {
1293              active_count--;           /* Remove non-match possibility */
1294              next_active_state--;
1295              }
1296          while (nptr < end_subject)          while (nptr < end_subject)
1297            {            {
1298            int nd;            int nd;
# Line 1050  for (;;) Line 1305  for (;;)
1305          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1306          }          }
1307        break;        break;
1308    #endif
1309    
1310          /*-----------------------------------------------------------------*/
1311          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1312          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1313          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1314          count = 2;
1315          goto QS3;
1316    
1317          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1318          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1319          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1320          count = 0;
1321    
1322          QS3:
1323          ADD_ACTIVE(state_offset + 2, 0);
1324          if (clen > 0)
1325            {
1326            int ncount = 0;
1327            switch (c)
1328              {
1329              case 0x000b:
1330              case 0x000c:
1331              case 0x0085:
1332              case 0x2028:
1333              case 0x2029:
1334              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1335              goto ANYNL02;
1336    
1337              case 0x000d:
1338              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1339              /* Fall through */
1340    
1341              ANYNL02:
1342              case 0x000a:
1343              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1344                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1345                {
1346                active_count--;           /* Remove non-match possibility */
1347                next_active_state--;
1348                }
1349              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1350              break;
1351    
1352              default:
1353              break;
1354              }
1355            }
1356          break;
1357    
1358          /*-----------------------------------------------------------------*/
1359          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1360          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1361          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1362          count = 2;
1363          goto QS4;
1364    
1365          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1366          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1367          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1368          count = 0;
1369    
1370          QS4:
1371          ADD_ACTIVE(state_offset + 2, 0);
1372          if (clen > 0)
1373            {
1374            BOOL OK;
1375            switch (c)
1376              {
1377              case 0x000a:
1378              case 0x000b:
1379              case 0x000c:
1380              case 0x000d:
1381              case 0x0085:
1382              case 0x2028:
1383              case 0x2029:
1384              OK = TRUE;
1385              break;
1386    
1387              default:
1388              OK = FALSE;
1389              break;
1390              }
1391            if (OK == (d == OP_VSPACE))
1392              {
1393              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1394                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1395                {
1396                active_count--;           /* Remove non-match possibility */
1397                next_active_state--;
1398                }
1399              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1400              }
1401            }
1402          break;
1403    
1404        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1405          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1406          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1407          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1408          count = 2;
1409          goto QS5;
1410    
1411          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1412          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1413          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1414          count = 0;
1415    
1416          QS5:
1417          ADD_ACTIVE(state_offset + 2, 0);
1418          if (clen > 0)
1419            {
1420            BOOL OK;
1421            switch (c)
1422              {
1423              case 0x09:      /* HT */
1424              case 0x20:      /* SPACE */
1425              case 0xa0:      /* NBSP */
1426              case 0x1680:    /* OGHAM SPACE MARK */
1427              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1428              case 0x2000:    /* EN QUAD */
1429              case 0x2001:    /* EM QUAD */
1430              case 0x2002:    /* EN SPACE */
1431              case 0x2003:    /* EM SPACE */
1432              case 0x2004:    /* THREE-PER-EM SPACE */
1433              case 0x2005:    /* FOUR-PER-EM SPACE */
1434              case 0x2006:    /* SIX-PER-EM SPACE */
1435              case 0x2007:    /* FIGURE SPACE */
1436              case 0x2008:    /* PUNCTUATION SPACE */
1437              case 0x2009:    /* THIN SPACE */
1438              case 0x200A:    /* HAIR SPACE */
1439              case 0x202f:    /* NARROW NO-BREAK SPACE */
1440              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1441              case 0x3000:    /* IDEOGRAPHIC SPACE */
1442              OK = TRUE;
1443              break;
1444    
1445              default:
1446              OK = FALSE;
1447              break;
1448              }
1449    
1450            if (OK == (d == OP_HSPACE))
1451              {
1452              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1453                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1454                {
1455                active_count--;           /* Remove non-match possibility */
1456                next_active_state--;
1457                }
1458              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1459              }
1460            }
1461          break;
1462    
1463          /*-----------------------------------------------------------------*/
1464    #ifdef SUPPORT_UCP
1465        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1466        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1467        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1468          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1469        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1470          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1471        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1093  for (;;) Line 1504  for (;;)
1504    
1505          if (OK == (d == OP_PROP))          if (OK == (d == OP_PROP))
1506            {            {
1507              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1508                {
1509                active_count--;           /* Remove non-match possibility */
1510                next_active_state--;
1511                }
1512            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1513              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 6, 0); }
1514            else            else
# Line 1105  for (;;) Line 1521  for (;;)
1521        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1522        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1523        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1524          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1525        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1526          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1527        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
# Line 1112  for (;;) Line 1529  for (;;)
1529          {          {
1530          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1531          int ncount = 0;          int ncount = 0;
1532            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1533              {
1534              active_count--;           /* Remove non-match possibility */
1535              next_active_state--;
1536              }
1537          while (nptr < end_subject)          while (nptr < end_subject)
1538            {            {
1539            int nd;            int nd;
# Line 1127  for (;;) Line 1549  for (;;)
1549            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1550          }          }
1551        break;        break;
1552    #endif
1553    
1554          /*-----------------------------------------------------------------*/
1555          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1556          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1557          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1558          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1559          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1560            { ADD_ACTIVE(state_offset + 4, 0); }
1561          count = current_state->count;  /* Number already matched */
1562          if (clen > 0)
1563            {
1564            int ncount = 0;
1565            switch (c)
1566              {
1567              case 0x000b:
1568              case 0x000c:
1569              case 0x0085:
1570              case 0x2028:
1571              case 0x2029:
1572              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1573              goto ANYNL03;
1574    
1575              case 0x000d:
1576              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1577              /* Fall through */
1578    
1579              ANYNL03:
1580              case 0x000a:
1581              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1582                {
1583                active_count--;           /* Remove non-match possibility */
1584                next_active_state--;
1585                }
1586              if (++count >= GET2(code, 1))
1587                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1588              else
1589                { ADD_NEW_DATA(-state_offset, count, ncount); }
1590              break;
1591    
1592              default:
1593              break;
1594              }
1595            }
1596          break;
1597    
1598          /*-----------------------------------------------------------------*/
1599          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1600          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1601          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1602          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1603          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1604            { ADD_ACTIVE(state_offset + 4, 0); }
1605          count = current_state->count;  /* Number already matched */
1606          if (clen > 0)
1607            {
1608            BOOL OK;
1609            switch (c)
1610              {
1611              case 0x000a:
1612              case 0x000b:
1613              case 0x000c:
1614              case 0x000d:
1615              case 0x0085:
1616              case 0x2028:
1617              case 0x2029:
1618              OK = TRUE;
1619              break;
1620    
1621              default:
1622              OK = FALSE;
1623              }
1624    
1625            if (OK == (d == OP_VSPACE))
1626              {
1627              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1628                {
1629                active_count--;           /* Remove non-match possibility */
1630                next_active_state--;
1631                }
1632              if (++count >= GET2(code, 1))
1633                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1634              else
1635                { ADD_NEW_DATA(-state_offset, count, 0); }
1636              }
1637            }
1638          break;
1639    
1640          /*-----------------------------------------------------------------*/
1641          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1642          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1643          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1644          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1645          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1646            { ADD_ACTIVE(state_offset + 4, 0); }
1647          count = current_state->count;  /* Number already matched */
1648          if (clen > 0)
1649            {
1650            BOOL OK;
1651            switch (c)
1652              {
1653              case 0x09:      /* HT */
1654              case 0x20:      /* SPACE */
1655              case 0xa0:      /* NBSP */
1656              case 0x1680:    /* OGHAM SPACE MARK */
1657              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1658              case 0x2000:    /* EN QUAD */
1659              case 0x2001:    /* EM QUAD */
1660              case 0x2002:    /* EN SPACE */
1661              case 0x2003:    /* EM SPACE */
1662              case 0x2004:    /* THREE-PER-EM SPACE */
1663              case 0x2005:    /* FOUR-PER-EM SPACE */
1664              case 0x2006:    /* SIX-PER-EM SPACE */
1665              case 0x2007:    /* FIGURE SPACE */
1666              case 0x2008:    /* PUNCTUATION SPACE */
1667              case 0x2009:    /* THIN SPACE */
1668              case 0x200A:    /* HAIR SPACE */
1669              case 0x202f:    /* NARROW NO-BREAK SPACE */
1670              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1671              case 0x3000:    /* IDEOGRAPHIC SPACE */
1672              OK = TRUE;
1673              break;
1674    
1675              default:
1676              OK = FALSE;
1677              break;
1678              }
1679    
1680            if (OK == (d == OP_HSPACE))
1681              {
1682              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1683                {
1684                active_count--;           /* Remove non-match possibility */
1685                next_active_state--;
1686                }
1687              if (++count >= GET2(code, 1))
1688                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1689              else
1690                { ADD_NEW_DATA(-state_offset, count, 0); }
1691              }
1692            }
1693          break;
1694    
1695  /* ========================================================================== */  /* ========================================================================== */
1696        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1148  for (;;) Line 1712  for (;;)
1712          {          {
1713          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1714            {            {
1715            int othercase;            unsigned int othercase;
1716            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1717    
1718            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
# Line 1157  for (;;) Line 1721  for (;;)
1721  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1722            othercase = _pcre_ucp_othercase(c);            othercase = _pcre_ucp_othercase(c);
1723  #else  #else
1724            othercase = -1;            othercase = NOTACHAR;
1725  #endif  #endif
1726    
1727            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
# Line 1198  for (;;) Line 1762  for (;;)
1762  #endif  #endif
1763    
1764        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1765          /* This is tricky, like EXTUNI, because it too can match more than one
1766          character (when CR is followed by LF). In this case, set up a negative
1767          state to wait for one character to pass before continuing. */
1768    
1769          case OP_ANYNL:
1770          if (clen > 0) switch(c)
1771            {
1772            case 0x000b:
1773            case 0x000c:
1774            case 0x0085:
1775            case 0x2028:
1776            case 0x2029:
1777            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1778    
1779            case 0x000a:
1780            ADD_NEW(state_offset + 1, 0);
1781            break;
1782    
1783            case 0x000d:
1784            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1785              {
1786              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1787              }
1788            else
1789              {
1790              ADD_NEW(state_offset + 1, 0);
1791              }
1792            break;
1793            }
1794          break;
1795    
1796          /*-----------------------------------------------------------------*/
1797          case OP_NOT_VSPACE:
1798          if (clen > 0) switch(c)
1799            {
1800            case 0x000a:
1801            case 0x000b:
1802            case 0x000c:
1803            case 0x000d:
1804            case 0x0085:
1805            case 0x2028:
1806            case 0x2029:
1807            break;
1808    
1809            default:
1810            ADD_NEW(state_offset + 1, 0);
1811            break;
1812            }
1813          break;
1814    
1815          /*-----------------------------------------------------------------*/
1816          case OP_VSPACE:
1817          if (clen > 0) switch(c)
1818            {
1819            case 0x000a:
1820            case 0x000b:
1821            case 0x000c:
1822            case 0x000d:
1823            case 0x0085:
1824            case 0x2028:
1825            case 0x2029:
1826            ADD_NEW(state_offset + 1, 0);
1827            break;
1828    
1829            default: break;
1830            }
1831          break;
1832    
1833          /*-----------------------------------------------------------------*/
1834          case OP_NOT_HSPACE:
1835          if (clen > 0) switch(c)
1836            {
1837            case 0x09:      /* HT */
1838            case 0x20:      /* SPACE */
1839            case 0xa0:      /* NBSP */
1840            case 0x1680:    /* OGHAM SPACE MARK */
1841            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1842            case 0x2000:    /* EN QUAD */
1843            case 0x2001:    /* EM QUAD */
1844            case 0x2002:    /* EN SPACE */
1845            case 0x2003:    /* EM SPACE */
1846            case 0x2004:    /* THREE-PER-EM SPACE */
1847            case 0x2005:    /* FOUR-PER-EM SPACE */
1848            case 0x2006:    /* SIX-PER-EM SPACE */
1849            case 0x2007:    /* FIGURE SPACE */
1850            case 0x2008:    /* PUNCTUATION SPACE */
1851            case 0x2009:    /* THIN SPACE */
1852            case 0x200A:    /* HAIR SPACE */
1853            case 0x202f:    /* NARROW NO-BREAK SPACE */
1854            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1855            case 0x3000:    /* IDEOGRAPHIC SPACE */
1856            break;
1857    
1858            default:
1859            ADD_NEW(state_offset + 1, 0);
1860            break;
1861            }
1862          break;
1863    
1864          /*-----------------------------------------------------------------*/
1865          case OP_HSPACE:
1866          if (clen > 0) switch(c)
1867            {
1868            case 0x09:      /* HT */
1869            case 0x20:      /* SPACE */
1870            case 0xa0:      /* NBSP */
1871            case 0x1680:    /* OGHAM SPACE MARK */
1872            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1873            case 0x2000:    /* EN QUAD */
1874            case 0x2001:    /* EM QUAD */
1875            case 0x2002:    /* EN SPACE */
1876            case 0x2003:    /* EM SPACE */
1877            case 0x2004:    /* THREE-PER-EM SPACE */
1878            case 0x2005:    /* FOUR-PER-EM SPACE */
1879            case 0x2006:    /* SIX-PER-EM SPACE */
1880            case 0x2007:    /* FIGURE SPACE */
1881            case 0x2008:    /* PUNCTUATION SPACE */
1882            case 0x2009:    /* THIN SPACE */
1883            case 0x200A:    /* HAIR SPACE */
1884            case 0x202f:    /* NARROW NO-BREAK SPACE */
1885            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1886            case 0x3000:    /* IDEOGRAPHIC SPACE */
1887            ADD_NEW(state_offset + 1, 0);
1888            break;
1889            }
1890          break;
1891    
1892          /*-----------------------------------------------------------------*/
1893        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1894        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1895        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1205  for (;;) Line 1897  for (;;)
1897        case OP_NOT:        case OP_NOT:
1898        if (clen > 0)        if (clen > 0)
1899          {          {
1900          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1901          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1902          }          }
1903        break;        break;
# Line 1213  for (;;) Line 1905  for (;;)
1905        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1906        case OP_PLUS:        case OP_PLUS:
1907        case OP_MINPLUS:        case OP_MINPLUS:
1908          case OP_POSPLUS:
1909        case OP_NOTPLUS:        case OP_NOTPLUS:
1910        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1911          case OP_NOTPOSPLUS:
1912        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1913        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1914        if (clen > 0)        if (clen > 0)
1915          {          {
1916          int otherd = -1;          unsigned int otherd = NOTACHAR;
1917          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1918            {            {
1919  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1234  for (;;) Line 1928  for (;;)
1928            otherd = fcc[d];            otherd = fcc[d];
1929            }            }
1930          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1931            { count++; ADD_NEW(state_offset, count); }            {
1932              if (count > 0 &&
1933                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1934                {
1935                active_count--;             /* Remove non-match possibility */
1936                next_active_state--;
1937                }
1938              count++;
1939              ADD_NEW(state_offset, count);
1940              }
1941          }          }
1942        break;        break;
1943    
1944        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1945        case OP_QUERY:        case OP_QUERY:
1946        case OP_MINQUERY:        case OP_MINQUERY:
1947          case OP_POSQUERY:
1948        case OP_NOTQUERY:        case OP_NOTQUERY:
1949        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1950          case OP_NOTPOSQUERY:
1951        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1952        if (clen > 0)        if (clen > 0)
1953          {          {
1954          int otherd = -1;          unsigned int otherd = NOTACHAR;
1955          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1956            {            {
1957  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1261  for (;;) Line 1966  for (;;)
1966            otherd = fcc[d];            otherd = fcc[d];
1967            }            }
1968          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1969            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1970              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1971                {
1972                active_count--;            /* Remove non-match possibility */
1973                next_active_state--;
1974                }
1975              ADD_NEW(state_offset + dlen + 1, 0);
1976              }
1977          }          }
1978        break;        break;
1979    
1980        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1981        case OP_STAR:        case OP_STAR:
1982        case OP_MINSTAR:        case OP_MINSTAR:
1983          case OP_POSSTAR:
1984        case OP_NOTSTAR:        case OP_NOTSTAR:
1985        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1986          case OP_NOTPOSSTAR:
1987        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1988        if (clen > 0)        if (clen > 0)
1989          {          {
1990          int otherd = -1;          unsigned int otherd = NOTACHAR;
1991          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1992            {            {
1993  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1288  for (;;) Line 2002  for (;;)
2002            otherd = fcc[d];            otherd = fcc[d];
2003            }            }
2004          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2005            { ADD_NEW(state_offset, 0); }            {
2006              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2007                {
2008                active_count--;            /* Remove non-match possibility */
2009                next_active_state--;
2010                }
2011              ADD_NEW(state_offset, 0);
2012              }
2013          }          }
2014        break;        break;
2015    
2016        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2017        case OP_EXACT:        case OP_EXACT:
2018          case OP_NOTEXACT:
2019          count = current_state->count;  /* Number already matched */
2020          if (clen > 0)
2021            {
2022            unsigned int otherd = NOTACHAR;
2023            if ((ims & PCRE_CASELESS) != 0)
2024              {
2025    #ifdef SUPPORT_UTF8
2026              if (utf8 && d >= 128)
2027                {
2028    #ifdef SUPPORT_UCP
2029                otherd = _pcre_ucp_othercase(d);
2030    #endif  /* SUPPORT_UCP */
2031                }
2032              else
2033    #endif  /* SUPPORT_UTF8 */
2034              otherd = fcc[d];
2035              }
2036            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2037              {
2038              if (++count >= GET2(code, 1))
2039                { ADD_NEW(state_offset + dlen + 3, 0); }
2040              else
2041                { ADD_NEW(state_offset, count); }
2042              }
2043            }
2044          break;
2045    
2046          /*-----------------------------------------------------------------*/
2047        case OP_UPTO:        case OP_UPTO:
2048        case OP_MINUPTO:        case OP_MINUPTO:
2049        case OP_NOTEXACT:        case OP_POSUPTO:
2050        case OP_NOTUPTO:        case OP_NOTUPTO:
2051        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2052        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
2053          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
2054        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2055        if (clen > 0)        if (clen > 0)
2056          {          {
2057          int otherd = -1;          unsigned int otherd = NOTACHAR;
2058          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2059            {            {
2060  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
# Line 1320  for (;;) Line 2070  for (;;)
2070            }            }
2071          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2072            {            {
2073              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2074                {
2075                active_count--;             /* Remove non-match possibility */
2076                next_active_state--;
2077                }
2078            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2079              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
2080            else            else
# Line 1448  for (;;) Line 2203  for (;;)
2203    
2204        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2205        case OP_COND:        case OP_COND:
2206          case OP_SCOND:
2207          {          {
2208          int local_offsets[1000];          int local_offsets[1000];
2209          int local_workspace[1000];          int local_workspace[1000];
2210          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
2211    
2212          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
2213          means "test if in a recursion". */  
2214            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
2215    
2216          if (condcode == OP_CREF)          /* The DEFINE condition is always false */
2217    
2218            if (condcode == OP_DEF)
2219              {
2220              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
2221              }
2222    
2223            /* The only supported version of OP_RREF is for the value RREF_ANY,
2224            which means "test if in any recursion". We can't test for specifically
2225            recursed groups. */
2226    
2227            else if (condcode == OP_RREF)
2228            {            {
2229            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2230            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2231            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2232              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
2233            }            }
# Line 1728  is not anchored. Line 2496  is not anchored.
2496    
2497  Arguments:  Arguments:
2498    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2499    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2500    subject         points to the subject string    subject         points to the subject string
2501    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2502    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1744  Returns:          > 0 => number of match Line 2512  Returns:          > 0 => number of match
2512                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2513  */  */
2514    
2515  PCRE_DATA_SCOPE int  PCRE_EXP_DEFN int
2516  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2517    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2518    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
# Line 1834  md->end_subject = end_subject; Line 2602  md->end_subject = end_subject;
2602  md->moptions = options;  md->moptions = options;
2603  md->poptions = re->options;  md->poptions = re->options;
2604    
2605  /* Handle different types of newline. The two bits give four cases. If nothing  /* If the BSR option is not set at match time, copy what was set
2606  is set at run time, whatever was used at compile time applies. */  at compile time. */
2607    
2608  switch ((((options & PCRE_NEWLINE_CRLF) == 0)? re->options : options) &  if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
          PCRE_NEWLINE_CRLF)  
2609    {    {
2610    default:              newline = NEWLINE; break;   /* Compile-time default */    if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2611        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2612    #ifdef BSR_ANYCRLF
2613      else md->moptions |= PCRE_BSR_ANYCRLF;
2614    #endif
2615      }
2616    
2617    /* Handle different types of newline. The three bits give eight cases. If
2618    nothing is set at run time, whatever was used at compile time applies. */
2619    
2620    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2621             PCRE_NEWLINE_BITS)
2622      {
2623      case 0: newline = NEWLINE; break;   /* Compile-time default */
2624    case PCRE_NEWLINE_CR: newline = '\r'; break;    case PCRE_NEWLINE_CR: newline = '\r'; break;
2625    case PCRE_NEWLINE_LF: newline = '\n'; break;    case PCRE_NEWLINE_LF: newline = '\n'; break;
2626    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
2627         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2628      case PCRE_NEWLINE_ANY: newline = -1; break;
2629      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2630      default: return PCRE_ERROR_BADNEWLINE;
2631    }    }
2632    
2633  if (newline > 255)  if (newline == -2)
2634      {
2635      md->nltype = NLTYPE_ANYCRLF;
2636      }
2637    else if (newline < 0)
2638    {    {
2639    md->nllen = 2;    md->nltype = NLTYPE_ANY;
   md->nl[0] = (newline >> 8) & 255;  
   md->nl[1] = newline & 255;  
2640    }    }
2641  else  else
2642    {    {
2643    md->nllen = 1;    md->nltype = NLTYPE_FIXED;
2644    md->nl[0] = newline;    if (newline > 255)
2645        {
2646        md->nllen = 2;
2647        md->nl[0] = (newline >> 8) & 255;
2648        md->nl[1] = newline & 255;
2649        }
2650      else
2651        {
2652        md->nllen = 1;
2653        md->nl[0] = newline;
2654        }
2655    }    }
2656    
2657  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
# Line 1889  if (md->tables == NULL) md->tables = _pc Line 2684  if (md->tables == NULL) md->tables = _pc
2684  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2685    
2686  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2687  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2688  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2689    
2690  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 1900  studied, there may be a bitmap of possib Line 2695  studied, there may be a bitmap of possib
2695    
2696  if (!anchored)  if (!anchored)
2697    {    {
2698    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2699      {      {
2700      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2701      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 1917  if (!anchored) Line 2712  if (!anchored)
2712  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2713  character" set. */  character" set. */
2714    
2715  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2716    {    {
2717    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2718    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 1946  for (;;) Line 2741  for (;;)
2741      if (firstline)      if (firstline)
2742        {        {
2743        const uschar *t = current_subject;        const uschar *t = current_subject;
2744        while (t <= save_end_subject - md->nllen && !IS_NEWLINE(t)) t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2745        end_subject = t;        end_subject = t;
2746        }        }
2747    
# Line 1965  for (;;) Line 2760  for (;;)
2760    
2761      else if (startline)      else if (startline)
2762        {        {
2763        if (current_subject > md->start_subject + md->nllen +        if (current_subject > md->start_subject + start_offset)
           start_offset)  
2764          {          {
2765          while (current_subject <= end_subject &&          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2766                 !IS_NEWLINE(current_subject - md->nllen))            current_subject++;
2767    
2768            /* If we have just passed a CR and the newline option is ANY or
2769            ANYCRLF, and we are now at a LF, advance the match position by one more
2770            character. */
2771    
2772            if (current_subject[-1] == '\r' &&
2773                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2774                 current_subject < end_subject &&
2775                 *current_subject == '\n')
2776            current_subject++;            current_subject++;
2777          }          }
2778        }        }
# Line 2070  for (;;) Line 2873  for (;;)
2873    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2874    and firstline is set. */    and firstline is set. */
2875    
2876    if (firstline &&    if (firstline && IS_NEWLINE(current_subject)) break;
       current_subject <= end_subject - md->nllen &&  
       IS_NEWLINE(current_subject)) break;  
2877    current_subject++;    current_subject++;
2878    if (utf8)    if (utf8)
2879      {      {
# Line 2080  for (;;) Line 2881  for (;;)
2881        current_subject++;        current_subject++;
2882      }      }
2883    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2884    }  
2885      /* If we have just passed a CR and we are now at a LF, and the pattern does
2886      not contain any explicit matches for \r or \n, and the newline option is CRLF
2887      or ANY or ANYCRLF, advance the match position by one more character. */
2888    
2889      if (current_subject[-1] == '\r' &&
2890          current_subject < end_subject &&
2891          *current_subject == '\n' &&
2892          (re->flags & PCRE_HASCRORLF) == 0 &&
2893            (md->nltype == NLTYPE_ANY ||
2894             md->nltype == NLTYPE_ANYCRLF ||
2895             md->nllen == 2))
2896        current_subject++;
2897    
2898      }   /* "Bumpalong" loop */
2899    
2900  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2901  }  }

Legend:
Removed from v.91  
changed lines
  Added in v.335

  ViewVC Help
Powered by ViewVC 1.1.5