/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 85 by nigel, Sat Feb 24 21:41:13 2007 UTC revision 341 by ph10, Sat Apr 19 16:41:04 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47    #ifdef HAVE_CONFIG_H
48    #include "config.h"
49    #endif
50    
51    #define NLBLOCK md             /* Block containing newline information */
52    #define PSSTART start_subject  /* Field containing processed string start */
53    #define PSEND   end_subject    /* Field containing processed string end */
54    
55  #include "pcre_internal.h"  #include "pcre_internal.h"
56    
57    
# Line 57  compatible, but it has advantages in cer Line 66  compatible, but it has advantages in cer
66  *************************************************/  *************************************************/
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  
73  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_PROP_EXTRA       300
74    #define OP_EXTUNI_EXTRA     320
75    #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static const uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90    0, 0,                          /* Any, Anybyte                           */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91      0, 0, 0,                       /* Any, AllAny, Anybyte                   */
92    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 82  static uschar coptable[] = { Line 98  static uschar coptable[] = {
98    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
99    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
100    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
101      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
102    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
103    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
104    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
105      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
106    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
107    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
108    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
109      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
110    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
111    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
112    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 106  static uschar coptable[] = { Line 125  static uschar coptable[] = {
125    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
126    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
127    0,                             /* Reverse                                */    0,                             /* Reverse                                */
128    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
129    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131      0,                             /* RREF                                   */
132      0,                             /* DEF                                    */
133    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134    0,                             /* BRANUMBER                              */    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135    0                              /* BRA                                    */    0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static const uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
146    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
147  };  };
148    
149  static uschar toptable2[] = {  static const uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
154    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
155  };  };
156    
157    
# Line 202  Arguments: Line 223  Arguments:
223    rlevel            function call recursion level    rlevel            function call recursion level
224    recursing         regex recursive call level    recursing         regex recursive call level
225    
226  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
227                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
228                       -1 => failed to match                       -1 => failed to match
229                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
230    
# Line 277  stateblock *next_active_state, *next_new Line 298  stateblock *next_active_state, *next_new
298    
299  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
300  const uschar *ptr;  const uschar *ptr;
301  const uschar *end_code;  const uschar *end_code, *first_op;
302    
303  int active_count, new_count, match_count;  int active_count, new_count, match_count;
304    
# Line 288  const uschar *start_subject = md->start_ Line 309  const uschar *start_subject = md->start_
309  const uschar *end_subject = md->end_subject;  const uschar *end_subject = md->end_subject;
310  const uschar *start_code = md->start_code;  const uschar *start_code = md->start_code;
311    
312    #ifdef SUPPORT_UTF8
313  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
314    #else
315    BOOL utf8 = FALSE;
316    #endif
317    
318  rlevel++;  rlevel++;
319  offsetcount &= (-2);  offsetcount &= (-2);
# Line 311  active_states = (stateblock *)(workspace Line 336  active_states = (stateblock *)(workspace
336  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
337  new_count = 0;  new_count = 0;
338    
339    first_op = this_start_code + 1 + LINK_SIZE +
340      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
341    
342  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
343  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
344  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 320  If the first opcode in the first alterna Line 348  If the first opcode in the first alterna
348  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
349  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
350    
351  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
352    {    {
353    int max_back = 0;    int max_back = 0;
354    int gone_back;    int gone_back;
# Line 402  else Line 430  else
430    
431    else    else
432      {      {
433        int length = 1 + LINK_SIZE +
434          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
435      do      do
436        {        {
437        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
438        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
439          length = 1 + LINK_SIZE;
440        }        }
441      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
442      }      }
# Line 421  ptr = current_subject; Line 452  ptr = current_subject;
452  for (;;)  for (;;)
453    {    {
454    int i, j;    int i, j;
455    int c, d, clen, dlen;    int clen, dlen;
456      unsigned int c, d;
457    
458    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
459    new state list. */    new state list. */
# Line 457  for (;;) Line 489  for (;;)
489    
490    if (ptr < end_subject)    if (ptr < end_subject)
491      {      {
492      clen = 1;      clen = 1;        /* Number of bytes in the character */
493  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
494      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
495  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 465  for (;;) Line 497  for (;;)
497      }      }
498    else    else
499      {      {
500      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
501      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
502      }      }
503    
504    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 480  for (;;) Line 512  for (;;)
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue;
515      int chartype, othercase;  #ifdef SUPPORT_UCP
516        int chartype, script;
517    #endif
518    
519  #ifdef DEBUG  #ifdef DEBUG
520      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
521      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
522        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
523          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
524  #endif  #endif
# Line 528  for (;;) Line 562  for (;;)
562    
563      code = start_code + state_offset;      code = start_code + state_offset;
564      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
565    
566      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
567      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 536  for (;;) Line 569  for (;;)
569      permitted.      permitted.
570    
571      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
572      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
573      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
574      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
575      opcodes. */      */
576    
577      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
578        {        {
# Line 550  for (;;) Line 583  for (;;)
583        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
584        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
585          {          {
586          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
587          if (d >= OP_NOTPROP)            {
588            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
589              case OP_NOTPROP:
590              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
591              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593              case OP_NOT_HSPACE:
594              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595              case OP_NOT_VSPACE:
596              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597              default: break;
598              }
599          }          }
600        }        }
601      else      else
602        {        {
603        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
604        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
605        }        }
606    
607    
# Line 620  for (;;) Line 663  for (;;)
663    
664        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
665        case OP_BRA:        case OP_BRA:
666          case OP_SBRA:
667        do        do
668          {          {
669          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 629  for (;;) Line 673  for (;;)
673        break;        break;
674    
675        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
676          case OP_CBRA:
677          case OP_SCBRA:
678          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
679          code += GET(code, 1);
680          while (*code == OP_ALT)
681            {
682            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
683            code += GET(code, 1);
684            }
685          break;
686    
687          /*-----------------------------------------------------------------*/
688        case OP_BRAZERO:        case OP_BRAZERO:
689        case OP_BRAMINZERO:        case OP_BRAMINZERO:
690        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 638  for (;;) Line 694  for (;;)
694        break;        break;
695    
696        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
697        case OP_BRANUMBER:        case OP_SKIPZERO:
698        ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);        code += 1 + GET(code, 2);
699          while (*code == OP_ALT) code += GET(code, 1);
700          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
701        break;        break;
702    
703        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
704        case OP_CIRC:        case OP_CIRC:
705        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
706            ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))            ((ims & PCRE_MULTILINE) != 0 &&
707                ptr != end_subject &&
708                WAS_NEWLINE(ptr)))
709          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
710        break;        break;
711    
# Line 679  for (;;) Line 739  for (;;)
739    
740        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
741        case OP_ANY:        case OP_ANY:
742        if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
743            { ADD_NEW(state_offset + 1, 0); }
744          break;
745    
746          /*-----------------------------------------------------------------*/
747          case OP_ALLANY:
748          if (clen > 0)
749          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
750        break;        break;
751    
752        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
753        case OP_EODN:        case OP_EODN:
754        if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
755          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
756        break;        break;
757    
# Line 693  for (;;) Line 759  for (;;)
759        case OP_DOLL:        case OP_DOLL:
760        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
761          {          {
762          if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||          if (clen == 0 ||
763                                  (ims & PCRE_MULTILINE) != 0)))              (IS_NEWLINE(ptr) &&
764                   ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
765                ))
766            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
767          }          }
768        else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
769          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
770        break;        break;
771    
# Line 746  for (;;) Line 814  for (;;)
814        break;        break;
815    
816    
 #ifdef SUPPORT_UCP  
   
817        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
818        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
819        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
820        */        */
821    
822    #ifdef SUPPORT_UCP
823        case OP_PROP:        case OP_PROP:
824        case OP_NOTPROP:        case OP_NOTPROP:
825        if (clen > 0)        if (clen > 0)
826          {          {
827          int rqdtype, category;          BOOL OK;
828          category = _pcre_ucp_findchar(c, &chartype, &othercase);          int category = _pcre_ucp_findprop(c, &chartype, &script);
829          rqdtype = code[1];          switch(code[1])
         if (rqdtype >= 128)  
830            {            {
831            if ((rqdtype - 128 == category) == (codevalue == OP_PROP))            case PT_ANY:
832              { ADD_NEW(state_offset + 2, 0); }            OK = TRUE;
833            }            break;
834          else  
835            {            case PT_LAMP:
836            if ((rqdtype == chartype) == (codevalue == OP_PROP))            OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
837              { ADD_NEW(state_offset + 2, 0); }            break;
838    
839              case PT_GC:
840              OK = category == code[2];
841              break;
842    
843              case PT_PC:
844              OK = chartype == code[2];
845              break;
846    
847              case PT_SC:
848              OK = script == code[2];
849              break;
850    
851              /* Should never occur, but keep compilers from grumbling. */
852    
853              default:
854              OK = codevalue != OP_PROP;
855              break;
856            }            }
857    
858            if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
859          }          }
860        break;        break;
861  #endif  #endif
# Line 779  for (;;) Line 865  for (;;)
865  /* ========================================================================== */  /* ========================================================================== */
866        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
867        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
868        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
869        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
870    
871        case OP_TYPEPLUS:        case OP_TYPEPLUS:
872        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
873          case OP_TYPEPOSPLUS:
874        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
875        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
876        if (clen > 0)        if (clen > 0)
877          {          {
878          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
879              (c < 256 &&              (c < 256 &&
880                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
881                   (ims & PCRE_DOTALL) != 0 ||
882                   !IS_NEWLINE(ptr)
883                  ) &&
884                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
885            {            {
886              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
887                {
888                active_count--;            /* Remove non-match possibility */
889                next_active_state--;
890                }
891            count++;            count++;
892            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
893            }            }
# Line 802  for (;;) Line 897  for (;;)
897        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
898        case OP_TYPEQUERY:        case OP_TYPEQUERY:
899        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
900          case OP_TYPEPOSQUERY:
901        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
902        if (clen > 0)        if (clen > 0)
903          {          {
904          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
905              (c < 256 &&              (c < 256 &&
906                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
907                   (ims & PCRE_DOTALL) != 0 ||
908                   !IS_NEWLINE(ptr)
909                  ) &&
910                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
911            {            {
912              if (codevalue == OP_TYPEPOSQUERY)
913                {
914                active_count--;            /* Remove non-match possibility */
915                next_active_state--;
916                }
917            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
918            }            }
919          }          }
# Line 818  for (;;) Line 922  for (;;)
922        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
923        case OP_TYPESTAR:        case OP_TYPESTAR:
924        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
925          case OP_TYPEPOSSTAR:
926        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
927        if (clen > 0)        if (clen > 0)
928          {          {
929          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
930              (c < 256 &&              (c < 256 &&
931                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
932                   (ims & PCRE_DOTALL) != 0 ||
933                   !IS_NEWLINE(ptr)
934                  ) &&
935                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
936            {            {
937              if (codevalue == OP_TYPEPOSSTAR)
938                {
939                active_count--;            /* Remove non-match possibility */
940                next_active_state--;
941                }
942            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
943            }            }
944          }          }
# Line 833  for (;;) Line 946  for (;;)
946    
947        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
948        case OP_TYPEEXACT:        case OP_TYPEEXACT:
949          count = current_state->count;  /* Number already matched */
950          if (clen > 0)
951            {
952            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
953                (c < 256 &&
954                  (d != OP_ANY ||
955                   (ims & PCRE_DOTALL) != 0 ||
956                   !IS_NEWLINE(ptr)
957                  ) &&
958                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
959              {
960              if (++count >= GET2(code, 1))
961                { ADD_NEW(state_offset + 4, 0); }
962              else
963                { ADD_NEW(state_offset, count); }
964              }
965            }
966          break;
967    
968          /*-----------------------------------------------------------------*/
969        case OP_TYPEUPTO:        case OP_TYPEUPTO:
970        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
971        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
972          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
973        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
974        if (clen > 0)        if (clen > 0)
975          {          {
976          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
977              (c < 256 &&              (c < 256 &&
978                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
979                   (ims & PCRE_DOTALL) != 0 ||
980                   !IS_NEWLINE(ptr)
981                  ) &&
982                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
983            {            {
984              if (codevalue == OP_TYPEPOSUPTO)
985                {
986                active_count--;           /* Remove non-match possibility */
987                next_active_state--;
988                }
989            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
990              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
991            else            else
# Line 855  for (;;) Line 996  for (;;)
996    
997  /* ========================================================================== */  /* ========================================================================== */
998        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
999        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
1000        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
1001        d variable. */        is in the d variable. */
1002    
1003    #ifdef SUPPORT_UCP
1004        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
1005        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1006          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
1007        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
1008        if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
1009        if (clen > 0)        if (clen > 0)
1010          {          {
1011          int category = _pcre_ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1012          int rqdtype = code[2];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1013          if ((d == OP_PROP) ==          switch(code[2])
1014              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
1015            { count++; ADD_NEW(state_offset, count); }            case PT_ANY:
1016              OK = TRUE;
1017              break;
1018    
1019              case PT_LAMP:
1020              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1021              break;
1022    
1023              case PT_GC:
1024              OK = category == code[3];
1025              break;
1026    
1027              case PT_PC:
1028              OK = chartype == code[3];
1029              break;
1030    
1031              case PT_SC:
1032              OK = script == code[3];
1033              break;
1034    
1035              /* Should never occur, but keep compilers from grumbling. */
1036    
1037              default:
1038              OK = codevalue != OP_PROP;
1039              break;
1040              }
1041    
1042            if (OK == (d == OP_PROP))
1043              {
1044              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1045                {
1046                active_count--;           /* Remove non-match possibility */
1047                next_active_state--;
1048                }
1049              count++;
1050              ADD_NEW(state_offset, count);
1051              }
1052          }          }
1053        break;        break;
1054    
1055        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1056        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1057        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1058          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1059        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1060        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1061        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1062          {          {
1063          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1064          int ncount = 0;          int ncount = 0;
1065            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1066              {
1067              active_count--;           /* Remove non-match possibility */
1068              next_active_state--;
1069              }
1070          while (nptr < end_subject)          while (nptr < end_subject)
1071            {            {
1072            int nd;            int nd;
1073            int ndlen = 1;            int ndlen = 1;
1074            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1075            if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1076            ncount++;            ncount++;
1077            nptr += ndlen;            nptr += ndlen;
1078            }            }
# Line 895  for (;;) Line 1080  for (;;)
1080          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1081          }          }
1082        break;        break;
1083    #endif
1084    
1085          /*-----------------------------------------------------------------*/
1086          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1087          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1088          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1089          count = current_state->count;  /* Already matched */
1090          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1091          if (clen > 0)
1092            {
1093            int ncount = 0;
1094            switch (c)
1095              {
1096              case 0x000b:
1097              case 0x000c:
1098              case 0x0085:
1099              case 0x2028:
1100              case 0x2029:
1101              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1102              goto ANYNL01;
1103    
1104              case 0x000d:
1105              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1106              /* Fall through */
1107    
1108              ANYNL01:
1109              case 0x000a:
1110              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1111                {
1112                active_count--;           /* Remove non-match possibility */
1113                next_active_state--;
1114                }
1115              count++;
1116              ADD_NEW_DATA(-state_offset, count, ncount);
1117              break;
1118    
1119              default:
1120              break;
1121              }
1122            }
1123          break;
1124    
1125          /*-----------------------------------------------------------------*/
1126          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1127          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1128          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1129          count = current_state->count;  /* Already matched */
1130          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1131          if (clen > 0)
1132            {
1133            BOOL OK;
1134            switch (c)
1135              {
1136              case 0x000a:
1137              case 0x000b:
1138              case 0x000c:
1139              case 0x000d:
1140              case 0x0085:
1141              case 0x2028:
1142              case 0x2029:
1143              OK = TRUE;
1144              break;
1145    
1146              default:
1147              OK = FALSE;
1148              break;
1149              }
1150    
1151            if (OK == (d == OP_VSPACE))
1152              {
1153              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1154                {
1155                active_count--;           /* Remove non-match possibility */
1156                next_active_state--;
1157                }
1158              count++;
1159              ADD_NEW_DATA(-state_offset, count, 0);
1160              }
1161            }
1162          break;
1163    
1164        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1165          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1166          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1167          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1168          count = current_state->count;  /* Already matched */
1169          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1170          if (clen > 0)
1171            {
1172            BOOL OK;
1173            switch (c)
1174              {
1175              case 0x09:      /* HT */
1176              case 0x20:      /* SPACE */
1177              case 0xa0:      /* NBSP */
1178              case 0x1680:    /* OGHAM SPACE MARK */
1179              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1180              case 0x2000:    /* EN QUAD */
1181              case 0x2001:    /* EM QUAD */
1182              case 0x2002:    /* EN SPACE */
1183              case 0x2003:    /* EM SPACE */
1184              case 0x2004:    /* THREE-PER-EM SPACE */
1185              case 0x2005:    /* FOUR-PER-EM SPACE */
1186              case 0x2006:    /* SIX-PER-EM SPACE */
1187              case 0x2007:    /* FIGURE SPACE */
1188              case 0x2008:    /* PUNCTUATION SPACE */
1189              case 0x2009:    /* THIN SPACE */
1190              case 0x200A:    /* HAIR SPACE */
1191              case 0x202f:    /* NARROW NO-BREAK SPACE */
1192              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1193              case 0x3000:    /* IDEOGRAPHIC SPACE */
1194              OK = TRUE;
1195              break;
1196    
1197              default:
1198              OK = FALSE;
1199              break;
1200              }
1201    
1202            if (OK == (d == OP_HSPACE))
1203              {
1204              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1205                {
1206                active_count--;           /* Remove non-match possibility */
1207                next_active_state--;
1208                }
1209              count++;
1210              ADD_NEW_DATA(-state_offset, count, 0);
1211              }
1212            }
1213          break;
1214    
1215          /*-----------------------------------------------------------------*/
1216    #ifdef SUPPORT_UCP
1217        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1218        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1219        count = 3;        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1220          count = 4;
1221        goto QS1;        goto QS1;
1222    
1223        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1224        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1225          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1226        count = 0;        count = 0;
1227    
1228        QS1:        QS1:
1229    
1230        ADD_ACTIVE(state_offset + 3, 0);        ADD_ACTIVE(state_offset + 4, 0);
1231        if (clen > 0)        if (clen > 0)
1232          {          {
1233          int category = _pcre_ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1234          int rqdtype = code[2];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1235          if ((d == OP_PROP) ==          switch(code[2])
1236              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
1237            { ADD_NEW(state_offset + count, 0); }            case PT_ANY:
1238              OK = TRUE;
1239              break;
1240    
1241              case PT_LAMP:
1242              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1243              break;
1244    
1245              case PT_GC:
1246              OK = category == code[3];
1247              break;
1248    
1249              case PT_PC:
1250              OK = chartype == code[3];
1251              break;
1252    
1253              case PT_SC:
1254              OK = script == code[3];
1255              break;
1256    
1257              /* Should never occur, but keep compilers from grumbling. */
1258    
1259              default:
1260              OK = codevalue != OP_PROP;
1261              break;
1262              }
1263    
1264            if (OK == (d == OP_PROP))
1265              {
1266              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1267                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1268                {
1269                active_count--;           /* Remove non-match possibility */
1270                next_active_state--;
1271                }
1272              ADD_NEW(state_offset + count, 0);
1273              }
1274          }          }
1275        break;        break;
1276    
1277        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1278        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1279        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1280          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1281        count = 2;        count = 2;
1282        goto QS2;        goto QS2;
1283    
1284        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1285        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1286          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1287        count = 0;        count = 0;
1288    
1289        QS2:        QS2:
1290    
1291        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1292        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1293          {          {
1294          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1295          int ncount = 0;          int ncount = 0;
1296            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1297                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1298              {
1299              active_count--;           /* Remove non-match possibility */
1300              next_active_state--;
1301              }
1302          while (nptr < end_subject)          while (nptr < end_subject)
1303            {            {
1304            int nd;            int nd;
1305            int ndlen = 1;            int ndlen = 1;
1306            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1307            if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1308            ncount++;            ncount++;
1309            nptr += ndlen;            nptr += ndlen;
1310            }            }
1311          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1312          }          }
1313        break;        break;
1314    #endif
1315    
1316        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1317          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1318          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1319          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1320          count = 2;
1321          goto QS3;
1322    
1323          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1324          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1325          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1326          count = 0;
1327    
1328          QS3:
1329          ADD_ACTIVE(state_offset + 2, 0);
1330          if (clen > 0)
1331            {
1332            int ncount = 0;
1333            switch (c)
1334              {
1335              case 0x000b:
1336              case 0x000c:
1337              case 0x0085:
1338              case 0x2028:
1339              case 0x2029:
1340              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1341              goto ANYNL02;
1342    
1343              case 0x000d:
1344              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1345              /* Fall through */
1346    
1347              ANYNL02:
1348              case 0x000a:
1349              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1350                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1351                {
1352                active_count--;           /* Remove non-match possibility */
1353                next_active_state--;
1354                }
1355              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1356              break;
1357    
1358              default:
1359              break;
1360              }
1361            }
1362          break;
1363    
1364          /*-----------------------------------------------------------------*/
1365          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1366          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1367          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1368          count = 2;
1369          goto QS4;
1370    
1371          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1372          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1373          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1374          count = 0;
1375    
1376          QS4:
1377          ADD_ACTIVE(state_offset + 2, 0);
1378          if (clen > 0)
1379            {
1380            BOOL OK;
1381            switch (c)
1382              {
1383              case 0x000a:
1384              case 0x000b:
1385              case 0x000c:
1386              case 0x000d:
1387              case 0x0085:
1388              case 0x2028:
1389              case 0x2029:
1390              OK = TRUE;
1391              break;
1392    
1393              default:
1394              OK = FALSE;
1395              break;
1396              }
1397            if (OK == (d == OP_VSPACE))
1398              {
1399              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1400                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1401                {
1402                active_count--;           /* Remove non-match possibility */
1403                next_active_state--;
1404                }
1405              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1406              }
1407            }
1408          break;
1409    
1410          /*-----------------------------------------------------------------*/
1411          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1412          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1413          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1414          count = 2;
1415          goto QS5;
1416    
1417          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1418          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1419          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1420          count = 0;
1421    
1422          QS5:
1423          ADD_ACTIVE(state_offset + 2, 0);
1424          if (clen > 0)
1425            {
1426            BOOL OK;
1427            switch (c)
1428              {
1429              case 0x09:      /* HT */
1430              case 0x20:      /* SPACE */
1431              case 0xa0:      /* NBSP */
1432              case 0x1680:    /* OGHAM SPACE MARK */
1433              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1434              case 0x2000:    /* EN QUAD */
1435              case 0x2001:    /* EM QUAD */
1436              case 0x2002:    /* EN SPACE */
1437              case 0x2003:    /* EM SPACE */
1438              case 0x2004:    /* THREE-PER-EM SPACE */
1439              case 0x2005:    /* FOUR-PER-EM SPACE */
1440              case 0x2006:    /* SIX-PER-EM SPACE */
1441              case 0x2007:    /* FIGURE SPACE */
1442              case 0x2008:    /* PUNCTUATION SPACE */
1443              case 0x2009:    /* THIN SPACE */
1444              case 0x200A:    /* HAIR SPACE */
1445              case 0x202f:    /* NARROW NO-BREAK SPACE */
1446              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1447              case 0x3000:    /* IDEOGRAPHIC SPACE */
1448              OK = TRUE;
1449              break;
1450    
1451              default:
1452              OK = FALSE;
1453              break;
1454              }
1455    
1456            if (OK == (d == OP_HSPACE))
1457              {
1458              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1459                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1460                {
1461                active_count--;           /* Remove non-match possibility */
1462                next_active_state--;
1463                }
1464              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1465              }
1466            }
1467          break;
1468    
1469          /*-----------------------------------------------------------------*/
1470    #ifdef SUPPORT_UCP
1471        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1472        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1473        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1474          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1475        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1476          { ADD_ACTIVE(state_offset + 5, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1477        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1478        if (clen > 0)        if (clen > 0)
1479          {          {
1480          int category = _pcre_ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1481          int rqdtype = code[4];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1482          if ((d == OP_PROP) ==          switch(code[4])
             (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))  
1483            {            {
1484              case PT_ANY:
1485              OK = TRUE;
1486              break;
1487    
1488              case PT_LAMP:
1489              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1490              break;
1491    
1492              case PT_GC:
1493              OK = category == code[5];
1494              break;
1495    
1496              case PT_PC:
1497              OK = chartype == code[5];
1498              break;
1499    
1500              case PT_SC:
1501              OK = script == code[5];
1502              break;
1503    
1504              /* Should never occur, but keep compilers from grumbling. */
1505    
1506              default:
1507              OK = codevalue != OP_PROP;
1508              break;
1509              }
1510    
1511            if (OK == (d == OP_PROP))
1512              {
1513              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1514                {
1515                active_count--;           /* Remove non-match possibility */
1516                next_active_state--;
1517                }
1518            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1519              { ADD_NEW(state_offset + 5, 0); }              { ADD_NEW(state_offset + 6, 0); }
1520            else            else
1521              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1522            }            }
# Line 975  for (;;) Line 1527  for (;;)
1527        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1528        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1529        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1530          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1531        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1532          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1533        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1534        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1535          {          {
1536          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1537          int ncount = 0;          int ncount = 0;
1538            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1539              {
1540              active_count--;           /* Remove non-match possibility */
1541              next_active_state--;
1542              }
1543          while (nptr < end_subject)          while (nptr < end_subject)
1544            {            {
1545            int nd;            int nd;
1546            int ndlen = 1;            int ndlen = 1;
1547            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1548            if (_pcre_ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1549            ncount++;            ncount++;
1550            nptr += ndlen;            nptr += ndlen;
1551            }            }
# Line 997  for (;;) Line 1555  for (;;)
1555            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1556          }          }
1557        break;        break;
1558    #endif
1559    
1560          /*-----------------------------------------------------------------*/
1561          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1562          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1563          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1564          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1565          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1566            { ADD_ACTIVE(state_offset + 4, 0); }
1567          count = current_state->count;  /* Number already matched */
1568          if (clen > 0)
1569            {
1570            int ncount = 0;
1571            switch (c)
1572              {
1573              case 0x000b:
1574              case 0x000c:
1575              case 0x0085:
1576              case 0x2028:
1577              case 0x2029:
1578              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1579              goto ANYNL03;
1580    
1581              case 0x000d:
1582              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1583              /* Fall through */
1584    
1585              ANYNL03:
1586              case 0x000a:
1587              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1588                {
1589                active_count--;           /* Remove non-match possibility */
1590                next_active_state--;
1591                }
1592              if (++count >= GET2(code, 1))
1593                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1594              else
1595                { ADD_NEW_DATA(-state_offset, count, ncount); }
1596              break;
1597    
1598              default:
1599              break;
1600              }
1601            }
1602          break;
1603    
1604          /*-----------------------------------------------------------------*/
1605          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1606          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1607          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1608          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1609          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1610            { ADD_ACTIVE(state_offset + 4, 0); }
1611          count = current_state->count;  /* Number already matched */
1612          if (clen > 0)
1613            {
1614            BOOL OK;
1615            switch (c)
1616              {
1617              case 0x000a:
1618              case 0x000b:
1619              case 0x000c:
1620              case 0x000d:
1621              case 0x0085:
1622              case 0x2028:
1623              case 0x2029:
1624              OK = TRUE;
1625              break;
1626    
1627              default:
1628              OK = FALSE;
1629              }
1630    
1631            if (OK == (d == OP_VSPACE))
1632              {
1633              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1634                {
1635                active_count--;           /* Remove non-match possibility */
1636                next_active_state--;
1637                }
1638              if (++count >= GET2(code, 1))
1639                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1640              else
1641                { ADD_NEW_DATA(-state_offset, count, 0); }
1642              }
1643            }
1644          break;
1645    
1646          /*-----------------------------------------------------------------*/
1647          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1648          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1649          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1650          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1651          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1652            { ADD_ACTIVE(state_offset + 4, 0); }
1653          count = current_state->count;  /* Number already matched */
1654          if (clen > 0)
1655            {
1656            BOOL OK;
1657            switch (c)
1658              {
1659              case 0x09:      /* HT */
1660              case 0x20:      /* SPACE */
1661              case 0xa0:      /* NBSP */
1662              case 0x1680:    /* OGHAM SPACE MARK */
1663              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1664              case 0x2000:    /* EN QUAD */
1665              case 0x2001:    /* EM QUAD */
1666              case 0x2002:    /* EN SPACE */
1667              case 0x2003:    /* EM SPACE */
1668              case 0x2004:    /* THREE-PER-EM SPACE */
1669              case 0x2005:    /* FOUR-PER-EM SPACE */
1670              case 0x2006:    /* SIX-PER-EM SPACE */
1671              case 0x2007:    /* FIGURE SPACE */
1672              case 0x2008:    /* PUNCTUATION SPACE */
1673              case 0x2009:    /* THIN SPACE */
1674              case 0x200A:    /* HAIR SPACE */
1675              case 0x202f:    /* NARROW NO-BREAK SPACE */
1676              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1677              case 0x3000:    /* IDEOGRAPHIC SPACE */
1678              OK = TRUE;
1679              break;
1680    
1681              default:
1682              OK = FALSE;
1683              break;
1684              }
1685    
1686            if (OK == (d == OP_HSPACE))
1687              {
1688              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1689                {
1690                active_count--;           /* Remove non-match possibility */
1691                next_active_state--;
1692                }
1693              if (++count >= GET2(code, 1))
1694                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1695              else
1696                { ADD_NEW_DATA(-state_offset, count, 0); }
1697              }
1698            }
1699          break;
1700    
1701  /* ========================================================================== */  /* ========================================================================== */
1702        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1018  for (;;) Line 1718  for (;;)
1718          {          {
1719          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1720            {            {
1721              unsigned int othercase;
1722            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1723    
1724            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
1725            other case of the character, if there is one. The result of            other case of the character. */
           _pcre_ucp_findchar() is < 0 if the char isn't found, and othercase is  
           returned as zero if there isn't another case. */  
1726    
1727  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1728            if (_pcre_ucp_findchar(c, &chartype, &othercase) < 0)            othercase = _pcre_ucp_othercase(c);
1729    #else
1730              othercase = NOTACHAR;
1731  #endif  #endif
             othercase = -1;  
1732    
1733            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1734            }            }
# Line 1050  for (;;) Line 1750  for (;;)
1750        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1751    
1752        case OP_EXTUNI:        case OP_EXTUNI:
1753        if (clen > 0 && _pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1754          {          {
1755          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1756          int ncount = 0;          int ncount = 0;
# Line 1058  for (;;) Line 1758  for (;;)
1758            {            {
1759            int nclen = 1;            int nclen = 1;
1760            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1761            if (_pcre_ucp_findchar(c, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
1762            ncount++;            ncount++;
1763            nptr += nclen;            nptr += nclen;
1764            }            }
# Line 1068  for (;;) Line 1768  for (;;)
1768  #endif  #endif
1769    
1770        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1771          /* This is tricky, like EXTUNI, because it too can match more than one
1772          character (when CR is followed by LF). In this case, set up a negative
1773          state to wait for one character to pass before continuing. */
1774    
1775          case OP_ANYNL:
1776          if (clen > 0) switch(c)
1777            {
1778            case 0x000b:
1779            case 0x000c:
1780            case 0x0085:
1781            case 0x2028:
1782            case 0x2029:
1783            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1784    
1785            case 0x000a:
1786            ADD_NEW(state_offset + 1, 0);
1787            break;
1788    
1789            case 0x000d:
1790            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1791              {
1792              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1793              }
1794            else
1795              {
1796              ADD_NEW(state_offset + 1, 0);
1797              }
1798            break;
1799            }
1800          break;
1801    
1802          /*-----------------------------------------------------------------*/
1803          case OP_NOT_VSPACE:
1804          if (clen > 0) switch(c)
1805            {
1806            case 0x000a:
1807            case 0x000b:
1808            case 0x000c:
1809            case 0x000d:
1810            case 0x0085:
1811            case 0x2028:
1812            case 0x2029:
1813            break;
1814    
1815            default:
1816            ADD_NEW(state_offset + 1, 0);
1817            break;
1818            }
1819          break;
1820    
1821          /*-----------------------------------------------------------------*/
1822          case OP_VSPACE:
1823          if (clen > 0) switch(c)
1824            {
1825            case 0x000a:
1826            case 0x000b:
1827            case 0x000c:
1828            case 0x000d:
1829            case 0x0085:
1830            case 0x2028:
1831            case 0x2029:
1832            ADD_NEW(state_offset + 1, 0);
1833            break;
1834    
1835            default: break;
1836            }
1837          break;
1838    
1839          /*-----------------------------------------------------------------*/
1840          case OP_NOT_HSPACE:
1841          if (clen > 0) switch(c)
1842            {
1843            case 0x09:      /* HT */
1844            case 0x20:      /* SPACE */
1845            case 0xa0:      /* NBSP */
1846            case 0x1680:    /* OGHAM SPACE MARK */
1847            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1848            case 0x2000:    /* EN QUAD */
1849            case 0x2001:    /* EM QUAD */
1850            case 0x2002:    /* EN SPACE */
1851            case 0x2003:    /* EM SPACE */
1852            case 0x2004:    /* THREE-PER-EM SPACE */
1853            case 0x2005:    /* FOUR-PER-EM SPACE */
1854            case 0x2006:    /* SIX-PER-EM SPACE */
1855            case 0x2007:    /* FIGURE SPACE */
1856            case 0x2008:    /* PUNCTUATION SPACE */
1857            case 0x2009:    /* THIN SPACE */
1858            case 0x200A:    /* HAIR SPACE */
1859            case 0x202f:    /* NARROW NO-BREAK SPACE */
1860            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1861            case 0x3000:    /* IDEOGRAPHIC SPACE */
1862            break;
1863    
1864            default:
1865            ADD_NEW(state_offset + 1, 0);
1866            break;
1867            }
1868          break;
1869    
1870          /*-----------------------------------------------------------------*/
1871          case OP_HSPACE:
1872          if (clen > 0) switch(c)
1873            {
1874            case 0x09:      /* HT */
1875            case 0x20:      /* SPACE */
1876            case 0xa0:      /* NBSP */
1877            case 0x1680:    /* OGHAM SPACE MARK */
1878            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1879            case 0x2000:    /* EN QUAD */
1880            case 0x2001:    /* EM QUAD */
1881            case 0x2002:    /* EN SPACE */
1882            case 0x2003:    /* EM SPACE */
1883            case 0x2004:    /* THREE-PER-EM SPACE */
1884            case 0x2005:    /* FOUR-PER-EM SPACE */
1885            case 0x2006:    /* SIX-PER-EM SPACE */
1886            case 0x2007:    /* FIGURE SPACE */
1887            case 0x2008:    /* PUNCTUATION SPACE */
1888            case 0x2009:    /* THIN SPACE */
1889            case 0x200A:    /* HAIR SPACE */
1890            case 0x202f:    /* NARROW NO-BREAK SPACE */
1891            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1892            case 0x3000:    /* IDEOGRAPHIC SPACE */
1893            ADD_NEW(state_offset + 1, 0);
1894            break;
1895            }
1896          break;
1897    
1898          /*-----------------------------------------------------------------*/
1899        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1900        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1901        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1075  for (;;) Line 1903  for (;;)
1903        case OP_NOT:        case OP_NOT:
1904        if (clen > 0)        if (clen > 0)
1905          {          {
1906          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1907          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1908          }          }
1909        break;        break;
# Line 1083  for (;;) Line 1911  for (;;)
1911        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1912        case OP_PLUS:        case OP_PLUS:
1913        case OP_MINPLUS:        case OP_MINPLUS:
1914          case OP_POSPLUS:
1915        case OP_NOTPLUS:        case OP_NOTPLUS:
1916        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1917          case OP_NOTPOSPLUS:
1918        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1919        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1920        if (clen > 0)        if (clen > 0)
1921          {          {
1922          int otherd = -1;          unsigned int otherd = NOTACHAR;
1923          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1924            {            {
1925  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1926            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1927              {              {
1928  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1929              if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1930  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1931              }              }
1932            else            else
# Line 1104  for (;;) Line 1934  for (;;)
1934            otherd = fcc[d];            otherd = fcc[d];
1935            }            }
1936          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1937            { count++; ADD_NEW(state_offset, count); }            {
1938              if (count > 0 &&
1939                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1940                {
1941                active_count--;             /* Remove non-match possibility */
1942                next_active_state--;
1943                }
1944              count++;
1945              ADD_NEW(state_offset, count);
1946              }
1947          }          }
1948        break;        break;
1949    
1950        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1951        case OP_QUERY:        case OP_QUERY:
1952        case OP_MINQUERY:        case OP_MINQUERY:
1953          case OP_POSQUERY:
1954        case OP_NOTQUERY:        case OP_NOTQUERY:
1955        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1956          case OP_NOTPOSQUERY:
1957        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1958        if (clen > 0)        if (clen > 0)
1959          {          {
1960          int otherd = -1;          unsigned int otherd = NOTACHAR;
1961          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1962            {            {
1963  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1964            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1965              {              {
1966  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1967              if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1968  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1969              }              }
1970            else            else
# Line 1131  for (;;) Line 1972  for (;;)
1972            otherd = fcc[d];            otherd = fcc[d];
1973            }            }
1974          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1975            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1976              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1977                {
1978                active_count--;            /* Remove non-match possibility */
1979                next_active_state--;
1980                }
1981              ADD_NEW(state_offset + dlen + 1, 0);
1982              }
1983          }          }
1984        break;        break;
1985    
1986        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1987        case OP_STAR:        case OP_STAR:
1988        case OP_MINSTAR:        case OP_MINSTAR:
1989          case OP_POSSTAR:
1990        case OP_NOTSTAR:        case OP_NOTSTAR:
1991        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1992          case OP_NOTPOSSTAR:
1993        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1994        if (clen > 0)        if (clen > 0)
1995          {          {
1996          int otherd = -1;          unsigned int otherd = NOTACHAR;
1997          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1998            {            {
1999  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2000            if (utf8 && c >= 128)            if (utf8 && d >= 128)
2001              {              {
2002  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2003              if (_pcre_ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
2004  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2005              }              }
2006            else            else
# Line 1158  for (;;) Line 2008  for (;;)
2008            otherd = fcc[d];            otherd = fcc[d];
2009            }            }
2010          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2011            { ADD_NEW(state_offset, 0); }            {
2012              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
2013                {
2014                active_count--;            /* Remove non-match possibility */
2015                next_active_state--;
2016                }
2017              ADD_NEW(state_offset, 0);
2018              }
2019          }          }
2020        break;        break;
2021    
2022        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2023        case OP_EXACT:        case OP_EXACT:
2024          case OP_NOTEXACT:
2025          count = current_state->count;  /* Number already matched */
2026          if (clen > 0)
2027            {
2028            unsigned int otherd = NOTACHAR;
2029            if ((ims & PCRE_CASELESS) != 0)
2030              {
2031    #ifdef SUPPORT_UTF8
2032              if (utf8 && d >= 128)
2033                {
2034    #ifdef SUPPORT_UCP
2035                otherd = _pcre_ucp_othercase(d);
2036    #endif  /* SUPPORT_UCP */
2037                }
2038              else
2039    #endif  /* SUPPORT_UTF8 */
2040              otherd = fcc[d];
2041              }
2042            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2043              {
2044              if (++count >= GET2(code, 1))
2045                { ADD_NEW(state_offset + dlen + 3, 0); }
2046              else
2047                { ADD_NEW(state_offset, count); }
2048              }
2049            }
2050          break;
2051    
2052          /*-----------------------------------------------------------------*/
2053        case OP_UPTO:        case OP_UPTO:
2054        case OP_MINUPTO:        case OP_MINUPTO:
2055        case OP_NOTEXACT:        case OP_POSUPTO:
2056        case OP_NOTUPTO:        case OP_NOTUPTO:
2057        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2058        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
2059          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
2060        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2061        if (clen > 0)        if (clen > 0)
2062          {          {
2063          int otherd = -1;          unsigned int otherd = NOTACHAR;
2064          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
2065            {            {
2066  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
2067            if (utf8 && c >= 128)            if (utf8 && d >= 128)
2068              {              {
2069  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2070              if (_pcre_ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
2071  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2072              }              }
2073            else            else
# Line 1190  for (;;) Line 2076  for (;;)
2076            }            }
2077          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2078            {            {
2079              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
2080                {
2081                active_count--;             /* Remove non-match possibility */
2082                next_active_state--;
2083                }
2084            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2085              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
2086            else            else
# Line 1267  for (;;) Line 2158  for (;;)
2158              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 5, 0); }
2159            if (isinclass)            if (isinclass)
2160              {              {
2161              if (++count >= GET2(ecode, 3))              int max = GET2(ecode, 3);
2162                if (++count >= max && max != 0)   /* Max 0 => no limit */
2163                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 5, 0); }
2164              else              else
2165                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
# Line 1283  for (;;) Line 2175  for (;;)
2175    
2176  /* ========================================================================== */  /* ========================================================================== */
2177        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2178        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assertion
2179          (?!) is optimised when compiling to OP_FAIL, so we have to support that,
2180          though the other "backtracking verbs" are not supported. */
2181    
2182          case OP_FAIL:
2183          break;
2184    
2185        case OP_ASSERT:        case OP_ASSERT:
2186        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 1317  for (;;) Line 2214  for (;;)
2214    
2215        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2216        case OP_COND:        case OP_COND:
2217          case OP_SCOND:
2218          {          {
2219          int local_offsets[1000];          int local_offsets[1000];
2220          int local_workspace[1000];          int local_workspace[1000];
2221          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
2222    
2223          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
2224          means "test if in a recursion". */  
2225            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
2226    
2227          if (condcode == OP_CREF)          /* The DEFINE condition is always false */
2228    
2229            if (condcode == OP_DEF)
2230              {
2231              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
2232              }
2233    
2234            /* The only supported version of OP_RREF is for the value RREF_ANY,
2235            which means "test if in any recursion". We can't test for specifically
2236            recursed groups. */
2237    
2238            else if (condcode == OP_RREF)
2239            {            {
2240            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
2241            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2242            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
2243              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
2244            }            }
# Line 1519  for (;;) Line 2429  for (;;)
2429          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
2430          cb.callout_number   = code[1];          cb.callout_number   = code[1];
2431          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
2432          cb.subject          = (char *)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
2433          cb.subject_length   = end_subject - start_subject;          cb.subject_length   = end_subject - start_subject;
2434          cb.start_match      = current_subject - start_subject;          cb.start_match      = current_subject - start_subject;
2435          cb.current_position = ptr - start_subject;          cb.current_position = ptr - start_subject;
# Line 1567  for (;;) Line 2477  for (;;)
2477      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
2478        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
2479        rlevel*2-2, SP));        rlevel*2-2, SP));
2480      return match_count;      break;        /* In effect, "return", but see the comment below */
2481      }      }
2482    
2483    /* One or more states are active for the next character. */    /* One or more states are active for the next character. */
# Line 1575  for (;;) Line 2485  for (;;)
2485    ptr += clen;    /* Advance to next subject character */    ptr += clen;    /* Advance to next subject character */
2486    }               /* Loop to move along the subject string */    }               /* Loop to move along the subject string */
2487    
2488  /* Control never gets here, but we must keep the compiler happy. */  /* Control gets here from "break" a few lines above. We do it this way because
2489    if we use "return" above, we have compiler trouble. Some compilers warn if
2490    there's nothing here because they think the function doesn't return a value. On
2491    the other hand, if we put a dummy statement here, some more clever compilers
2492    complain that it can't be reached. Sigh. */
2493    
2494  DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"  return match_count;
   "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));  
 return PCRE_ERROR_NOMATCH;  
2495  }  }
2496    
2497    
# Line 1595  is not anchored. Line 2507  is not anchored.
2507    
2508  Arguments:  Arguments:
2509    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2510    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2511    subject         points to the subject string    subject         points to the subject string
2512    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2513    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1611  Returns:          > 0 => number of match Line 2523  Returns:          > 0 => number of match
2523                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2524  */  */
2525    
2526  PCRE_EXPORT int  PCRE_EXP_DEFN int
2527  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2528    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2529    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
2530  {  {
2531  real_pcre *re = (real_pcre *)argument_re;  real_pcre *re = (real_pcre *)argument_re;
2532  dfa_match_data match_block;  dfa_match_data match_block;
2533    dfa_match_data *md = &match_block;
2534  BOOL utf8, anchored, startline, firstline;  BOOL utf8, anchored, startline, firstline;
2535  const uschar *current_subject, *end_subject, *lcc;  const uschar *current_subject, *end_subject, *lcc;
2536    
# Line 1632  BOOL req_byte_caseless = FALSE; Line 2545  BOOL req_byte_caseless = FALSE;
2545  int first_byte = -1;  int first_byte = -1;
2546  int req_byte = -1;  int req_byte = -1;
2547  int req_byte2 = -1;  int req_byte2 = -1;
2548    int newline;
2549    
2550  /* Plausibility checks */  /* Plausibility checks */
2551    
# Line 1646  flipping, so we scan the extra_data bloc Line 2560  flipping, so we scan the extra_data bloc
2560  match block, so we must initialize them beforehand. However, the other fields  match block, so we must initialize them beforehand. However, the other fields
2561  in the match block must not be set until after the byte flipping. */  in the match block must not be set until after the byte flipping. */
2562    
2563  match_block.tables = re->tables;  md->tables = re->tables;
2564  match_block.callout_data = NULL;  md->callout_data = NULL;
2565    
2566  if (extra_data != NULL)  if (extra_data != NULL)
2567    {    {
# Line 1655  if (extra_data != NULL) Line 2569  if (extra_data != NULL)
2569    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
2570      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
2571    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
2572      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
2573        return PCRE_ERROR_DFA_UMLIMIT;
2574    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
2575      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
2576    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
2577      match_block.tables = extra_data->tables;      md->tables = extra_data->tables;
2578    }    }
2579    
2580  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 1679  current_subject = (const unsigned char * Line 2595  current_subject = (const unsigned char *
2595  end_subject = (const unsigned char *)subject + length;  end_subject = (const unsigned char *)subject + length;
2596  req_byte_ptr = current_subject - 1;  req_byte_ptr = current_subject - 1;
2597    
2598    #ifdef SUPPORT_UTF8
2599  utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = (re->options & PCRE_UTF8) != 0;
2600  anchored = (options & PCRE_ANCHORED) != 0 || (re->options & PCRE_ANCHORED) != 0;  #else
2601    utf8 = FALSE;
2602    #endif
2603    
2604    anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
2605      (re->options & PCRE_ANCHORED) != 0;
2606    
2607  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
2608    
2609  match_block.start_code = (const uschar *)argument_re +  md->start_code = (const uschar *)argument_re +
2610      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
2611  match_block.start_subject = (const unsigned char *)subject;  md->start_subject = (const unsigned char *)subject;
2612  match_block.end_subject = end_subject;  md->end_subject = end_subject;
2613  match_block.moptions = options;  md->moptions = options;
2614  match_block.poptions = re->options;  md->poptions = re->options;
2615    
2616    /* If the BSR option is not set at match time, copy what was set
2617    at compile time. */
2618    
2619    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2620      {
2621      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2622        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2623    #ifdef BSR_ANYCRLF
2624      else md->moptions |= PCRE_BSR_ANYCRLF;
2625    #endif
2626      }
2627    
2628    /* Handle different types of newline. The three bits give eight cases. If
2629    nothing is set at run time, whatever was used at compile time applies. */
2630    
2631    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2632             PCRE_NEWLINE_BITS)
2633      {
2634      case 0: newline = NEWLINE; break;   /* Compile-time default */
2635      case PCRE_NEWLINE_CR: newline = '\r'; break;
2636      case PCRE_NEWLINE_LF: newline = '\n'; break;
2637      case PCRE_NEWLINE_CR+
2638           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2639      case PCRE_NEWLINE_ANY: newline = -1; break;
2640      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2641      default: return PCRE_ERROR_BADNEWLINE;
2642      }
2643    
2644    if (newline == -2)
2645      {
2646      md->nltype = NLTYPE_ANYCRLF;
2647      }
2648    else if (newline < 0)
2649      {
2650      md->nltype = NLTYPE_ANY;
2651      }
2652    else
2653      {
2654      md->nltype = NLTYPE_FIXED;
2655      if (newline > 255)
2656        {
2657        md->nllen = 2;
2658        md->nl[0] = (newline >> 8) & 255;
2659        md->nl[1] = newline & 255;
2660        }
2661      else
2662        {
2663        md->nllen = 1;
2664        md->nl[0] = newline;
2665        }
2666      }
2667    
2668  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
2669  back the character offset. */  back the character offset. */
# Line 1715  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 2689  if (utf8 && (options & PCRE_NO_UTF8_CHEC
2689  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
2690  in other programs later. */  in other programs later. */
2691    
2692  if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = _pcre_default_tables;
2693    
2694  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
2695  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2696    
2697  lcc = match_block.tables + lcc_offset;  lcc = md->tables + lcc_offset;
2698  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2699  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2700    
2701  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 1732  studied, there may be a bitmap of possib Line 2706  studied, there may be a bitmap of possib
2706    
2707  if (!anchored)  if (!anchored)
2708    {    {
2709    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2710      {      {
2711      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2712      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 1749  if (!anchored) Line 2723  if (!anchored)
2723  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2724  character" set. */  character" set. */
2725    
2726  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2727    {    {
2728    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2729    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2730    req_byte2 = (match_block.tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
2731    }    }
2732    
2733  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 1771  for (;;) Line 2745  for (;;)
2745    
2746      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* Advance to a unique first char if possible. If firstline is TRUE, the
2747      start of the match is constrained to the first line of a multiline string.      start of the match is constrained to the first line of a multiline string.
2748      Implement this by temporarily adjusting end_subject so that we stop scanning      Implement this by temporarily adjusting end_subject so that we stop
2749      at a newline. If the match fails at the newline, later code breaks this loop.      scanning at a newline. If the match fails at the newline, later code breaks
2750      */      this loop. */
2751    
2752      if (firstline)      if (firstline)
2753        {        {
2754        const uschar *t = current_subject;        const uschar *t = current_subject;
2755        while (t < save_end_subject && *t != '\n') t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2756        end_subject = t;        end_subject = t;
2757        }        }
2758    
# Line 1793  for (;;) Line 2767  for (;;)
2767            current_subject++;            current_subject++;
2768        }        }
2769    
2770      /* Or to just after \n for a multiline match if possible */      /* Or to just after a linebreak for a multiline match if possible */
2771    
2772      else if (startline)      else if (startline)
2773        {        {
2774        if (current_subject > match_block.start_subject + start_offset)        if (current_subject > md->start_subject + start_offset)
2775          {          {
2776          while (current_subject < end_subject && current_subject[-1] != NEWLINE)          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2777              current_subject++;
2778    
2779            /* If we have just passed a CR and the newline option is ANY or
2780            ANYCRLF, and we are now at a LF, advance the match position by one more
2781            character. */
2782    
2783            if (current_subject[-1] == '\r' &&
2784                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2785                 current_subject < end_subject &&
2786                 *current_subject == '\n')
2787            current_subject++;            current_subject++;
2788          }          }
2789        }        }
# Line 1880  for (;;) Line 2864  for (;;)
2864    /* OK, now we can do the business */    /* OK, now we can do the business */
2865    
2866    rc = internal_dfa_exec(    rc = internal_dfa_exec(
2867      &match_block,                              /* fixed match data */      md,                                /* fixed match data */
2868      match_block.start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
2869      current_subject,                           /* where we currently are */      current_subject,                   /* where we currently are */
2870      start_offset,                              /* start offset in subject */      start_offset,                      /* start offset in subject */
2871      offsets,                                   /* offset vector */      offsets,                           /* offset vector */
2872      offsetcount,                               /* size of same */      offsetcount,                       /* size of same */
2873      workspace,                                 /* workspace vector */      workspace,                         /* workspace vector */
2874      wscount,                                   /* size of same */      wscount,                           /* size of same */
2875      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
2876      0,                                         /* function recurse level */      0,                                 /* function recurse level */
2877      0);                                        /* regex recurse level */      0);                                /* regex recurse level */
2878    
2879    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
2880    on only if not anchored. */    on only if not anchored. */
# Line 1900  for (;;) Line 2884  for (;;)
2884    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2885    and firstline is set. */    and firstline is set. */
2886    
2887    if (firstline && *current_subject == NEWLINE) break;    if (firstline && IS_NEWLINE(current_subject)) break;
2888    current_subject++;    current_subject++;
   
 #ifdef SUPPORT_UTF8  
2889    if (utf8)    if (utf8)
2890      {      {
2891      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
2892        current_subject++;        current_subject++;
2893      }      }
 #endif  
   
2894    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2895    }  
2896      /* If we have just passed a CR and we are now at a LF, and the pattern does
2897      not contain any explicit matches for \r or \n, and the newline option is CRLF
2898      or ANY or ANYCRLF, advance the match position by one more character. */
2899    
2900      if (current_subject[-1] == '\r' &&
2901          current_subject < end_subject &&
2902          *current_subject == '\n' &&
2903          (re->flags & PCRE_HASCRORLF) == 0 &&
2904            (md->nltype == NLTYPE_ANY ||
2905             md->nltype == NLTYPE_ANYCRLF ||
2906             md->nllen == 2))
2907        current_subject++;
2908    
2909      }   /* "Bumpalong" loop */
2910    
2911  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2912  }  }

Legend:
Removed from v.85  
changed lines
  Added in v.341

  ViewVC Help
Powered by ViewVC 1.1.5