/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 168 by ph10, Tue May 29 15:18:18 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47    #define NLBLOCK md             /* Block containing newline information */
48    #define PSSTART start_subject  /* Field containing processed string start */
49    #define PSEND   end_subject    /* Field containing processed string end */
50    
51  #include "pcre_internal.h"  #include "pcre_internal.h"
52    
53    
# Line 57  compatible, but it has advantages in cer Line 62  compatible, but it has advantages in cer
62  *************************************************/  *************************************************/
63    
64  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
65  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
66  enough. */  enough. */
67    
68  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  #define OP_PROP_EXTRA 100
69  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_EXTUNI_EXTRA 120
70    #define OP_ANYNL_EXTRA 140
71    
72    
73  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
74  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
75  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
76  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
77  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
78    that follow must also be modified. */
79    
80  static uschar coptable[] = {  static uschar coptable[] = {
81    0,                             /* End                                    */    0,                             /* End                                    */
82    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
83      0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
84    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
85    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */
86    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
87    1,                             /* Char                                   */    1,                             /* Char                                   */
88    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 82  static uschar coptable[] = { Line 90  static uschar coptable[] = {
90    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
91    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
92    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
93      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
94    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
95    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
96    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
97      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
98    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
99    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
100    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
101      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
102    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
103    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
104    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 106  static uschar coptable[] = { Line 117  static uschar coptable[] = {
117    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
118    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
119    0,                             /* Reverse                                */    0,                             /* Reverse                                */
120    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
121    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
122    0,                             /* CREF                                   */    0,                             /* CREF                                   */
123    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0,                             /* RREF                                   */
124    0,                             /* BRANUMBER                              */    0,                             /* DEF                                    */
125    0                              /* BRA                                    */    0, 0                           /* BRAZERO, BRAMINZERO                    */
126  };  };
127    
128  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
129  and \w */  and \w */
130    
131  static uschar toptable1[] = {  static uschar toptable1[] = {
132    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
133    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
134    ctype_space, ctype_space,    ctype_space, ctype_space,
135    ctype_word,  ctype_word,    ctype_word,  ctype_word,
# Line 126  static uschar toptable1[] = { Line 137  static uschar toptable1[] = {
137  };  };
138    
139  static uschar toptable2[] = {  static uschar toptable2[] = {
140    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
141    ctype_digit, 0,    ctype_digit, 0,
142    ctype_space, 0,    ctype_space, 0,
143    ctype_word,  0,    ctype_word,  0,
# Line 277  stateblock *next_active_state, *next_new Line 288  stateblock *next_active_state, *next_new
288    
289  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
290  const uschar *ptr;  const uschar *ptr;
291  const uschar *end_code;  const uschar *end_code, *first_op;
292    
293  int active_count, new_count, match_count;  int active_count, new_count, match_count;
294    
# Line 288  const uschar *start_subject = md->start_ Line 299  const uschar *start_subject = md->start_
299  const uschar *end_subject = md->end_subject;  const uschar *end_subject = md->end_subject;
300  const uschar *start_code = md->start_code;  const uschar *start_code = md->start_code;
301    
302    #ifdef SUPPORT_UTF8
303  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
304    #else
305    BOOL utf8 = FALSE;
306    #endif
307    
308  rlevel++;  rlevel++;
309  offsetcount &= (-2);  offsetcount &= (-2);
# Line 311  active_states = (stateblock *)(workspace Line 326  active_states = (stateblock *)(workspace
326  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
327  new_count = 0;  new_count = 0;
328    
329    first_op = this_start_code + 1 + LINK_SIZE +
330      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
331    
332  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
333  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
334  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 320  If the first opcode in the first alterna Line 338  If the first opcode in the first alterna
338  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
339  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
340    
341  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
342    {    {
343    int max_back = 0;    int max_back = 0;
344    int gone_back;    int gone_back;
# Line 402  else Line 420  else
420    
421    else    else
422      {      {
423        int length = 1 + LINK_SIZE +
424          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
425      do      do
426        {        {
427        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
428        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
429          length = 1 + LINK_SIZE;
430        }        }
431      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
432      }      }
# Line 421  ptr = current_subject; Line 442  ptr = current_subject;
442  for (;;)  for (;;)
443    {    {
444    int i, j;    int i, j;
445    int c, d, clen, dlen;    int clen, dlen;
446      unsigned int c, d;
447    
448    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
449    new state list. */    new state list. */
# Line 457  for (;;) Line 479  for (;;)
479    
480    if (ptr < end_subject)    if (ptr < end_subject)
481      {      {
482      clen = 1;      clen = 1;        /* Number of bytes in the character */
483  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
484      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
485  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 465  for (;;) Line 487  for (;;)
487      }      }
488    else    else
489      {      {
490      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
491      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
492      }      }
493    
494    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 480  for (;;) Line 502  for (;;)
502      const uschar *code;      const uschar *code;
503      int state_offset = current_state->offset;      int state_offset = current_state->offset;
504      int count, codevalue;      int count, codevalue;
505      int chartype, othercase;  #ifdef SUPPORT_UCP
506        int chartype, script;
507    #endif
508    
509  #ifdef DEBUG  #ifdef DEBUG
510      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
511      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
512        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
513          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
514  #endif  #endif
# Line 528  for (;;) Line 552  for (;;)
552    
553      code = start_code + state_offset;      code = start_code + state_offset;
554      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
555    
556      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
557      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 550  for (;;) Line 573  for (;;)
573        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
574        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
575          {          {
576          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
577          if (d >= OP_NOTPROP)            {
578            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
579              case OP_NOTPROP:
580              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
581              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
582              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
583              default: break;
584              }
585          }          }
586        }        }
587      else      else
588        {        {
589        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
590        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
591        }        }
592    
593    
# Line 620  for (;;) Line 649  for (;;)
649    
650        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
651        case OP_BRA:        case OP_BRA:
652          case OP_SBRA:
653        do        do
654          {          {
655          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 629  for (;;) Line 659  for (;;)
659        break;        break;
660    
661        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
662          case OP_CBRA:
663          case OP_SCBRA:
664          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
665          code += GET(code, 1);
666          while (*code == OP_ALT)
667            {
668            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
669            code += GET(code, 1);
670            }
671          break;
672    
673          /*-----------------------------------------------------------------*/
674        case OP_BRAZERO:        case OP_BRAZERO:
675        case OP_BRAMINZERO:        case OP_BRAMINZERO:
676        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 638  for (;;) Line 680  for (;;)
680        break;        break;
681    
682        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
       case OP_BRANUMBER:  
       ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);  
       break;  
   
       /*-----------------------------------------------------------------*/  
683        case OP_CIRC:        case OP_CIRC:
684        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
685            ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))            ((ims & PCRE_MULTILINE) != 0 &&
686                ptr != end_subject &&
687                WAS_NEWLINE(ptr)))
688          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
689        break;        break;
690    
# Line 679  for (;;) Line 718  for (;;)
718    
719        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
720        case OP_ANY:        case OP_ANY:
721        if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
722          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
723        break;        break;
724    
725        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
726        case OP_EODN:        case OP_EODN:
727        if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
728          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
729        break;        break;
730    
# Line 693  for (;;) Line 732  for (;;)
732        case OP_DOLL:        case OP_DOLL:
733        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
734          {          {
735          if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||          if (clen == 0 ||
736                                  (ims & PCRE_MULTILINE) != 0)))              (IS_NEWLINE(ptr) &&
737                   ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
738                ))
739            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
740          }          }
741        else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
742          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
743        break;        break;
744    
# Line 746  for (;;) Line 787  for (;;)
787        break;        break;
788    
789    
 #ifdef SUPPORT_UCP  
   
790        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
791        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
792        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
793        */        */
794    
795    #ifdef SUPPORT_UCP
796        case OP_PROP:        case OP_PROP:
797        case OP_NOTPROP:        case OP_NOTPROP:
798        if (clen > 0)        if (clen > 0)
799          {          {
800          int rqdtype, category;          BOOL OK;
801          category = ucp_findchar(c, &chartype, &othercase);          int category = _pcre_ucp_findprop(c, &chartype, &script);
802          rqdtype = code[1];          switch(code[1])
         if (rqdtype >= 128)  
           {  
           if ((rqdtype - 128 == category) == (codevalue == OP_PROP))  
             { ADD_NEW(state_offset + 2, 0); }  
           }  
         else  
803            {            {
804            if ((rqdtype == chartype) == (codevalue == OP_PROP))            case PT_ANY:
805              { ADD_NEW(state_offset + 2, 0); }            OK = TRUE;
806              break;
807    
808              case PT_LAMP:
809              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
810              break;
811    
812              case PT_GC:
813              OK = category == code[2];
814              break;
815    
816              case PT_PC:
817              OK = chartype == code[2];
818              break;
819    
820              case PT_SC:
821              OK = script == code[2];
822              break;
823    
824              /* Should never occur, but keep compilers from grumbling. */
825    
826              default:
827              OK = codevalue != OP_PROP;
828              break;
829            }            }
830    
831            if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
832          }          }
833        break;        break;
834  #endif  #endif
# Line 784  for (;;) Line 843  for (;;)
843    
844        case OP_TYPEPLUS:        case OP_TYPEPLUS:
845        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
846          case OP_TYPEPOSPLUS:
847        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
848        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
849        if (clen > 0)        if (clen > 0)
850          {          {
851          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
852              (c < 256 &&              (c < 256 &&
853                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
854                   (ims & PCRE_DOTALL) != 0 ||
855                   !IS_NEWLINE(ptr)
856                  ) &&
857                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
858            {            {
859              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
860                {
861                active_count--;            /* Remove non-match possibility */
862                next_active_state--;
863                }
864            count++;            count++;
865            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
866            }            }
# Line 802  for (;;) Line 870  for (;;)
870        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
871        case OP_TYPEQUERY:        case OP_TYPEQUERY:
872        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
873          case OP_TYPEPOSQUERY:
874        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
875        if (clen > 0)        if (clen > 0)
876          {          {
877          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
878              (c < 256 &&              (c < 256 &&
879                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
880                   (ims & PCRE_DOTALL) != 0 ||
881                   !IS_NEWLINE(ptr)
882                  ) &&
883                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
884            {            {
885              if (codevalue == OP_TYPEPOSQUERY)
886                {
887                active_count--;            /* Remove non-match possibility */
888                next_active_state--;
889                }
890            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
891            }            }
892          }          }
# Line 818  for (;;) Line 895  for (;;)
895        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
896        case OP_TYPESTAR:        case OP_TYPESTAR:
897        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
898          case OP_TYPEPOSSTAR:
899        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
900        if (clen > 0)        if (clen > 0)
901          {          {
902          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
903              (c < 256 &&              (c < 256 &&
904                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
905                   (ims & PCRE_DOTALL) != 0 ||
906                   !IS_NEWLINE(ptr)
907                  ) &&
908                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
909            {            {
910              if (codevalue == OP_TYPEPOSSTAR)
911                {
912                active_count--;            /* Remove non-match possibility */
913                next_active_state--;
914                }
915            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
916            }            }
917          }          }
# Line 833  for (;;) Line 919  for (;;)
919    
920        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
921        case OP_TYPEEXACT:        case OP_TYPEEXACT:
922          count = current_state->count;  /* Number already matched */
923          if (clen > 0)
924            {
925            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
926                (c < 256 &&
927                  (d != OP_ANY ||
928                   (ims & PCRE_DOTALL) != 0 ||
929                   !IS_NEWLINE(ptr)
930                  ) &&
931                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
932              {
933              if (++count >= GET2(code, 1))
934                { ADD_NEW(state_offset + 4, 0); }
935              else
936                { ADD_NEW(state_offset, count); }
937              }
938            }
939          break;
940    
941          /*-----------------------------------------------------------------*/
942        case OP_TYPEUPTO:        case OP_TYPEUPTO:
943        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
944        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
945          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
946        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
947        if (clen > 0)        if (clen > 0)
948          {          {
949          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
950              (c < 256 &&              (c < 256 &&
951                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
952                   (ims & PCRE_DOTALL) != 0 ||
953                   !IS_NEWLINE(ptr)
954                  ) &&
955                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
956            {            {
957              if (codevalue == OP_TYPEPOSUPTO)
958                {
959                active_count--;           /* Remove non-match possibility */
960                next_active_state--;
961                }
962            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
963              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
964            else            else
# Line 855  for (;;) Line 969  for (;;)
969    
970  /* ========================================================================== */  /* ========================================================================== */
971        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
972        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
973        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
974        d variable. */        is in the d variable. */
975    
976    #ifdef SUPPORT_UCP
977        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
978        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
979          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
980        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
981        if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
982        if (clen > 0)        if (clen > 0)
983          {          {
984          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
985          int rqdtype = code[2];          int category = _pcre_ucp_findprop(c, &chartype, &script);
986          if ((d == OP_PROP) ==          switch(code[2])
987              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
988            { count++; ADD_NEW(state_offset, count); }            case PT_ANY:
989              OK = TRUE;
990              break;
991    
992              case PT_LAMP:
993              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
994              break;
995    
996              case PT_GC:
997              OK = category == code[3];
998              break;
999    
1000              case PT_PC:
1001              OK = chartype == code[3];
1002              break;
1003    
1004              case PT_SC:
1005              OK = script == code[3];
1006              break;
1007    
1008              /* Should never occur, but keep compilers from grumbling. */
1009    
1010              default:
1011              OK = codevalue != OP_PROP;
1012              break;
1013              }
1014    
1015            if (OK == (d == OP_PROP))
1016              {
1017              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1018                {
1019                active_count--;           /* Remove non-match possibility */
1020                next_active_state--;
1021                }
1022              count++;
1023              ADD_NEW(state_offset, count);
1024              }
1025          }          }
1026        break;        break;
1027    
1028        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1029        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1030        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1031          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1032        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1033        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1034        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1035          {          {
1036          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1037          int ncount = 0;          int ncount = 0;
1038            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1039              {
1040              active_count--;           /* Remove non-match possibility */
1041              next_active_state--;
1042              }
1043          while (nptr < end_subject)          while (nptr < end_subject)
1044            {            {
1045            int nd;            int nd;
1046            int ndlen = 1;            int ndlen = 1;
1047            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1048            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1049            ncount++;            ncount++;
1050            nptr += ndlen;            nptr += ndlen;
1051            }            }
# Line 895  for (;;) Line 1053  for (;;)
1053          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1054          }          }
1055        break;        break;
1056    #endif
1057    
1058          /*-----------------------------------------------------------------*/
1059          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1060          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1061          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1062          count = current_state->count;  /* Already matched */
1063          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1064          if (clen > 0)
1065            {
1066            int ncount = 0;
1067            switch (c)
1068              {
1069              case 0x000d:
1070              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1071              /* Fall through */
1072              case 0x000a:
1073              case 0x000b:
1074              case 0x000c:
1075              case 0x0085:
1076              case 0x2028:
1077              case 0x2029:
1078              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1079                {
1080                active_count--;           /* Remove non-match possibility */
1081                next_active_state--;
1082                }
1083              count++;
1084              ADD_NEW_DATA(-state_offset, count, ncount);
1085              break;
1086              default:
1087              break;
1088              }
1089            }
1090          break;
1091    
1092        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1093    #ifdef SUPPORT_UCP
1094        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1095        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1096        count = 3;        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1097          count = 4;
1098        goto QS1;        goto QS1;
1099    
1100        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1101        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1102          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1103        count = 0;        count = 0;
1104    
1105        QS1:        QS1:
1106    
1107        ADD_ACTIVE(state_offset + 3, 0);        ADD_ACTIVE(state_offset + 4, 0);
1108        if (clen > 0)        if (clen > 0)
1109          {          {
1110          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1111          int rqdtype = code[2];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1112          if ((d == OP_PROP) ==          switch(code[2])
1113              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
1114            { ADD_NEW(state_offset + count, 0); }            case PT_ANY:
1115              OK = TRUE;
1116              break;
1117    
1118              case PT_LAMP:
1119              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1120              break;
1121    
1122              case PT_GC:
1123              OK = category == code[3];
1124              break;
1125    
1126              case PT_PC:
1127              OK = chartype == code[3];
1128              break;
1129    
1130              case PT_SC:
1131              OK = script == code[3];
1132              break;
1133    
1134              /* Should never occur, but keep compilers from grumbling. */
1135    
1136              default:
1137              OK = codevalue != OP_PROP;
1138              break;
1139              }
1140    
1141            if (OK == (d == OP_PROP))
1142              {
1143              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1144                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1145                {
1146                active_count--;           /* Remove non-match possibility */
1147                next_active_state--;
1148                }
1149              ADD_NEW(state_offset + count, 0);
1150              }
1151          }          }
1152        break;        break;
1153    
1154        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1155        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1156        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1157          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1158        count = 2;        count = 2;
1159        goto QS2;        goto QS2;
1160    
1161        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1162        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1163          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1164        count = 0;        count = 0;
1165    
1166        QS2:        QS2:
1167    
1168        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1169        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1170          {          {
1171          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1172          int ncount = 0;          int ncount = 0;
1173            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1174                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1175              {
1176              active_count--;           /* Remove non-match possibility */
1177              next_active_state--;
1178              }
1179          while (nptr < end_subject)          while (nptr < end_subject)
1180            {            {
1181            int nd;            int nd;
1182            int ndlen = 1;            int ndlen = 1;
1183            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1184            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1185            ncount++;            ncount++;
1186            nptr += ndlen;            nptr += ndlen;
1187            }            }
1188          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1189          }          }
1190        break;        break;
1191    #endif
1192    
1193          /*-----------------------------------------------------------------*/
1194          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1195          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1196          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1197          count = 2;
1198          goto QS3;
1199    
1200          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1201          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1202          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1203          count = 0;
1204    
1205          QS3:
1206          ADD_ACTIVE(state_offset + 2, 0);
1207          if (clen > 0)
1208            {
1209            int ncount = 0;
1210            switch (c)
1211              {
1212              case 0x000d:
1213              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1214              /* Fall through */
1215              case 0x000a:
1216              case 0x000b:
1217              case 0x000c:
1218              case 0x0085:
1219              case 0x2028:
1220              case 0x2029:
1221              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1222                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1223                {
1224                active_count--;           /* Remove non-match possibility */
1225                next_active_state--;
1226                }
1227              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1228              break;
1229              default:
1230              break;
1231              }
1232            }
1233          break;
1234    
1235        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1236    #ifdef SUPPORT_UCP
1237        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1238        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1239        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1240          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1241        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1242          { ADD_ACTIVE(state_offset + 5, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1243        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1244        if (clen > 0)        if (clen > 0)
1245          {          {
1246          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1247          int rqdtype = code[4];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1248          if ((d == OP_PROP) ==          switch(code[4])
             (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))  
1249            {            {
1250              case PT_ANY:
1251              OK = TRUE;
1252              break;
1253    
1254              case PT_LAMP:
1255              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1256              break;
1257    
1258              case PT_GC:
1259              OK = category == code[5];
1260              break;
1261    
1262              case PT_PC:
1263              OK = chartype == code[5];
1264              break;
1265    
1266              case PT_SC:
1267              OK = script == code[5];
1268              break;
1269    
1270              /* Should never occur, but keep compilers from grumbling. */
1271    
1272              default:
1273              OK = codevalue != OP_PROP;
1274              break;
1275              }
1276    
1277            if (OK == (d == OP_PROP))
1278              {
1279              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1280                {
1281                active_count--;           /* Remove non-match possibility */
1282                next_active_state--;
1283                }
1284            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1285              { ADD_NEW(state_offset + 5, 0); }              { ADD_NEW(state_offset + 6, 0); }
1286            else            else
1287              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1288            }            }
# Line 975  for (;;) Line 1293  for (;;)
1293        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1294        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1295        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1296          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1297        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1298          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1299        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1300        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1301          {          {
1302          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1303          int ncount = 0;          int ncount = 0;
1304            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1305              {
1306              active_count--;           /* Remove non-match possibility */
1307              next_active_state--;
1308              }
1309          while (nptr < end_subject)          while (nptr < end_subject)
1310            {            {
1311            int nd;            int nd;
1312            int ndlen = 1;            int ndlen = 1;
1313            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1314            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1315            ncount++;            ncount++;
1316            nptr += ndlen;            nptr += ndlen;
1317            }            }
# Line 997  for (;;) Line 1321  for (;;)
1321            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1322          }          }
1323        break;        break;
1324    #endif
1325    
1326          /*-----------------------------------------------------------------*/
1327          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1328          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1329          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1330          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1331          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1332            { ADD_ACTIVE(state_offset + 4, 0); }
1333          count = current_state->count;  /* Number already matched */
1334          if (clen > 0)
1335            {
1336            int ncount = 0;
1337            switch (c)
1338              {
1339              case 0x000d:
1340              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1341              /* Fall through */
1342              case 0x000a:
1343              case 0x000b:
1344              case 0x000c:
1345              case 0x0085:
1346              case 0x2028:
1347              case 0x2029:
1348              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1349                {
1350                active_count--;           /* Remove non-match possibility */
1351                next_active_state--;
1352                }
1353              if (++count >= GET2(code, 1))
1354                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1355              else
1356                { ADD_NEW_DATA(-state_offset, count, ncount); }
1357              break;
1358              default:
1359              break;
1360              }
1361            }
1362          break;
1363    
1364  /* ========================================================================== */  /* ========================================================================== */
1365        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1018  for (;;) Line 1381  for (;;)
1381          {          {
1382          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1383            {            {
1384              unsigned int othercase;
1385            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1386    
1387            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
1388            other case of the character, if there is one. The result of            other case of the character. */
           ucp_findchar() is < 0 if the char isn't found, and othercase is  
           returned as zero if there isn't another case. */  
1389    
1390  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1391            if (ucp_findchar(c, &chartype, &othercase) < 0)            othercase = _pcre_ucp_othercase(c);
1392    #else
1393              othercase = NOTACHAR;
1394  #endif  #endif
             othercase = -1;  
1395    
1396            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1397            }            }
# Line 1050  for (;;) Line 1413  for (;;)
1413        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1414    
1415        case OP_EXTUNI:        case OP_EXTUNI:
1416        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1417          {          {
1418          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1419          int ncount = 0;          int ncount = 0;
# Line 1058  for (;;) Line 1421  for (;;)
1421            {            {
1422            int nclen = 1;            int nclen = 1;
1423            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1424            if (ucp_findchar(c, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
1425            ncount++;            ncount++;
1426            nptr += nclen;            nptr += nclen;
1427            }            }
# Line 1068  for (;;) Line 1431  for (;;)
1431  #endif  #endif
1432    
1433        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1434          /* This is tricky, like EXTUNI, because it too can match more than one
1435          character (when CR is followed by LF). In this case, set up a negative
1436          state to wait for one character to pass before continuing. */
1437    
1438          case OP_ANYNL:
1439          if (clen > 0) switch(c)
1440            {
1441            case 0x000a:
1442            case 0x000b:
1443            case 0x000c:
1444            case 0x0085:
1445            case 0x2028:
1446            case 0x2029:
1447            ADD_NEW(state_offset + 1, 0);
1448            break;
1449            case 0x000d:
1450            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1451              {
1452              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1453              }
1454            else
1455              {
1456              ADD_NEW(state_offset + 1, 0);
1457              }
1458            break;
1459            }
1460          break;
1461    
1462          /*-----------------------------------------------------------------*/
1463        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1464        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1465        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1075  for (;;) Line 1467  for (;;)
1467        case OP_NOT:        case OP_NOT:
1468        if (clen > 0)        if (clen > 0)
1469          {          {
1470          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1471          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1472          }          }
1473        break;        break;
# Line 1083  for (;;) Line 1475  for (;;)
1475        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1476        case OP_PLUS:        case OP_PLUS:
1477        case OP_MINPLUS:        case OP_MINPLUS:
1478          case OP_POSPLUS:
1479        case OP_NOTPLUS:        case OP_NOTPLUS:
1480        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1481          case OP_NOTPOSPLUS:
1482        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1483        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1484        if (clen > 0)        if (clen > 0)
1485          {          {
1486          int otherd = -1;          unsigned int otherd = NOTACHAR;
1487          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1488            {            {
1489  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1490            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1491              {              {
1492  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1493              if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1494  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1495              }              }
1496            else            else
# Line 1104  for (;;) Line 1498  for (;;)
1498            otherd = fcc[d];            otherd = fcc[d];
1499            }            }
1500          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1501            { count++; ADD_NEW(state_offset, count); }            {
1502              if (count > 0 &&
1503                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1504                {
1505                active_count--;             /* Remove non-match possibility */
1506                next_active_state--;
1507                }
1508              count++;
1509              ADD_NEW(state_offset, count);
1510              }
1511          }          }
1512        break;        break;
1513    
1514        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1515        case OP_QUERY:        case OP_QUERY:
1516        case OP_MINQUERY:        case OP_MINQUERY:
1517          case OP_POSQUERY:
1518        case OP_NOTQUERY:        case OP_NOTQUERY:
1519        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1520          case OP_NOTPOSQUERY:
1521        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1522        if (clen > 0)        if (clen > 0)
1523          {          {
1524          int otherd = -1;          unsigned int otherd = NOTACHAR;
1525          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1526            {            {
1527  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1528            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1529              {              {
1530  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1531              if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1532  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1533              }              }
1534            else            else
# Line 1131  for (;;) Line 1536  for (;;)
1536            otherd = fcc[d];            otherd = fcc[d];
1537            }            }
1538          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1539            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1540              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1541                {
1542                active_count--;            /* Remove non-match possibility */
1543                next_active_state--;
1544                }
1545              ADD_NEW(state_offset + dlen + 1, 0);
1546              }
1547          }          }
1548        break;        break;
1549    
1550        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1551        case OP_STAR:        case OP_STAR:
1552        case OP_MINSTAR:        case OP_MINSTAR:
1553          case OP_POSSTAR:
1554        case OP_NOTSTAR:        case OP_NOTSTAR:
1555        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1556          case OP_NOTPOSSTAR:
1557        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1558        if (clen > 0)        if (clen > 0)
1559          {          {
1560          int otherd = -1;          unsigned int otherd = NOTACHAR;
1561          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1562            {            {
1563  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1564            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1565              {              {
1566  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1567              if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1568  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1569              }              }
1570            else            else
# Line 1158  for (;;) Line 1572  for (;;)
1572            otherd = fcc[d];            otherd = fcc[d];
1573            }            }
1574          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1575            { ADD_NEW(state_offset, 0); }            {
1576              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
1577                {
1578                active_count--;            /* Remove non-match possibility */
1579                next_active_state--;
1580                }
1581              ADD_NEW(state_offset, 0);
1582              }
1583          }          }
1584        break;        break;
1585    
1586        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1587        case OP_EXACT:        case OP_EXACT:
1588          case OP_NOTEXACT:
1589          count = current_state->count;  /* Number already matched */
1590          if (clen > 0)
1591            {
1592            unsigned int otherd = NOTACHAR;
1593            if ((ims & PCRE_CASELESS) != 0)
1594              {
1595    #ifdef SUPPORT_UTF8
1596              if (utf8 && d >= 128)
1597                {
1598    #ifdef SUPPORT_UCP
1599                otherd = _pcre_ucp_othercase(d);
1600    #endif  /* SUPPORT_UCP */
1601                }
1602              else
1603    #endif  /* SUPPORT_UTF8 */
1604              otherd = fcc[d];
1605              }
1606            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1607              {
1608              if (++count >= GET2(code, 1))
1609                { ADD_NEW(state_offset + dlen + 3, 0); }
1610              else
1611                { ADD_NEW(state_offset, count); }
1612              }
1613            }
1614          break;
1615    
1616          /*-----------------------------------------------------------------*/
1617        case OP_UPTO:        case OP_UPTO:
1618        case OP_MINUPTO:        case OP_MINUPTO:
1619        case OP_NOTEXACT:        case OP_POSUPTO:
1620        case OP_NOTUPTO:        case OP_NOTUPTO:
1621        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
1622        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
1623          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
1624        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1625        if (clen > 0)        if (clen > 0)
1626          {          {
1627          int otherd = -1;          unsigned int otherd = NOTACHAR;
1628          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1629            {            {
1630  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1631            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1632              {              {
1633  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1634              if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1635  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1636              }              }
1637            else            else
# Line 1190  for (;;) Line 1640  for (;;)
1640            }            }
1641          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1642            {            {
1643              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
1644                {
1645                active_count--;             /* Remove non-match possibility */
1646                next_active_state--;
1647                }
1648            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1649              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
1650            else            else
# Line 1267  for (;;) Line 1722  for (;;)
1722              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 5, 0); }
1723            if (isinclass)            if (isinclass)
1724              {              {
1725              if (++count >= GET2(ecode, 3))              int max = GET2(ecode, 3);
1726                if (++count >= max && max != 0)   /* Max 0 => no limit */
1727                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 5, 0); }
1728              else              else
1729                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
# Line 1317  for (;;) Line 1773  for (;;)
1773    
1774        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1775        case OP_COND:        case OP_COND:
1776          case OP_SCOND:
1777          {          {
1778          int local_offsets[1000];          int local_offsets[1000];
1779          int local_workspace[1000];          int local_workspace[1000];
1780          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
1781    
1782          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
1783          means "test if in a recursion". */  
1784            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
1785    
1786            /* The DEFINE condition is always false */
1787    
1788          if (condcode == OP_CREF)          if (condcode == OP_DEF)
1789              {
1790              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
1791              }
1792    
1793            /* The only supported version of OP_RREF is for the value RREF_ANY,
1794            which means "test if in any recursion". We can't test for specifically
1795            recursed groups. */
1796    
1797            else if (condcode == OP_RREF)
1798            {            {
1799            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
1800            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
1801            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
1802              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
1803            }            }
# Line 1424  for (;;) Line 1893  for (;;)
1893        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1894        case OP_ONCE:        case OP_ONCE:
1895          {          {
         const uschar *endcode;  
1896          int local_offsets[2];          int local_offsets[2];
1897          int local_workspace[1000];          int local_workspace[1000];
1898    
# Line 1446  for (;;) Line 1914  for (;;)
1914            const uschar *end_subpattern = code;            const uschar *end_subpattern = code;
1915            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
1916            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
           BOOL is_repeated;  
1917    
1918            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
1919              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
# Line 1521  for (;;) Line 1988  for (;;)
1988          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
1989          cb.callout_number   = code[1];          cb.callout_number   = code[1];
1990          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
1991          cb.subject          = (char *)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
1992          cb.subject_length   = end_subject - start_subject;          cb.subject_length   = end_subject - start_subject;
1993          cb.start_match      = current_subject - start_subject;          cb.start_match      = current_subject - start_subject;
1994          cb.current_position = ptr - start_subject;          cb.current_position = ptr - start_subject;
# Line 1569  for (;;) Line 2036  for (;;)
2036      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
2037        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
2038        rlevel*2-2, SP));        rlevel*2-2, SP));
2039      return match_count;      break;        /* In effect, "return", but see the comment below */
2040      }      }
2041    
2042    /* One or more states are active for the next character. */    /* One or more states are active for the next character. */
# Line 1577  for (;;) Line 2044  for (;;)
2044    ptr += clen;    /* Advance to next subject character */    ptr += clen;    /* Advance to next subject character */
2045    }               /* Loop to move along the subject string */    }               /* Loop to move along the subject string */
2046    
2047  /* Control never gets here, but we must keep the compiler happy. */  /* Control gets here from "break" a few lines above. We do it this way because
2048    if we use "return" above, we have compiler trouble. Some compilers warn if
2049    there's nothing here because they think the function doesn't return a value. On
2050    the other hand, if we put a dummy statement here, some more clever compilers
2051    complain that it can't be reached. Sigh. */
2052    
2053  DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"  return match_count;
   "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));  
 return PCRE_ERROR_NOMATCH;  
2054  }  }
2055    
2056    
# Line 1597  is not anchored. Line 2066  is not anchored.
2066    
2067  Arguments:  Arguments:
2068    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2069    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2070    subject         points to the subject string    subject         points to the subject string
2071    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2072    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1613  Returns:          > 0 => number of match Line 2082  Returns:          > 0 => number of match
2082                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2083  */  */
2084    
2085  EXPORT int  PCRE_EXP_DEFN int
2086  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2087    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2088    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
2089  {  {
2090  real_pcre *re = (real_pcre *)argument_re;  real_pcre *re = (real_pcre *)argument_re;
2091  dfa_match_data match_block;  dfa_match_data match_block;
2092    dfa_match_data *md = &match_block;
2093  BOOL utf8, anchored, startline, firstline;  BOOL utf8, anchored, startline, firstline;
2094  const uschar *current_subject, *end_subject, *lcc;  const uschar *current_subject, *end_subject, *lcc;
2095    
# Line 1634  BOOL req_byte_caseless = FALSE; Line 2104  BOOL req_byte_caseless = FALSE;
2104  int first_byte = -1;  int first_byte = -1;
2105  int req_byte = -1;  int req_byte = -1;
2106  int req_byte2 = -1;  int req_byte2 = -1;
2107    int newline;
2108    
2109  /* Plausibility checks */  /* Plausibility checks */
2110    
# Line 1648  flipping, so we scan the extra_data bloc Line 2119  flipping, so we scan the extra_data bloc
2119  match block, so we must initialize them beforehand. However, the other fields  match block, so we must initialize them beforehand. However, the other fields
2120  in the match block must not be set until after the byte flipping. */  in the match block must not be set until after the byte flipping. */
2121    
2122  match_block.tables = re->tables;  md->tables = re->tables;
2123  match_block.callout_data = NULL;  md->callout_data = NULL;
2124    
2125  if (extra_data != NULL)  if (extra_data != NULL)
2126    {    {
# Line 1657  if (extra_data != NULL) Line 2128  if (extra_data != NULL)
2128    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
2129      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
2130    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
2131      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
2132        return PCRE_ERROR_DFA_UMLIMIT;
2133    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
2134      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
2135    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
2136      match_block.tables = extra_data->tables;      md->tables = extra_data->tables;
2137    }    }
2138    
2139  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 1681  current_subject = (const unsigned char * Line 2154  current_subject = (const unsigned char *
2154  end_subject = (const unsigned char *)subject + length;  end_subject = (const unsigned char *)subject + length;
2155  req_byte_ptr = current_subject - 1;  req_byte_ptr = current_subject - 1;
2156    
2157    #ifdef SUPPORT_UTF8
2158  utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = (re->options & PCRE_UTF8) != 0;
2159  anchored = (options & PCRE_ANCHORED) != 0 || (re->options & PCRE_ANCHORED) != 0;  #else
2160    utf8 = FALSE;
2161    #endif
2162    
2163    anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
2164      (re->options & PCRE_ANCHORED) != 0;
2165    
2166  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
2167    
2168  match_block.start_code = (const uschar *)argument_re +  md->start_code = (const uschar *)argument_re +
2169      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
2170  match_block.start_subject = (const unsigned char *)subject;  md->start_subject = (const unsigned char *)subject;
2171  match_block.end_subject = end_subject;  md->end_subject = end_subject;
2172  match_block.moptions = options;  md->moptions = options;
2173  match_block.poptions = re->options;  md->poptions = re->options;
2174    
2175    /* Handle different types of newline. The three bits give eight cases. If
2176    nothing is set at run time, whatever was used at compile time applies. */
2177    
2178    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2179             PCRE_NEWLINE_BITS)
2180      {
2181      case 0: newline = NEWLINE; break;   /* Compile-time default */
2182      case PCRE_NEWLINE_CR: newline = '\r'; break;
2183      case PCRE_NEWLINE_LF: newline = '\n'; break;
2184      case PCRE_NEWLINE_CR+
2185           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2186      case PCRE_NEWLINE_ANY: newline = -1; break;
2187      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2188      default: return PCRE_ERROR_BADNEWLINE;
2189      }
2190    
2191    if (newline == -2)
2192      {
2193      md->nltype = NLTYPE_ANYCRLF;
2194      }
2195    else if (newline < 0)
2196      {
2197      md->nltype = NLTYPE_ANY;
2198      }
2199    else
2200      {
2201      md->nltype = NLTYPE_FIXED;
2202      if (newline > 255)
2203        {
2204        md->nllen = 2;
2205        md->nl[0] = (newline >> 8) & 255;
2206        md->nl[1] = newline & 255;
2207        }
2208      else
2209        {
2210        md->nllen = 1;
2211        md->nl[0] = newline;
2212        }
2213      }
2214    
2215  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
2216  back the character offset. */  back the character offset. */
# Line 1717  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 2236  if (utf8 && (options & PCRE_NO_UTF8_CHEC
2236  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
2237  in other programs later. */  in other programs later. */
2238    
2239  if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = _pcre_default_tables;
2240    
2241  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
2242  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2243    
2244  lcc = match_block.tables + lcc_offset;  lcc = md->tables + lcc_offset;
2245  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->options & PCRE_STARTLINE) != 0;
2246  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2247    
# Line 1755  if ((re->options & PCRE_REQCHSET) != 0) Line 2274  if ((re->options & PCRE_REQCHSET) != 0)
2274    {    {
2275    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2276    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2277    req_byte2 = (match_block.tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
2278    }    }
2279    
2280  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 1773  for (;;) Line 2292  for (;;)
2292    
2293      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* Advance to a unique first char if possible. If firstline is TRUE, the
2294      start of the match is constrained to the first line of a multiline string.      start of the match is constrained to the first line of a multiline string.
2295      Implement this by temporarily adjusting end_subject so that we stop scanning      Implement this by temporarily adjusting end_subject so that we stop
2296      at a newline. If the match fails at the newline, later code breaks this loop.      scanning at a newline. If the match fails at the newline, later code breaks
2297      */      this loop. */
2298    
2299      if (firstline)      if (firstline)
2300        {        {
2301        const uschar *t = current_subject;        const uschar *t = current_subject;
2302        while (t < save_end_subject && *t != '\n') t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2303        end_subject = t;        end_subject = t;
2304        }        }
2305    
# Line 1795  for (;;) Line 2314  for (;;)
2314            current_subject++;            current_subject++;
2315        }        }
2316    
2317      /* Or to just after \n for a multiline match if possible */      /* Or to just after a linebreak for a multiline match if possible */
2318    
2319      else if (startline)      else if (startline)
2320        {        {
2321        if (current_subject > match_block.start_subject + start_offset)        if (current_subject > md->start_subject + start_offset)
2322          {          {
2323          while (current_subject < end_subject && current_subject[-1] != NEWLINE)          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2324              current_subject++;
2325    
2326            /* If we have just passed a CR and the newline option is ANY or
2327            ANYCRLF, and we are now at a LF, advance the match position by one more
2328            character. */
2329    
2330            if (current_subject[-1] == '\r' &&
2331                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2332                 current_subject < end_subject &&
2333                 *current_subject == '\n')
2334            current_subject++;            current_subject++;
2335          }          }
2336        }        }
# Line 1882  for (;;) Line 2411  for (;;)
2411    /* OK, now we can do the business */    /* OK, now we can do the business */
2412    
2413    rc = internal_dfa_exec(    rc = internal_dfa_exec(
2414      &match_block,                              /* fixed match data */      md,                                /* fixed match data */
2415      match_block.start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
2416      current_subject,                           /* where we currently are */      current_subject,                   /* where we currently are */
2417      start_offset,                              /* start offset in subject */      start_offset,                      /* start offset in subject */
2418      offsets,                                   /* offset vector */      offsets,                           /* offset vector */
2419      offsetcount,                               /* size of same */      offsetcount,                       /* size of same */
2420      workspace,                                 /* workspace vector */      workspace,                         /* workspace vector */
2421      wscount,                                   /* size of same */      wscount,                           /* size of same */
2422      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
2423      0,                                         /* function recurse level */      0,                                 /* function recurse level */
2424      0);                                        /* regex recurse level */      0);                                /* regex recurse level */
2425    
2426    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
2427    on only if not anchored. */    on only if not anchored. */
# Line 1902  for (;;) Line 2431  for (;;)
2431    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2432    and firstline is set. */    and firstline is set. */
2433    
2434    if (firstline && *current_subject == NEWLINE) break;    if (firstline && IS_NEWLINE(current_subject)) break;
2435    current_subject++;    current_subject++;
   
 #ifdef SUPPORT_UTF8  
2436    if (utf8)    if (utf8)
2437      {      {
2438      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
2439        current_subject++;        current_subject++;
2440      }      }
 #endif  
   
2441    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2442    }  
2443      /* If we have just passed a CR and the newline option is CRLF or ANY or
2444      ANYCRLF, and we are now at a LF, advance the match position by one more
2445      character. */
2446    
2447      if (current_subject[-1] == '\r' &&
2448           (md->nltype == NLTYPE_ANY ||
2449            md->nltype == NLTYPE_ANYCRLF ||
2450            md->nllen == 2) &&
2451           current_subject < end_subject &&
2452           *current_subject == '\n')
2453        current_subject++;
2454    
2455      }   /* "Bumpalong" loop */
2456    
2457  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2458  }  }

Legend:
Removed from v.77  
changed lines
  Added in v.168

  ViewVC Help
Powered by ViewVC 1.1.5