/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 77 by nigel, Sat Feb 24 21:40:45 2007 UTC revision 152 by ph10, Tue Apr 17 15:55:53 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2005 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 39  POSSIBILITY OF SUCH DAMAGE. Line 39  POSSIBILITY OF SUCH DAMAGE.
39    
40    
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a DFA algorithm. This is NOT Perl-  alternative matching function that uses a sort of DFA algorithm (not a true
43  compatible, but it has advantages in certain applications. */  FSM). This is NOT Perl-compatible, but it has advantages in certain
44    applications. */
45    
46    
47    #define NLBLOCK md             /* Block containing newline information */
48    #define PSSTART start_subject  /* Field containing processed string start */
49    #define PSEND   end_subject    /* Field containing processed string end */
50    
51  #include "pcre_internal.h"  #include "pcre_internal.h"
52    
53    
# Line 57  compatible, but it has advantages in cer Line 62  compatible, but it has advantages in cer
62  *************************************************/  *************************************************/
63    
64  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
65  into others, under special conditions. A gap of 10 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
66  enough. */  enough. */
67    
68  #define OP_PROP_EXTRA    (EXTRACT_BASIC_MAX+1)  #define OP_PROP_EXTRA 100
69  #define OP_EXTUNI_EXTRA  (EXTRACT_BASIC_MAX+11)  #define OP_EXTUNI_EXTRA 120
70    #define OP_ANYNL_EXTRA 140
71    
72    
73  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
# Line 74  static uschar coptable[] = { Line 80  static uschar coptable[] = {
80    0,                             /* End                                    */    0,                             /* End                                    */
81    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */
82    0, 0,                          /* Any, Anybyte                           */    0, 0,                          /* Any, Anybyte                           */
83    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */
84    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
85    1,                             /* Char                                   */    1,                             /* Char                                   */
86    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 82  static uschar coptable[] = { Line 88  static uschar coptable[] = {
88    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
89    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
90    3, 3, 3,                       /* upto, minupto, exact                   */    3, 3, 3,                       /* upto, minupto, exact                   */
91      1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */
92    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
93    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
94    3, 3, 3,                       /* NOT upto, minupto, exact               */    3, 3, 3,                       /* NOT upto, minupto, exact               */
95      1, 1, 1, 3,                    /* NOT *+, ++, ?+, upto+                  */
96    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
97    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
98    3, 3, 3,                       /* Type upto, minupto, exact              */    3, 3, 3,                       /* Type upto, minupto, exact              */
99      1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */
100    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
101    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
102    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 106  static uschar coptable[] = { Line 115  static uschar coptable[] = {
115    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
116    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
117    0,                             /* Reverse                                */    0,                             /* Reverse                                */
118    0,                             /* Once                                   */    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */
119    0,                             /* COND                                   */    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */
120    0,                             /* CREF                                   */    0,                             /* CREF                                   */
121    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0,                             /* RREF                                   */
122    0,                             /* BRANUMBER                              */    0,                             /* DEF                                    */
123    0                              /* BRA                                    */    0, 0                           /* BRAZERO, BRAMINZERO                    */
124  };  };
125    
126  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
# Line 277  stateblock *next_active_state, *next_new Line 286  stateblock *next_active_state, *next_new
286    
287  const uschar *ctypes, *lcc, *fcc;  const uschar *ctypes, *lcc, *fcc;
288  const uschar *ptr;  const uschar *ptr;
289  const uschar *end_code;  const uschar *end_code, *first_op;
290    
291  int active_count, new_count, match_count;  int active_count, new_count, match_count;
292    
# Line 288  const uschar *start_subject = md->start_ Line 297  const uschar *start_subject = md->start_
297  const uschar *end_subject = md->end_subject;  const uschar *end_subject = md->end_subject;
298  const uschar *start_code = md->start_code;  const uschar *start_code = md->start_code;
299    
300    #ifdef SUPPORT_UTF8
301  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
302    #else
303    BOOL utf8 = FALSE;
304    #endif
305    
306  rlevel++;  rlevel++;
307  offsetcount &= (-2);  offsetcount &= (-2);
# Line 311  active_states = (stateblock *)(workspace Line 324  active_states = (stateblock *)(workspace
324  next_new_state = new_states = active_states + wscount;  next_new_state = new_states = active_states + wscount;
325  new_count = 0;  new_count = 0;
326    
327    first_op = this_start_code + 1 + LINK_SIZE +
328      ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
329    
330  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
331  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
332  makes it possible to use this function recursively, when we want to stop at a  makes it possible to use this function recursively, when we want to stop at a
# Line 320  If the first opcode in the first alterna Line 336  If the first opcode in the first alterna
336  a backward assertion. In that case, we have to find out the maximum amount to  a backward assertion. In that case, we have to find out the maximum amount to
337  move back, and set up each alternative appropriately. */  move back, and set up each alternative appropriately. */
338    
339  if (this_start_code[1+LINK_SIZE] == OP_REVERSE)  if (*first_op == OP_REVERSE)
340    {    {
341    int max_back = 0;    int max_back = 0;
342    int gone_back;    int gone_back;
# Line 402  else Line 418  else
418    
419    else    else
420      {      {
421        int length = 1 + LINK_SIZE +
422          ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);
423      do      do
424        {        {
425        ADD_NEW(end_code - start_code + 1 + LINK_SIZE, 0);        ADD_NEW(end_code - start_code + length, 0);
426        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
427          length = 1 + LINK_SIZE;
428        }        }
429      while (*end_code == OP_ALT);      while (*end_code == OP_ALT);
430      }      }
# Line 421  ptr = current_subject; Line 440  ptr = current_subject;
440  for (;;)  for (;;)
441    {    {
442    int i, j;    int i, j;
443    int c, d, clen, dlen;    int clen, dlen;
444      unsigned int c, d;
445    
446    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
447    new state list. */    new state list. */
# Line 457  for (;;) Line 477  for (;;)
477    
478    if (ptr < end_subject)    if (ptr < end_subject)
479      {      {
480      clen = 1;      clen = 1;        /* Number of bytes in the character */
481  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
482      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf8) { GETCHARLEN(c, ptr, clen); } else
483  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF8 */
# Line 465  for (;;) Line 485  for (;;)
485      }      }
486    else    else
487      {      {
488      clen = 0;    /* At end subject */      clen = 0;        /* This indicates the end of the subject */
489      c = -1;      c = NOTACHAR;    /* This value should never actually be used */
490      }      }
491    
492    /* Scan up the active states and act on each one. The result of an action    /* Scan up the active states and act on each one. The result of an action
# Line 480  for (;;) Line 500  for (;;)
500      const uschar *code;      const uschar *code;
501      int state_offset = current_state->offset;      int state_offset = current_state->offset;
502      int count, codevalue;      int count, codevalue;
503      int chartype, othercase;  #ifdef SUPPORT_UCP
504        int chartype, script;
505    #endif
506    
507  #ifdef DEBUG  #ifdef DEBUG
508      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
509      if (c < 0) printf("-1\n");      if (clen == 0) printf("EOL\n");
510        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
511          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
512  #endif  #endif
# Line 528  for (;;) Line 550  for (;;)
550    
551      code = start_code + state_offset;      code = start_code + state_offset;
552      codevalue = *code;      codevalue = *code;
     if (codevalue >= OP_BRA) codevalue = OP_BRA; /* All brackets are equal */  
553    
554      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
555      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
# Line 550  for (;;) Line 571  for (;;)
571        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
572        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
573          {          {
574          if (d == OP_ANYBYTE) return PCRE_ERROR_DFA_UITEM;          switch(d)
575          if (d >= OP_NOTPROP)            {
576            codevalue += (d == OP_EXTUNI)? OP_EXTUNI_EXTRA : OP_PROP_EXTRA;            case OP_ANYBYTE: return PCRE_ERROR_DFA_UITEM;
577              case OP_NOTPROP:
578              case OP_PROP: codevalue += OP_PROP_EXTRA; break;
579              case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
580              case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
581              default: break;
582              }
583          }          }
584        }        }
585      else      else
586        {        {
587        dlen = 0;         /* Not strictly necessary, but compilers moan */        dlen = 0;         /* Not strictly necessary, but compilers moan */
588        d = -1;           /* if these variables are not set. */        d = NOTACHAR;     /* if these variables are not set. */
589        }        }
590    
591    
# Line 620  for (;;) Line 647  for (;;)
647    
648        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
649        case OP_BRA:        case OP_BRA:
650          case OP_SBRA:
651        do        do
652          {          {
653          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
# Line 629  for (;;) Line 657  for (;;)
657        break;        break;
658    
659        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
660          case OP_CBRA:
661          case OP_SCBRA:
662          ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);
663          code += GET(code, 1);
664          while (*code == OP_ALT)
665            {
666            ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);
667            code += GET(code, 1);
668            }
669          break;
670    
671          /*-----------------------------------------------------------------*/
672        case OP_BRAZERO:        case OP_BRAZERO:
673        case OP_BRAMINZERO:        case OP_BRAMINZERO:
674        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
# Line 638  for (;;) Line 678  for (;;)
678        break;        break;
679    
680        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
       case OP_BRANUMBER:  
       ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);  
       break;  
   
       /*-----------------------------------------------------------------*/  
681        case OP_CIRC:        case OP_CIRC:
682        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
683            ((ims & PCRE_MULTILINE) != 0 && ptr[-1] == NEWLINE))            ((ims & PCRE_MULTILINE) != 0 &&
684                ptr != end_subject &&
685                WAS_NEWLINE(ptr)))
686          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
687        break;        break;
688    
# Line 679  for (;;) Line 716  for (;;)
716    
717        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
718        case OP_ANY:        case OP_ANY:
719        if (clen > 0 && (c != NEWLINE || (ims & PCRE_DOTALL) != 0))        if (clen > 0 && ((ims & PCRE_DOTALL) != 0 || !IS_NEWLINE(ptr)))
720          { ADD_NEW(state_offset + 1, 0); }          { ADD_NEW(state_offset + 1, 0); }
721        break;        break;
722    
723        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
724        case OP_EODN:        case OP_EODN:
725        if (clen == 0 || (c == NEWLINE && ptr + 1 == end_subject))        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
726          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
727        break;        break;
728    
# Line 693  for (;;) Line 730  for (;;)
730        case OP_DOLL:        case OP_DOLL:
731        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
732          {          {
733          if (clen == 0 || (c == NEWLINE && (ptr + 1 == end_subject ||          if (clen == 0 ||
734                                  (ims & PCRE_MULTILINE) != 0)))              (IS_NEWLINE(ptr) &&
735                   ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)
736                ))
737            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
738          }          }
739        else if (c == NEWLINE && (ims & PCRE_MULTILINE) != 0)        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))
740          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
741        break;        break;
742    
# Line 746  for (;;) Line 785  for (;;)
785        break;        break;
786    
787    
 #ifdef SUPPORT_UCP  
   
788        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
789        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
790        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
791        */        */
792    
793    #ifdef SUPPORT_UCP
794        case OP_PROP:        case OP_PROP:
795        case OP_NOTPROP:        case OP_NOTPROP:
796        if (clen > 0)        if (clen > 0)
797          {          {
798          int rqdtype, category;          BOOL OK;
799          category = ucp_findchar(c, &chartype, &othercase);          int category = _pcre_ucp_findprop(c, &chartype, &script);
800          rqdtype = code[1];          switch(code[1])
         if (rqdtype >= 128)  
           {  
           if ((rqdtype - 128 == category) == (codevalue == OP_PROP))  
             { ADD_NEW(state_offset + 2, 0); }  
           }  
         else  
801            {            {
802            if ((rqdtype == chartype) == (codevalue == OP_PROP))            case PT_ANY:
803              { ADD_NEW(state_offset + 2, 0); }            OK = TRUE;
804              break;
805    
806              case PT_LAMP:
807              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
808              break;
809    
810              case PT_GC:
811              OK = category == code[2];
812              break;
813    
814              case PT_PC:
815              OK = chartype == code[2];
816              break;
817    
818              case PT_SC:
819              OK = script == code[2];
820              break;
821    
822              /* Should never occur, but keep compilers from grumbling. */
823    
824              default:
825              OK = codevalue != OP_PROP;
826              break;
827            }            }
828    
829            if (OK == (codevalue == OP_PROP)) { ADD_NEW(state_offset + 3, 0); }
830          }          }
831        break;        break;
832  #endif  #endif
# Line 784  for (;;) Line 841  for (;;)
841    
842        case OP_TYPEPLUS:        case OP_TYPEPLUS:
843        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
844          case OP_TYPEPOSPLUS:
845        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
846        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
847        if (clen > 0)        if (clen > 0)
848          {          {
849          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
850              (c < 256 &&              (c < 256 &&
851                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
852                   (ims & PCRE_DOTALL) != 0 ||
853                   !IS_NEWLINE(ptr)
854                  ) &&
855                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
856            {            {
857              if (count > 0 && codevalue == OP_TYPEPOSPLUS)
858                {
859                active_count--;            /* Remove non-match possibility */
860                next_active_state--;
861                }
862            count++;            count++;
863            ADD_NEW(state_offset, count);            ADD_NEW(state_offset, count);
864            }            }
# Line 802  for (;;) Line 868  for (;;)
868        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
869        case OP_TYPEQUERY:        case OP_TYPEQUERY:
870        case OP_TYPEMINQUERY:        case OP_TYPEMINQUERY:
871          case OP_TYPEPOSQUERY:
872        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
873        if (clen > 0)        if (clen > 0)
874          {          {
875          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
876              (c < 256 &&              (c < 256 &&
877                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
878                   (ims & PCRE_DOTALL) != 0 ||
879                   !IS_NEWLINE(ptr)
880                  ) &&
881                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
882            {            {
883              if (codevalue == OP_TYPEPOSQUERY)
884                {
885                active_count--;            /* Remove non-match possibility */
886                next_active_state--;
887                }
888            ADD_NEW(state_offset + 2, 0);            ADD_NEW(state_offset + 2, 0);
889            }            }
890          }          }
# Line 818  for (;;) Line 893  for (;;)
893        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
894        case OP_TYPESTAR:        case OP_TYPESTAR:
895        case OP_TYPEMINSTAR:        case OP_TYPEMINSTAR:
896          case OP_TYPEPOSSTAR:
897        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
898        if (clen > 0)        if (clen > 0)
899          {          {
900          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
901              (c < 256 &&              (c < 256 &&
902                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
903                   (ims & PCRE_DOTALL) != 0 ||
904                   !IS_NEWLINE(ptr)
905                  ) &&
906                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
907            {            {
908              if (codevalue == OP_TYPEPOSSTAR)
909                {
910                active_count--;            /* Remove non-match possibility */
911                next_active_state--;
912                }
913            ADD_NEW(state_offset, 0);            ADD_NEW(state_offset, 0);
914            }            }
915          }          }
# Line 833  for (;;) Line 917  for (;;)
917    
918        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
919        case OP_TYPEEXACT:        case OP_TYPEEXACT:
920          count = current_state->count;  /* Number already matched */
921          if (clen > 0)
922            {
923            if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
924                (c < 256 &&
925                  (d != OP_ANY ||
926                   (ims & PCRE_DOTALL) != 0 ||
927                   !IS_NEWLINE(ptr)
928                  ) &&
929                  ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
930              {
931              if (++count >= GET2(code, 1))
932                { ADD_NEW(state_offset + 4, 0); }
933              else
934                { ADD_NEW(state_offset, count); }
935              }
936            }
937          break;
938    
939          /*-----------------------------------------------------------------*/
940        case OP_TYPEUPTO:        case OP_TYPEUPTO:
941        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
942        if (codevalue != OP_TYPEEXACT)        case OP_TYPEPOSUPTO:
943          { ADD_ACTIVE(state_offset + 4, 0); }        ADD_ACTIVE(state_offset + 4, 0);
944        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
945        if (clen > 0)        if (clen > 0)
946          {          {
947          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
948              (c < 256 &&              (c < 256 &&
949                (d != OP_ANY || c != '\n' || (ims & PCRE_DOTALL) != 0) &&                (d != OP_ANY ||
950                   (ims & PCRE_DOTALL) != 0 ||
951                   !IS_NEWLINE(ptr)
952                  ) &&
953                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
954            {            {
955              if (codevalue == OP_TYPEPOSUPTO)
956                {
957                active_count--;           /* Remove non-match possibility */
958                next_active_state--;
959                }
960            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
961              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 4, 0); }
962            else            else
# Line 855  for (;;) Line 967  for (;;)
967    
968  /* ========================================================================== */  /* ========================================================================== */
969        /* These are virtual opcodes that are used when something like        /* These are virtual opcodes that are used when something like
970        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, or OP_EXTUNI as its argument. It        OP_TYPEPLUS has OP_PROP, OP_NOTPROP, OP_ANYNL, or OP_EXTUNI as its
971        keeps the code above fast for the other cases. The argument is in the        argument. It keeps the code above fast for the other cases. The argument
972        d variable. */        is in the d variable. */
973    
974    #ifdef SUPPORT_UCP
975        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
976        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
977          case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
978        count = current_state->count;           /* Already matched */        count = current_state->count;           /* Already matched */
979        if (count > 0) { ADD_ACTIVE(state_offset + 3, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 4, 0); }
980        if (clen > 0)        if (clen > 0)
981          {          {
982          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
983          int rqdtype = code[2];          int category = _pcre_ucp_findprop(c, &chartype, &script);
984          if ((d == OP_PROP) ==          switch(code[2])
985              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
986            { count++; ADD_NEW(state_offset, count); }            case PT_ANY:
987              OK = TRUE;
988              break;
989    
990              case PT_LAMP:
991              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
992              break;
993    
994              case PT_GC:
995              OK = category == code[3];
996              break;
997    
998              case PT_PC:
999              OK = chartype == code[3];
1000              break;
1001    
1002              case PT_SC:
1003              OK = script == code[3];
1004              break;
1005    
1006              /* Should never occur, but keep compilers from grumbling. */
1007    
1008              default:
1009              OK = codevalue != OP_PROP;
1010              break;
1011              }
1012    
1013            if (OK == (d == OP_PROP))
1014              {
1015              if (count > 0 && codevalue == OP_PROP_EXTRA + OP_TYPEPOSPLUS)
1016                {
1017                active_count--;           /* Remove non-match possibility */
1018                next_active_state--;
1019                }
1020              count++;
1021              ADD_NEW(state_offset, count);
1022              }
1023          }          }
1024        break;        break;
1025    
1026        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1027        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPLUS:
1028        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEMINPLUS:
1029          case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1030        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1031        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1032        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1033          {          {
1034          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1035          int ncount = 0;          int ncount = 0;
1036            if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1037              {
1038              active_count--;           /* Remove non-match possibility */
1039              next_active_state--;
1040              }
1041          while (nptr < end_subject)          while (nptr < end_subject)
1042            {            {
1043            int nd;            int nd;
1044            int ndlen = 1;            int ndlen = 1;
1045            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1046            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1047            ncount++;            ncount++;
1048            nptr += ndlen;            nptr += ndlen;
1049            }            }
# Line 895  for (;;) Line 1051  for (;;)
1051          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1052          }          }
1053        break;        break;
1054    #endif
1055    
1056          /*-----------------------------------------------------------------*/
1057          case OP_ANYNL_EXTRA + OP_TYPEPLUS:
1058          case OP_ANYNL_EXTRA + OP_TYPEMINPLUS:
1059          case OP_ANYNL_EXTRA + OP_TYPEPOSPLUS:
1060          count = current_state->count;  /* Already matched */
1061          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1062          if (clen > 0)
1063            {
1064            int ncount = 0;
1065            switch (c)
1066              {
1067              case 0x000d:
1068              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1069              /* Fall through */
1070              case 0x000a:
1071              case 0x000b:
1072              case 0x000c:
1073              case 0x0085:
1074              case 0x2028:
1075              case 0x2029:
1076              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1077                {
1078                active_count--;           /* Remove non-match possibility */
1079                next_active_state--;
1080                }
1081              count++;
1082              ADD_NEW_DATA(-state_offset, count, ncount);
1083              break;
1084              default:
1085              break;
1086              }
1087            }
1088          break;
1089    
1090        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1091    #ifdef SUPPORT_UCP
1092        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1093        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1094        count = 3;        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
1095          count = 4;
1096        goto QS1;        goto QS1;
1097    
1098        case OP_PROP_EXTRA + OP_TYPESTAR:        case OP_PROP_EXTRA + OP_TYPESTAR:
1099        case OP_PROP_EXTRA + OP_TYPEMINSTAR:        case OP_PROP_EXTRA + OP_TYPEMINSTAR:
1100          case OP_PROP_EXTRA + OP_TYPEPOSSTAR:
1101        count = 0;        count = 0;
1102    
1103        QS1:        QS1:
1104    
1105        ADD_ACTIVE(state_offset + 3, 0);        ADD_ACTIVE(state_offset + 4, 0);
1106        if (clen > 0)        if (clen > 0)
1107          {          {
1108          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1109          int rqdtype = code[2];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1110          if ((d == OP_PROP) ==          switch(code[2])
1111              (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))            {
1112            { ADD_NEW(state_offset + count, 0); }            case PT_ANY:
1113              OK = TRUE;
1114              break;
1115    
1116              case PT_LAMP:
1117              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1118              break;
1119    
1120              case PT_GC:
1121              OK = category == code[3];
1122              break;
1123    
1124              case PT_PC:
1125              OK = chartype == code[3];
1126              break;
1127    
1128              case PT_SC:
1129              OK = script == code[3];
1130              break;
1131    
1132              /* Should never occur, but keep compilers from grumbling. */
1133    
1134              default:
1135              OK = codevalue != OP_PROP;
1136              break;
1137              }
1138    
1139            if (OK == (d == OP_PROP))
1140              {
1141              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSSTAR ||
1142                  codevalue == OP_PROP_EXTRA + OP_TYPEPOSQUERY)
1143                {
1144                active_count--;           /* Remove non-match possibility */
1145                next_active_state--;
1146                }
1147              ADD_NEW(state_offset + count, 0);
1148              }
1149          }          }
1150        break;        break;
1151    
1152        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1153        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEQUERY:
1154        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:        case OP_EXTUNI_EXTRA + OP_TYPEMINQUERY:
1155          case OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY:
1156        count = 2;        count = 2;
1157        goto QS2;        goto QS2;
1158    
1159        case OP_EXTUNI_EXTRA + OP_TYPESTAR:        case OP_EXTUNI_EXTRA + OP_TYPESTAR:
1160        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:        case OP_EXTUNI_EXTRA + OP_TYPEMINSTAR:
1161          case OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR:
1162        count = 0;        count = 0;
1163    
1164        QS2:        QS2:
1165    
1166        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1167        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1168          {          {
1169          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1170          int ncount = 0;          int ncount = 0;
1171            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1172                codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
1173              {
1174              active_count--;           /* Remove non-match possibility */
1175              next_active_state--;
1176              }
1177          while (nptr < end_subject)          while (nptr < end_subject)
1178            {            {
1179            int nd;            int nd;
1180            int ndlen = 1;            int ndlen = 1;
1181            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1182            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1183            ncount++;            ncount++;
1184            nptr += ndlen;            nptr += ndlen;
1185            }            }
1186          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1187          }          }
1188        break;        break;
1189    #endif
1190    
1191          /*-----------------------------------------------------------------*/
1192          case OP_ANYNL_EXTRA + OP_TYPEQUERY:
1193          case OP_ANYNL_EXTRA + OP_TYPEMINQUERY:
1194          case OP_ANYNL_EXTRA + OP_TYPEPOSQUERY:
1195          count = 2;
1196          goto QS3;
1197    
1198          case OP_ANYNL_EXTRA + OP_TYPESTAR:
1199          case OP_ANYNL_EXTRA + OP_TYPEMINSTAR:
1200          case OP_ANYNL_EXTRA + OP_TYPEPOSSTAR:
1201          count = 0;
1202    
1203          QS3:
1204          ADD_ACTIVE(state_offset + 2, 0);
1205          if (clen > 0)
1206            {
1207            int ncount = 0;
1208            switch (c)
1209              {
1210              case 0x000d:
1211              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1212              /* Fall through */
1213              case 0x000a:
1214              case 0x000b:
1215              case 0x000c:
1216              case 0x0085:
1217              case 0x2028:
1218              case 0x2029:
1219              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1220                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1221                {
1222                active_count--;           /* Remove non-match possibility */
1223                next_active_state--;
1224                }
1225              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1226              break;
1227              default:
1228              break;
1229              }
1230            }
1231          break;
1232    
1233        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1234    #ifdef SUPPORT_UCP
1235        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1236        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1237        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1238          case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1239        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1240          { ADD_ACTIVE(state_offset + 5, 0); }          { ADD_ACTIVE(state_offset + 6, 0); }
1241        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1242        if (clen > 0)        if (clen > 0)
1243          {          {
1244          int category = ucp_findchar(c, &chartype, &othercase);          BOOL OK;
1245          int rqdtype = code[4];          int category = _pcre_ucp_findprop(c, &chartype, &script);
1246          if ((d == OP_PROP) ==          switch(code[4])
             (rqdtype == ((rqdtype >= 128)? (category + 128) : chartype)))  
1247            {            {
1248              case PT_ANY:
1249              OK = TRUE;
1250              break;
1251    
1252              case PT_LAMP:
1253              OK = chartype == ucp_Lu || chartype == ucp_Ll || chartype == ucp_Lt;
1254              break;
1255    
1256              case PT_GC:
1257              OK = category == code[5];
1258              break;
1259    
1260              case PT_PC:
1261              OK = chartype == code[5];
1262              break;
1263    
1264              case PT_SC:
1265              OK = script == code[5];
1266              break;
1267    
1268              /* Should never occur, but keep compilers from grumbling. */
1269    
1270              default:
1271              OK = codevalue != OP_PROP;
1272              break;
1273              }
1274    
1275            if (OK == (d == OP_PROP))
1276              {
1277              if (codevalue == OP_PROP_EXTRA + OP_TYPEPOSUPTO)
1278                {
1279                active_count--;           /* Remove non-match possibility */
1280                next_active_state--;
1281                }
1282            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1283              { ADD_NEW(state_offset + 5, 0); }              { ADD_NEW(state_offset + 6, 0); }
1284            else            else
1285              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1286            }            }
# Line 975  for (;;) Line 1291  for (;;)
1291        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:        case OP_EXTUNI_EXTRA + OP_TYPEEXACT:
1292        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEUPTO:
1293        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1294          case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1295        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1296          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 4, 0); }
1297        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1298        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1299          {          {
1300          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1301          int ncount = 0;          int ncount = 0;
1302            if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1303              {
1304              active_count--;           /* Remove non-match possibility */
1305              next_active_state--;
1306              }
1307          while (nptr < end_subject)          while (nptr < end_subject)
1308            {            {
1309            int nd;            int nd;
1310            int ndlen = 1;            int ndlen = 1;
1311            GETCHARLEN(nd, nptr, ndlen);            GETCHARLEN(nd, nptr, ndlen);
1312            if (ucp_findchar(nd, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(nd, &chartype, &script) != ucp_M) break;
1313            ncount++;            ncount++;
1314            nptr += ndlen;            nptr += ndlen;
1315            }            }
# Line 997  for (;;) Line 1319  for (;;)
1319            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1320          }          }
1321        break;        break;
1322    #endif
1323    
1324          /*-----------------------------------------------------------------*/
1325          case OP_ANYNL_EXTRA + OP_TYPEEXACT:
1326          case OP_ANYNL_EXTRA + OP_TYPEUPTO:
1327          case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1328          case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1329          if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1330            { ADD_ACTIVE(state_offset + 4, 0); }
1331          count = current_state->count;  /* Number already matched */
1332          if (clen > 0)
1333            {
1334            int ncount = 0;
1335            switch (c)
1336              {
1337              case 0x000d:
1338              if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1339              /* Fall through */
1340              case 0x000a:
1341              case 0x000b:
1342              case 0x000c:
1343              case 0x0085:
1344              case 0x2028:
1345              case 0x2029:
1346              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1347                {
1348                active_count--;           /* Remove non-match possibility */
1349                next_active_state--;
1350                }
1351              if (++count >= GET2(code, 1))
1352                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1353              else
1354                { ADD_NEW_DATA(-state_offset, count, ncount); }
1355              break;
1356              default:
1357              break;
1358              }
1359            }
1360          break;
1361    
1362  /* ========================================================================== */  /* ========================================================================== */
1363        /* These opcodes are followed by a character that is usually compared        /* These opcodes are followed by a character that is usually compared
# Line 1018  for (;;) Line 1379  for (;;)
1379          {          {
1380          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
1381            {            {
1382              unsigned int othercase;
1383            if (c < 128) othercase = fcc[c]; else            if (c < 128) othercase = fcc[c]; else
1384    
1385            /* If we have Unicode property support, we can use it to test the            /* If we have Unicode property support, we can use it to test the
1386            other case of the character, if there is one. The result of            other case of the character. */
           ucp_findchar() is < 0 if the char isn't found, and othercase is  
           returned as zero if there isn't another case. */  
1387    
1388  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1389            if (ucp_findchar(c, &chartype, &othercase) < 0)            othercase = _pcre_ucp_othercase(c);
1390    #else
1391              othercase = NOTACHAR;
1392  #endif  #endif
             othercase = -1;  
1393    
1394            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
1395            }            }
# Line 1050  for (;;) Line 1411  for (;;)
1411        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
1412    
1413        case OP_EXTUNI:        case OP_EXTUNI:
1414        if (clen > 0 && ucp_findchar(c, &chartype, &othercase) != ucp_M)        if (clen > 0 && _pcre_ucp_findprop(c, &chartype, &script) != ucp_M)
1415          {          {
1416          const uschar *nptr = ptr + clen;          const uschar *nptr = ptr + clen;
1417          int ncount = 0;          int ncount = 0;
# Line 1058  for (;;) Line 1419  for (;;)
1419            {            {
1420            int nclen = 1;            int nclen = 1;
1421            GETCHARLEN(c, nptr, nclen);            GETCHARLEN(c, nptr, nclen);
1422            if (ucp_findchar(c, &chartype, &othercase) != ucp_M) break;            if (_pcre_ucp_findprop(c, &chartype, &script) != ucp_M) break;
1423            ncount++;            ncount++;
1424            nptr += nclen;            nptr += nclen;
1425            }            }
# Line 1068  for (;;) Line 1429  for (;;)
1429  #endif  #endif
1430    
1431        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1432          /* This is tricky like EXTUNI because it too can match more than one
1433          character (when CR is followed by LF). In this case, set up a negative
1434          state to wait for one character to pass before continuing. */
1435    
1436          case OP_ANYNL:
1437          if (clen > 0) switch(c)
1438            {
1439            case 0x000a:
1440            case 0x000b:
1441            case 0x000c:
1442            case 0x0085:
1443            case 0x2028:
1444            case 0x2029:
1445            ADD_NEW(state_offset + 1, 0);
1446            break;
1447            case 0x000d:
1448            if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1449              {
1450              ADD_NEW_DATA(-(state_offset + 1), 0, 1);
1451              }
1452            else
1453              {
1454              ADD_NEW(state_offset + 1, 0);
1455              }
1456            break;
1457            }
1458          break;
1459    
1460          /*-----------------------------------------------------------------*/
1461        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1462        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1463        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1075  for (;;) Line 1465  for (;;)
1465        case OP_NOT:        case OP_NOT:
1466        if (clen > 0)        if (clen > 0)
1467          {          {
1468          int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;
1469          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }
1470          }          }
1471        break;        break;
# Line 1083  for (;;) Line 1473  for (;;)
1473        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1474        case OP_PLUS:        case OP_PLUS:
1475        case OP_MINPLUS:        case OP_MINPLUS:
1476          case OP_POSPLUS:
1477        case OP_NOTPLUS:        case OP_NOTPLUS:
1478        case OP_NOTMINPLUS:        case OP_NOTMINPLUS:
1479          case OP_NOTPOSPLUS:
1480        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1481        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
1482        if (clen > 0)        if (clen > 0)
1483          {          {
1484          int otherd = -1;          unsigned int otherd = NOTACHAR;
1485          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1486            {            {
1487  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1488            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1489              {              {
1490  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1491              if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1492  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1493              }              }
1494            else            else
# Line 1104  for (;;) Line 1496  for (;;)
1496            otherd = fcc[d];            otherd = fcc[d];
1497            }            }
1498          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1499            { count++; ADD_NEW(state_offset, count); }            {
1500              if (count > 0 &&
1501                  (codevalue == OP_POSPLUS || codevalue == OP_NOTPOSPLUS))
1502                {
1503                active_count--;             /* Remove non-match possibility */
1504                next_active_state--;
1505                }
1506              count++;
1507              ADD_NEW(state_offset, count);
1508              }
1509          }          }
1510        break;        break;
1511    
1512        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1513        case OP_QUERY:        case OP_QUERY:
1514        case OP_MINQUERY:        case OP_MINQUERY:
1515          case OP_POSQUERY:
1516        case OP_NOTQUERY:        case OP_NOTQUERY:
1517        case OP_NOTMINQUERY:        case OP_NOTMINQUERY:
1518          case OP_NOTPOSQUERY:
1519        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1520        if (clen > 0)        if (clen > 0)
1521          {          {
1522          int otherd = -1;          unsigned int otherd = NOTACHAR;
1523          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1524            {            {
1525  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1526            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1527              {              {
1528  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1529              if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1530  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1531              }              }
1532            else            else
# Line 1131  for (;;) Line 1534  for (;;)
1534            otherd = fcc[d];            otherd = fcc[d];
1535            }            }
1536          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1537            { ADD_NEW(state_offset + dlen + 1, 0); }            {
1538              if (codevalue == OP_POSQUERY || codevalue == OP_NOTPOSQUERY)
1539                {
1540                active_count--;            /* Remove non-match possibility */
1541                next_active_state--;
1542                }
1543              ADD_NEW(state_offset + dlen + 1, 0);
1544              }
1545          }          }
1546        break;        break;
1547    
1548        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1549        case OP_STAR:        case OP_STAR:
1550        case OP_MINSTAR:        case OP_MINSTAR:
1551          case OP_POSSTAR:
1552        case OP_NOTSTAR:        case OP_NOTSTAR:
1553        case OP_NOTMINSTAR:        case OP_NOTMINSTAR:
1554          case OP_NOTPOSSTAR:
1555        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
1556        if (clen > 0)        if (clen > 0)
1557          {          {
1558          int otherd = -1;          unsigned int otherd = NOTACHAR;
1559          if ((ims && PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1560            {            {
1561  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1562            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1563              {              {
1564  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1565              if (ucp_findchar(c, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1566  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1567              }              }
1568            else            else
# Line 1158  for (;;) Line 1570  for (;;)
1570            otherd = fcc[d];            otherd = fcc[d];
1571            }            }
1572          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1573            { ADD_NEW(state_offset, 0); }            {
1574              if (codevalue == OP_POSSTAR || codevalue == OP_NOTPOSSTAR)
1575                {
1576                active_count--;            /* Remove non-match possibility */
1577                next_active_state--;
1578                }
1579              ADD_NEW(state_offset, 0);
1580              }
1581          }          }
1582        break;        break;
1583    
1584        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1585        case OP_EXACT:        case OP_EXACT:
1586          case OP_NOTEXACT:
1587          count = current_state->count;  /* Number already matched */
1588          if (clen > 0)
1589            {
1590            unsigned int otherd = NOTACHAR;
1591            if ((ims & PCRE_CASELESS) != 0)
1592              {
1593    #ifdef SUPPORT_UTF8
1594              if (utf8 && d >= 128)
1595                {
1596    #ifdef SUPPORT_UCP
1597                otherd = _pcre_ucp_othercase(d);
1598    #endif  /* SUPPORT_UCP */
1599                }
1600              else
1601    #endif  /* SUPPORT_UTF8 */
1602              otherd = fcc[d];
1603              }
1604            if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1605              {
1606              if (++count >= GET2(code, 1))
1607                { ADD_NEW(state_offset + dlen + 3, 0); }
1608              else
1609                { ADD_NEW(state_offset, count); }
1610              }
1611            }
1612          break;
1613    
1614          /*-----------------------------------------------------------------*/
1615        case OP_UPTO:        case OP_UPTO:
1616        case OP_MINUPTO:        case OP_MINUPTO:
1617        case OP_NOTEXACT:        case OP_POSUPTO:
1618        case OP_NOTUPTO:        case OP_NOTUPTO:
1619        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
1620        if (codevalue != OP_EXACT && codevalue != OP_NOTEXACT)        case OP_NOTPOSUPTO:
1621          { ADD_ACTIVE(state_offset + dlen + 3, 0); }        ADD_ACTIVE(state_offset + dlen + 3, 0);
1622        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1623        if (clen > 0)        if (clen > 0)
1624          {          {
1625          int otherd = -1;          unsigned int otherd = NOTACHAR;
1626          if ((ims & PCRE_CASELESS) != 0)          if ((ims & PCRE_CASELESS) != 0)
1627            {            {
1628  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
1629            if (utf8 && c >= 128)            if (utf8 && d >= 128)
1630              {              {
1631  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
1632              if (ucp_findchar(d, &chartype, &otherd) < 0) otherd = -1;              otherd = _pcre_ucp_othercase(d);
1633  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
1634              }              }
1635            else            else
# Line 1190  for (;;) Line 1638  for (;;)
1638            }            }
1639          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
1640            {            {
1641              if (codevalue == OP_POSUPTO || codevalue == OP_NOTPOSUPTO)
1642                {
1643                active_count--;             /* Remove non-match possibility */
1644                next_active_state--;
1645                }
1646            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1647              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 3, 0); }
1648            else            else
# Line 1267  for (;;) Line 1720  for (;;)
1720              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 5, 0); }
1721            if (isinclass)            if (isinclass)
1722              {              {
1723              if (++count >= GET2(ecode, 3))              int max = GET2(ecode, 3);
1724                if (++count >= max && max != 0)   /* Max 0 => no limit */
1725                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 5, 0); }
1726              else              else
1727                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
# Line 1317  for (;;) Line 1771  for (;;)
1771    
1772        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1773        case OP_COND:        case OP_COND:
1774          case OP_SCOND:
1775          {          {
1776          int local_offsets[1000];          int local_offsets[1000];
1777          int local_workspace[1000];          int local_workspace[1000];
1778          int condcode = code[LINK_SIZE+1];          int condcode = code[LINK_SIZE+1];
1779    
1780          /* The only supported version of OP_CREF is for the value 0xffff, which          /* Back reference conditions are not supported */
1781          means "test if in a recursion". */  
1782            if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;
1783    
1784            /* The DEFINE condition is always false */
1785    
1786          if (condcode == OP_CREF)          if (condcode == OP_DEF)
1787              {
1788              ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0);
1789              }
1790    
1791            /* The only supported version of OP_RREF is for the value RREF_ANY,
1792            which means "test if in any recursion". We can't test for specifically
1793            recursed groups. */
1794    
1795            else if (condcode == OP_RREF)
1796            {            {
1797            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE+2);
1798            if (value != 0xffff) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
1799            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }            if (recursing > 0) { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }
1800              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }              else { ADD_ACTIVE(state_offset + GET(code, 1) + LINK_SIZE + 1, 0); }
1801            }            }
# Line 1424  for (;;) Line 1891  for (;;)
1891        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1892        case OP_ONCE:        case OP_ONCE:
1893          {          {
         const uschar *endcode;  
1894          int local_offsets[2];          int local_offsets[2];
1895          int local_workspace[1000];          int local_workspace[1000];
1896    
# Line 1446  for (;;) Line 1912  for (;;)
1912            const uschar *end_subpattern = code;            const uschar *end_subpattern = code;
1913            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
1914            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
           BOOL is_repeated;  
1915    
1916            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
1917              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
# Line 1521  for (;;) Line 1986  for (;;)
1986          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
1987          cb.callout_number   = code[1];          cb.callout_number   = code[1];
1988          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
1989          cb.subject          = (char *)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
1990          cb.subject_length   = end_subject - start_subject;          cb.subject_length   = end_subject - start_subject;
1991          cb.start_match      = current_subject - start_subject;          cb.start_match      = current_subject - start_subject;
1992          cb.current_position = ptr - start_subject;          cb.current_position = ptr - start_subject;
# Line 1569  for (;;) Line 2034  for (;;)
2034      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
2035        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
2036        rlevel*2-2, SP));        rlevel*2-2, SP));
2037      return match_count;      break;        /* In effect, "return", but see the comment below */
2038      }      }
2039    
2040    /* One or more states are active for the next character. */    /* One or more states are active for the next character. */
# Line 1577  for (;;) Line 2042  for (;;)
2042    ptr += clen;    /* Advance to next subject character */    ptr += clen;    /* Advance to next subject character */
2043    }               /* Loop to move along the subject string */    }               /* Loop to move along the subject string */
2044    
2045  /* Control never gets here, but we must keep the compiler happy. */  /* Control gets here from "break" a few lines above. We do it this way because
2046    if we use "return" above, we have compiler trouble. Some compilers warn if
2047    there's nothing here because they think the function doesn't return a value. On
2048    the other hand, if we put a dummy statement here, some more clever compilers
2049    complain that it can't be reached. Sigh. */
2050    
2051  DPRINTF(("%.*s+++ Unexpected end of internal_dfa_exec %d +++\n"  return match_count;
   "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, rlevel*2-2, SP));  
 return PCRE_ERROR_NOMATCH;  
2052  }  }
2053    
2054    
# Line 1597  is not anchored. Line 2064  is not anchored.
2064    
2065  Arguments:  Arguments:
2066    argument_re     points to the compiled expression    argument_re     points to the compiled expression
2067    extra_data      points to extra data or is NULL (not currently used)    extra_data      points to extra data or is NULL
2068    subject         points to the subject string    subject         points to the subject string
2069    length          length of subject string (may contain binary zeros)    length          length of subject string (may contain binary zeros)
2070    start_offset    where to start in the subject string    start_offset    where to start in the subject string
# Line 1613  Returns:          > 0 => number of match Line 2080  Returns:          > 0 => number of match
2080                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
2081  */  */
2082    
2083  EXPORT int  PCRE_EXP_DEFN int
2084  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
2085    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
2086    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
2087  {  {
2088  real_pcre *re = (real_pcre *)argument_re;  real_pcre *re = (real_pcre *)argument_re;
2089  dfa_match_data match_block;  dfa_match_data match_block;
2090    dfa_match_data *md = &match_block;
2091  BOOL utf8, anchored, startline, firstline;  BOOL utf8, anchored, startline, firstline;
2092  const uschar *current_subject, *end_subject, *lcc;  const uschar *current_subject, *end_subject, *lcc;
2093    
# Line 1634  BOOL req_byte_caseless = FALSE; Line 2102  BOOL req_byte_caseless = FALSE;
2102  int first_byte = -1;  int first_byte = -1;
2103  int req_byte = -1;  int req_byte = -1;
2104  int req_byte2 = -1;  int req_byte2 = -1;
2105    int newline;
2106    
2107  /* Plausibility checks */  /* Plausibility checks */
2108    
# Line 1648  flipping, so we scan the extra_data bloc Line 2117  flipping, so we scan the extra_data bloc
2117  match block, so we must initialize them beforehand. However, the other fields  match block, so we must initialize them beforehand. However, the other fields
2118  in the match block must not be set until after the byte flipping. */  in the match block must not be set until after the byte flipping. */
2119    
2120  match_block.tables = re->tables;  md->tables = re->tables;
2121  match_block.callout_data = NULL;  md->callout_data = NULL;
2122    
2123  if (extra_data != NULL)  if (extra_data != NULL)
2124    {    {
# Line 1657  if (extra_data != NULL) Line 2126  if (extra_data != NULL)
2126    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)    if ((flags & PCRE_EXTRA_STUDY_DATA) != 0)
2127      study = (const pcre_study_data *)extra_data->study_data;      study = (const pcre_study_data *)extra_data->study_data;
2128    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;    if ((flags & PCRE_EXTRA_MATCH_LIMIT) != 0) return PCRE_ERROR_DFA_UMLIMIT;
2129      if ((flags & PCRE_EXTRA_MATCH_LIMIT_RECURSION) != 0)
2130        return PCRE_ERROR_DFA_UMLIMIT;
2131    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)    if ((flags & PCRE_EXTRA_CALLOUT_DATA) != 0)
2132      match_block.callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
2133    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
2134      match_block.tables = extra_data->tables;      md->tables = extra_data->tables;
2135    }    }
2136    
2137  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
# Line 1681  current_subject = (const unsigned char * Line 2152  current_subject = (const unsigned char *
2152  end_subject = (const unsigned char *)subject + length;  end_subject = (const unsigned char *)subject + length;
2153  req_byte_ptr = current_subject - 1;  req_byte_ptr = current_subject - 1;
2154    
2155    #ifdef SUPPORT_UTF8
2156  utf8 = (re->options & PCRE_UTF8) != 0;  utf8 = (re->options & PCRE_UTF8) != 0;
2157  anchored = (options & PCRE_ANCHORED) != 0 || (re->options & PCRE_ANCHORED) != 0;  #else
2158    utf8 = FALSE;
2159    #endif
2160    
2161    anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
2162      (re->options & PCRE_ANCHORED) != 0;
2163    
2164  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
2165    
2166  match_block.start_code = (const uschar *)argument_re +  md->start_code = (const uschar *)argument_re +
2167      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
2168  match_block.start_subject = (const unsigned char *)subject;  md->start_subject = (const unsigned char *)subject;
2169  match_block.end_subject = end_subject;  md->end_subject = end_subject;
2170  match_block.moptions = options;  md->moptions = options;
2171  match_block.poptions = re->options;  md->poptions = re->options;
2172    
2173    /* Handle different types of newline. The three bits give eight cases. If
2174    nothing is set at run time, whatever was used at compile time applies. */
2175    
2176    switch ((((options & PCRE_NEWLINE_BITS) == 0)? re->options : (pcre_uint32)options) &
2177             PCRE_NEWLINE_BITS)
2178      {
2179      case 0: newline = NEWLINE; break;   /* Compile-time default */
2180      case PCRE_NEWLINE_CR: newline = '\r'; break;
2181      case PCRE_NEWLINE_LF: newline = '\n'; break;
2182      case PCRE_NEWLINE_CR+
2183           PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
2184      case PCRE_NEWLINE_ANY: newline = -1; break;
2185      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
2186      default: return PCRE_ERROR_BADNEWLINE;
2187      }
2188    
2189    if (newline == -2)
2190      {
2191      md->nltype = NLTYPE_ANYCRLF;
2192      }
2193    else if (newline < 0)
2194      {
2195      md->nltype = NLTYPE_ANY;
2196      }
2197    else
2198      {
2199      md->nltype = NLTYPE_FIXED;
2200      if (newline > 255)
2201        {
2202        md->nllen = 2;
2203        md->nl[0] = (newline >> 8) & 255;
2204        md->nl[1] = newline & 255;
2205        }
2206      else
2207        {
2208        md->nllen = 1;
2209        md->nl[0] = newline;
2210        }
2211      }
2212    
2213  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
2214  back the character offset. */  back the character offset. */
# Line 1717  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 2234  if (utf8 && (options & PCRE_NO_UTF8_CHEC
2234  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
2235  in other programs later. */  in other programs later. */
2236    
2237  if (match_block.tables == NULL) match_block.tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = _pcre_default_tables;
2238    
2239  /* The lower casing table and the "must be at the start of a line" flag are  /* The lower casing table and the "must be at the start of a line" flag are
2240  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2241    
2242  lcc = match_block.tables + lcc_offset;  lcc = md->tables + lcc_offset;
2243  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->options & PCRE_STARTLINE) != 0;
2244  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2245    
# Line 1755  if ((re->options & PCRE_REQCHSET) != 0) Line 2272  if ((re->options & PCRE_REQCHSET) != 0)
2272    {    {
2273    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2274    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
2275    req_byte2 = (match_block.tables + fcc_offset)[req_byte];  /* case flipped */    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */
2276    }    }
2277    
2278  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 1773  for (;;) Line 2290  for (;;)
2290    
2291      /* Advance to a unique first char if possible. If firstline is TRUE, the      /* Advance to a unique first char if possible. If firstline is TRUE, the
2292      start of the match is constrained to the first line of a multiline string.      start of the match is constrained to the first line of a multiline string.
2293      Implement this by temporarily adjusting end_subject so that we stop scanning      Implement this by temporarily adjusting end_subject so that we stop
2294      at a newline. If the match fails at the newline, later code breaks this loop.      scanning at a newline. If the match fails at the newline, later code breaks
2295      */      this loop. */
2296    
2297      if (firstline)      if (firstline)
2298        {        {
2299        const uschar *t = current_subject;        const uschar *t = current_subject;
2300        while (t < save_end_subject && *t != '\n') t++;        while (t < md->end_subject && !IS_NEWLINE(t)) t++;
2301        end_subject = t;        end_subject = t;
2302        }        }
2303    
# Line 1795  for (;;) Line 2312  for (;;)
2312            current_subject++;            current_subject++;
2313        }        }
2314    
2315      /* Or to just after \n for a multiline match if possible */      /* Or to just after a linebreak for a multiline match if possible */
2316    
2317      else if (startline)      else if (startline)
2318        {        {
2319        if (current_subject > match_block.start_subject + start_offset)        if (current_subject > md->start_subject + start_offset)
2320          {          {
2321          while (current_subject < end_subject && current_subject[-1] != NEWLINE)          while (current_subject <= end_subject && !WAS_NEWLINE(current_subject))
2322              current_subject++;
2323    
2324            /* If we have just passed a CR and the newline option is ANY or
2325            ANYCRLF, and we are now at a LF, advance the match position by one more
2326            character. */
2327    
2328            if (current_subject[-1] == '\r' &&
2329                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
2330                 current_subject < end_subject &&
2331                 *current_subject == '\n')
2332            current_subject++;            current_subject++;
2333          }          }
2334        }        }
# Line 1882  for (;;) Line 2409  for (;;)
2409    /* OK, now we can do the business */    /* OK, now we can do the business */
2410    
2411    rc = internal_dfa_exec(    rc = internal_dfa_exec(
2412      &match_block,                              /* fixed match data */      md,                                /* fixed match data */
2413      match_block.start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
2414      current_subject,                           /* where we currently are */      current_subject,                   /* where we currently are */
2415      start_offset,                              /* start offset in subject */      start_offset,                      /* start offset in subject */
2416      offsets,                                   /* offset vector */      offsets,                           /* offset vector */
2417      offsetcount,                               /* size of same */      offsetcount,                       /* size of same */
2418      workspace,                                 /* workspace vector */      workspace,                         /* workspace vector */
2419      wscount,                                   /* size of same */      wscount,                           /* size of same */
2420      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */
2421      0,                                         /* function recurse level */      0,                                 /* function recurse level */
2422      0);                                        /* regex recurse level */      0);                                /* regex recurse level */
2423    
2424    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
2425    on only if not anchored. */    on only if not anchored. */
# Line 1902  for (;;) Line 2429  for (;;)
2429    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
2430    and firstline is set. */    and firstline is set. */
2431    
2432    if (firstline && *current_subject == NEWLINE) break;    if (firstline && IS_NEWLINE(current_subject)) break;
2433    current_subject++;    current_subject++;
   
 #ifdef SUPPORT_UTF8  
2434    if (utf8)    if (utf8)
2435      {      {
2436      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)
2437        current_subject++;        current_subject++;
2438      }      }
 #endif  
   
2439    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2440    }  
2441      /* If we have just passed a CR and the newline option is CRLF or ANY or
2442      ANYCRLF, and we are now at a LF, advance the match position by one more
2443      character. */
2444    
2445      if (current_subject[-1] == '\r' &&
2446           (md->nltype == NLTYPE_ANY ||
2447            md->nltype == NLTYPE_ANYCRLF ||
2448            md->nllen == 2) &&
2449           current_subject < end_subject &&
2450           *current_subject == '\n')
2451        current_subject++;
2452    
2453      }   /* "Bumpalong" loop */
2454    
2455  return PCRE_ERROR_NOMATCH;  return PCRE_ERROR_NOMATCH;
2456  }  }

Legend:
Removed from v.77  
changed lines
  Added in v.152

  ViewVC Help
Powered by ViewVC 1.1.5