/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 428 by ph10, Mon Aug 31 17:10:26 2009 UTC | revision 1376 by ph10, Sat Oct 12 18:02:11 2013 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2009 University of Cambridge             Copyright (c) 1997-2013 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
39  */  */
40    
   
41  /* This module contains the external function pcre_dfa_exec(), which is an  /* This module contains the external function pcre_dfa_exec(), which is an
42  alternative matching function that uses a sort of DFA algorithm (not a true  alternative matching function that uses a sort of DFA algorithm (not a true
43  FSM). This is NOT Perl- compatible, but it has advantages in certain  FSM). This is NOT Perl-compatible, but it has advantages in certain
44  applications. */  applications. */
45    
46    
47    /* NOTE ABOUT PERFORMANCE: A user of this function sent some code that improved
48    the performance of his patterns greatly. I could not use it as it stood, as it
49    was not thread safe, and made assumptions about pattern sizes. Also, it caused
50    test 7 to loop, and test 9 to crash with a segfault.
51    
52    The issue is the check for duplicate states, which is done by a simple linear
53    search up the state list. (Grep for "duplicate" below to find the code.) For
54    many patterns, there will never be many states active at one time, so a simple
55    linear search is fine. In patterns that have many active states, it might be a
56    bottleneck. The suggested code used an indexing scheme to remember which states
57    had previously been used for each character, and avoided the linear search when
58    it knew there was no chance of a duplicate. This was implemented when adding
59    states to the state lists.
60    
61    I wrote some thread-safe, not-limited code to try something similar at the time
62    of checking for duplicates (instead of when adding states), using index vectors
63    on the stack. It did give a 13% improvement with one specially constructed
64    pattern for certain subject strings, but on other strings and on many of the
65    simpler patterns in the test suite it did worse. The major problem, I think,
66    was the extra time to initialize the index. This had to be done for each call
67    of internal_dfa_exec(). (The supplied patch used a static vector, initialized
68    only once - I suspect this was the cause of the problems with the tests.)
69    
70    Overall, I concluded that the gains in some cases did not outweigh the losses
71    in others, so I abandoned this code. */
72    
73    
74    
75  #ifdef HAVE_CONFIG_H  #ifdef HAVE_CONFIG_H
76  #include "config.h"  #include "config.h"
77  #endif  #endif
# Line 78  never stored, so we push them well clear Line 105  never stored, so we push them well clear
105    
106    
107  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
108  character that is to be tested in some way. This makes is possible to  character that is to be tested in some way. This makes it possible to
109  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
110  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
111  small value. ***NOTE*** If the start of this table is modified, the two tables  small value. Non-zero values in the table are the offsets from the opcode where
112  that follow must also be modified. */  the character is to be found. ***NOTE*** If the start of this table is
113    modified, the three tables that follow must also be modified. */
114    
115  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
116    0,                             /* End                                    */    0,                             /* End                                    */
117    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
118    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
119    0, 0, 0,                       /* Any, AllAny, Anybyte                   */    0, 0, 0,                       /* Any, AllAny, Anybyte                   */
120    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0,                          /* \P, \p                                 */
121    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
122    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0,                             /* \X                                     */
123      0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
124    1,                             /* Char                                   */    1,                             /* Char                                   */
125    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
126    1,                             /* not                                    */    1,                             /* not                                    */
127      1,                             /* noti                                   */
128    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
129    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
130    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
131    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
132      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
133      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
134      1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
135      1+IMM2_SIZE,                   /* exact I                                */
136      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
137    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
138    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
139    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
140    1, 1, 1, 3,                    /* NOT *+, ++, ?+, updo+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
141      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
142      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
143      1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
144      1+IMM2_SIZE,                   /* NOT exact I                            */
145      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
146    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
147    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
148    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
149    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
150      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
151    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
152    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
153    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 114  static const uschar coptable[] = { Line 155  static const uschar coptable[] = {
155    0,                             /* NCLASS                                 */    0,                             /* NCLASS                                 */
156    0,                             /* XCLASS - variable length               */    0,                             /* XCLASS - variable length               */
157    0,                             /* REF                                    */    0,                             /* REF                                    */
158      0,                             /* REFI                                   */
159      0,                             /* DNREF                                  */
160      0,                             /* DNREFI                                 */
161    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
162    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
163    0,                             /* Alt                                    */    0,                             /* Alt                                    */
164    0,                             /* Ket                                    */    0,                             /* Ket                                    */
165    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
166    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
167      0,                             /* KetRpos                                */
168      0,                             /* Reverse                                */
169    0,                             /* Assert                                 */    0,                             /* Assert                                 */
170    0,                             /* Assert not                             */    0,                             /* Assert not                             */
171    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
172    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
173      0, 0,                          /* ONCE, ONCE_NC                          */
174      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
175      0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
176      0, 0,                          /* CREF, DNCREF                           */
177      0, 0,                          /* RREF, DNRREF                           */
178      0,                             /* DEF                                    */
179      0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
180      0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
181      0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
182      0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
183      0, 0                           /* CLOSE, SKIPZERO  */
184    };
185    
186    /* This table identifies those opcodes that inspect a character. It is used to
187    remember the fact that a character could have been inspected when the end of
188    the subject is reached. ***NOTE*** If the start of this table is modified, the
189    two tables that follow must also be modified. */
190    
191    static const pcre_uint8 poptable[] = {
192      0,                             /* End                                    */
193      0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
194      1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
195      1, 1, 1,                       /* Any, AllAny, Anybyte                   */
196      1, 1,                          /* \P, \p                                 */
197      1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
198      1,                             /* \X                                     */
199      0, 0, 0, 0, 0, 0,              /* \Z, \z, $, $M, ^, ^M                   */
200      1,                             /* Char                                   */
201      1,                             /* Chari                                  */
202      1,                             /* not                                    */
203      1,                             /* noti                                   */
204      /* Positive single-char repeats                                          */
205      1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
206      1, 1, 1,                       /* upto, minupto, exact                   */
207      1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
208      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
209      1, 1, 1,                       /* upto I, minupto I, exact I             */
210      1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
211      /* Negative single-char repeats - only for chars < 256                   */
212      1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
213      1, 1, 1,                       /* NOT upto, minupto, exact               */
214      1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
215      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
216      1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
217      1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
218      /* Positive type repeats                                                 */
219      1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
220      1, 1, 1,                       /* Type upto, minupto, exact              */
221      1, 1, 1, 1,                    /* Type *+, ++, ?+, upto+                 */
222      /* Character class & ref repeats                                         */
223      1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
224      1, 1,                          /* CRRANGE, CRMINRANGE                    */
225      1,                             /* CLASS                                  */
226      1,                             /* NCLASS                                 */
227      1,                             /* XCLASS - variable length               */
228      0,                             /* REF                                    */
229      0,                             /* REFI                                   */
230      0,                             /* DNREF                                  */
231      0,                             /* DNREFI                                 */
232      0,                             /* RECURSE                                */
233      0,                             /* CALLOUT                                */
234      0,                             /* Alt                                    */
235      0,                             /* Ket                                    */
236      0,                             /* KetRmax                                */
237      0,                             /* KetRmin                                */
238      0,                             /* KetRpos                                */
239    0,                             /* Reverse                                */    0,                             /* Reverse                                */
240    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0,                             /* Assert                                 */
241    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0,                             /* Assert not                             */
242    0,                             /* CREF                                   */    0,                             /* Assert behind                          */
243    0,                             /* RREF                                   */    0,                             /* Assert behind not                      */
244      0, 0,                          /* ONCE, ONCE_NC                          */
245      0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
246      0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
247      0, 0,                          /* CREF, DNCREF                           */
248      0, 0,                          /* RREF, DNRREF                           */
249    0,                             /* DEF                                    */    0,                             /* DEF                                    */
250    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
251    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
252    0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
253      0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
254      0, 0                           /* CLOSE, SKIPZERO                        */
255  };  };
256    
257  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
258  and \w */  and \w */
259    
260  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
261    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
262    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
263    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 146  static const uschar toptable1[] = { Line 265  static const uschar toptable1[] = {
265    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
266  };  };
267    
268  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
269    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
270    ctype_digit, 0,    ctype_digit, 0,
271    ctype_space, 0,    ctype_space, 0,
# Line 163  these structures in, is a vector of ints Line 282  these structures in, is a vector of ints
282  typedef struct stateblock {  typedef struct stateblock {
283    int offset;                     /* Offset to opcode */    int offset;                     /* Offset to opcode */
284    int count;                      /* Count for repeats */    int count;                      /* Count for repeats */
   int ims;                        /* ims flag bits */  
285    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
286  } stateblock;  } stateblock;
287    
288  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (int)(sizeof(stateblock)/sizeof(int))
289    
290    
291  #ifdef DEBUG  #ifdef PCRE_DEBUG
292  /*************************************************  /*************************************************
293  *             Print character string             *  *             Print character string             *
294  *************************************************/  *************************************************/
# Line 186  Returns:       nothing Line 304  Returns:       nothing
304  */  */
305    
306  static void  static void
307  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
308  {  {
309  int c;  pcre_uint32 c;
310  while (length-- > 0)  while (length-- > 0)
311    {    {
312    if (isprint(c = *(p++)))    if (isprint(c = *(p++)))
313      fprintf(f, "%c", c);      fprintf(f, "%c", c);
314    else    else
315      fprintf(f, "\\x%02x", c);      fprintf(f, "\\x{%02x}", c);
316    }    }
317  }  }
318  #endif  #endif
# Line 219  Arguments: Line 337  Arguments:
337    offsetcount       size of same    offsetcount       size of same
338    workspace         vector of workspace    workspace         vector of workspace
339    wscount           size of same    wscount           size of same
   ims               the current ims flags  
340    rlevel            function call recursion level    rlevel            function call recursion level
   recursing         regex recursive call level  
341    
342  Returns:            > 0 => number of match offset pairs placed in offsets  Returns:            > 0 => number of match offset pairs placed in offsets
343                      = 0 => offsets overflowed; longest matches are present                      = 0 => offsets overflowed; longest matches are present
# Line 236  for the current character, one for the f Line 352  for the current character, one for the f
352      { \      { \
353      next_active_state->offset = (x); \      next_active_state->offset = (x); \
354      next_active_state->count  = (y); \      next_active_state->count  = (y); \
     next_active_state->ims    = ims; \  
355      next_active_state++; \      next_active_state++; \
356      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
357      } \      } \
# Line 247  for the current character, one for the f Line 362  for the current character, one for the f
362      { \      { \
363      next_active_state->offset = (x); \      next_active_state->offset = (x); \
364      next_active_state->count  = (y); \      next_active_state->count  = (y); \
     next_active_state->ims    = ims; \  
365      next_active_state->data   = (z); \      next_active_state->data   = (z); \
366      next_active_state++; \      next_active_state++; \
367      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
# Line 259  for the current character, one for the f Line 373  for the current character, one for the f
373      { \      { \
374      next_new_state->offset = (x); \      next_new_state->offset = (x); \
375      next_new_state->count  = (y); \      next_new_state->count  = (y); \
     next_new_state->ims    = ims; \  
376      next_new_state++; \      next_new_state++; \
377      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
378      } \      } \
# Line 270  for the current character, one for the f Line 383  for the current character, one for the f
383      { \      { \
384      next_new_state->offset = (x); \      next_new_state->offset = (x); \
385      next_new_state->count  = (y); \      next_new_state->count  = (y); \
     next_new_state->ims    = ims; \  
386      next_new_state->data   = (z); \      next_new_state->data   = (z); \
387      next_new_state++; \      next_new_state++; \
388      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d) line %d\n", rlevel*2-2, SP, \
389          (x), (y), (z), __LINE__)); \
390      } \      } \
391    else return PCRE_ERROR_DFA_WSSIZE    else return PCRE_ERROR_DFA_WSSIZE
392    
# Line 282  for the current character, one for the f Line 395  for the current character, one for the f
395  static int  static int
396  internal_dfa_exec(  internal_dfa_exec(
397    dfa_match_data *md,    dfa_match_data *md,
398    const uschar *this_start_code,    const pcre_uchar *this_start_code,
399    const uschar *current_subject,    const pcre_uchar *current_subject,
400    int start_offset,    int start_offset,
401    int *offsets,    int *offsets,
402    int offsetcount,    int offsetcount,
403    int *workspace,    int *workspace,
404    int wscount,    int wscount,
405    int ims,    int  rlevel)
   int  rlevel,  
   int  recursing)  
406  {  {
407  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
408  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
409    
410  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
411  const uschar *ptr;  const pcre_uchar *ptr;
412  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
413    
414    dfa_recursion_info new_recursive;
415    
416  int active_count, new_count, match_count;  int active_count, new_count, match_count;
417    
418  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
419  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
420    
421  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
422  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
423  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
424    
425  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
426  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
427  #else  #else
428  BOOL utf8 = FALSE;  BOOL utf = FALSE;
429  #endif  #endif
430    
431    BOOL reset_could_continue = FALSE;
432    
433  rlevel++;  rlevel++;
434  offsetcount &= (-2);  offsetcount &= (-2);
435    
# Line 323  wscount = (wscount - (wscount % (INTS_PE Line 438  wscount = (wscount - (wscount % (INTS_PE
438            (2 * INTS_PER_STATEBLOCK);            (2 * INTS_PER_STATEBLOCK);
439    
440  DPRINTF(("\n%.*s---------------------\n"  DPRINTF(("\n%.*s---------------------\n"
441    "%.*sCall to internal_dfa_exec f=%d r=%d\n",    "%.*sCall to internal_dfa_exec f=%d\n",
442    rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));    rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
443    
444  ctypes = md->tables + ctypes_offset;  ctypes = md->tables + ctypes_offset;
445  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
# Line 337  next_new_state = new_states = active_sta Line 452  next_new_state = new_states = active_sta
452  new_count = 0;  new_count = 0;
453    
454  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
455    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
456        *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
457        ? IMM2_SIZE:0);
458    
459  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
460  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 365  if (*first_op == OP_REVERSE) Line 482  if (*first_op == OP_REVERSE)
482    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
483    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
484    
485  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
486    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
487    
488    if (utf8)    if (utf)
489      {      {
490      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
491        {        {
492        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
493        current_subject--;        current_subject--;
494        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
              (*current_subject & 0xc0) == 0x80)  
         current_subject--;  
495        }        }
496      }      }
497    else    else
# Line 386  if (*first_op == OP_REVERSE) Line 501  if (*first_op == OP_REVERSE)
501    
502      {      {
503      gone_back = (current_subject - max_back < start_subject)?      gone_back = (current_subject - max_back < start_subject)?
504        current_subject - start_subject : max_back;        (int)(current_subject - start_subject) : max_back;
505      current_subject -= gone_back;      current_subject -= gone_back;
506      }      }
507    
508      /* Save the earliest consulted character */
509    
510      if (current_subject < md->start_used_ptr)
511        md->start_used_ptr = current_subject;
512    
513    /* Now we can process the individual branches. */    /* Now we can process the individual branches. */
514    
515    end_code = this_start_code;    end_code = this_start_code;
# Line 398  if (*first_op == OP_REVERSE) Line 518  if (*first_op == OP_REVERSE)
518      int back = GET(end_code, 2+LINK_SIZE);      int back = GET(end_code, 2+LINK_SIZE);
519      if (back <= gone_back)      if (back <= gone_back)
520        {        {
521        int bstate = end_code - start_code + 2 + 2*LINK_SIZE;        int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
522        ADD_NEW_DATA(-bstate, 0, gone_back - back);        ADD_NEW_DATA(-bstate, 0, gone_back - back);
523        }        }
524      end_code += GET(end_code, 1);      end_code += GET(end_code, 1);
# Line 431  else Line 551  else
551    else    else
552      {      {
553      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
554        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
555            *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
556            ? IMM2_SIZE:0);
557      do      do
558        {        {
559        ADD_NEW(end_code - start_code + length, 0);        ADD_NEW((int)(end_code - start_code + length), 0);
560        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
561        length = 1 + LINK_SIZE;        length = 1 + LINK_SIZE;
562        }        }
# Line 444  else Line 566  else
566    
567  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
568    
569  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
570    
571  /* Loop for scanning the subject */  /* Loop for scanning the subject */
572    
# Line 453  for (;;) Line 575  for (;;)
575    {    {
576    int i, j;    int i, j;
577    int clen, dlen;    int clen, dlen;
578    unsigned int c, d;    pcre_uint32 c, d;
579    int forced_fail = 0;    int forced_fail = 0;
580    int reached_end = 0;    BOOL partial_newline = FALSE;
581      BOOL could_continue = reset_could_continue;
582      reset_could_continue = FALSE;
583    
584    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
585    new state list. */    new state list. */
# Line 469  for (;;) Line 593  for (;;)
593    workspace[0] ^= 1;              /* Remember for the restarting feature */    workspace[0] ^= 1;              /* Remember for the restarting feature */
594    workspace[1] = active_count;    workspace[1] = active_count;
595    
596  #ifdef DEBUG  #ifdef PCRE_DEBUG
597    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
598    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
599    printf("\"\n");    printf("\"\n");
600    
601    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 491  for (;;) Line 615  for (;;)
615    
616    if (ptr < end_subject)    if (ptr < end_subject)
617      {      {
618      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of data items in the character */
619  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
620      if (utf8) { GETCHARLEN(c, ptr, clen); } else      GETCHARLENTEST(c, ptr, clen);
621  #endif  /* SUPPORT_UTF8 */  #else
622      c = *ptr;      c = *ptr;
623    #endif  /* SUPPORT_UTF */
624      }      }
625    else    else
626      {      {
# Line 511  for (;;) Line 636  for (;;)
636    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
637      {      {
638      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
639      const uschar *code;      BOOL caseless = FALSE;
640        const pcre_uchar *code;
641      int state_offset = current_state->offset;      int state_offset = current_state->offset;
642      int count, codevalue, rrc;      int codevalue, rrc;
643        int count;
644    
645  #ifdef DEBUG  #ifdef PCRE_DEBUG
646      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
647      if (clen == 0) printf("EOL\n");      if (clen == 0) printf("EOL\n");
648        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
649          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
650  #endif  #endif
651    
     /* This variable is referred to implicity in the ADD_xxx macros. */  
   
     ims = current_state->ims;  
   
652      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
653      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
654      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
655        state, arrange for it to be passed on. */
656    
657      if (state_offset < 0)      if (state_offset < 0)
658        {        {
# Line 537  for (;;) Line 661  for (;;)
661          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
662          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
663            current_state->data - 1);            current_state->data - 1);
664            if (could_continue) reset_could_continue = TRUE;
665          continue;          continue;
666          }          }
667        else        else
# Line 545  for (;;) Line 670  for (;;)
670          }          }
671        }        }
672    
673      /* Check for a duplicate state with the same count, and skip if found. */      /* Check for a duplicate state with the same count, and skip if found.
674        See the note at the head of this module about the possibility of improving
675        performance here. */
676    
677      for (j = 0; j < i; j++)      for (j = 0; j < i; j++)
678        {        {
# Line 562  for (;;) Line 689  for (;;)
689      code = start_code + state_offset;      code = start_code + state_offset;
690      codevalue = *code;      codevalue = *code;
691    
692        /* If this opcode inspects a character, but we are at the end of the
693        subject, remember the fact for use when testing for a partial match. */
694    
695        if (clen == 0 && poptable[codevalue] != 0)
696          could_continue = TRUE;
697    
698      /* If this opcode is followed by an inline character, load it. It is      /* If this opcode is followed by an inline character, load it. It is
699      tempting to test for the presence of a subject character here, but that      tempting to test for the presence of a subject character here, but that
700      is wrong, because sometimes zero repetitions of the subject are      is wrong, because sometimes zero repetitions of the subject are
701      permitted.      permitted.
702    
703      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
704      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
705      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
706      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
707      */      these ones to new opcodes. */
708    
709      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
710        {        {
711        dlen = 1;        dlen = 1;
712  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
713        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
714  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
715        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
716        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
717          {          {
# Line 608  for (;;) Line 741  for (;;)
741    
742      switch (codevalue)      switch (codevalue)
743        {        {
744    /* ========================================================================== */
745          /* These cases are never obeyed. This is a fudge that causes a compile-
746          time error if the vectors coptable or poptable, which are indexed by
747          opcode, are not the correct length. It seems to be the only way to do
748          such a check at compile time, as the sizeof() operator does not work
749          in the C preprocessor. */
750    
751          case OP_TABLE_LENGTH:
752          case OP_TABLE_LENGTH +
753            ((sizeof(coptable) == OP_TABLE_LENGTH) &&
754             (sizeof(poptable) == OP_TABLE_LENGTH)):
755          break;
756    
757  /* ========================================================================== */  /* ========================================================================== */
758        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
759        on with the next opcode. Otherwise, unless we have an empty string and        on with the next opcode. For repeating opcodes, also add the repeat
760        PCRE_NOTEMPTY is set, save the match data, shifting up all previous        state. Note that KETRPOS will always be encountered at the end of the
761          subpattern, because the possessive subpattern repeats are always handled
762          using recursive calls. Thus, it never adds any new states.
763    
764          At the end of the (sub)pattern, unless we have an empty string and
765          PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
766          start of the subject, save the match data, shifting up all previous
767        matches so we always have the longest first. */        matches so we always have the longest first. */
768    
769        case OP_KET:        case OP_KET:
770        case OP_KETRMIN:        case OP_KETRMIN:
771        case OP_KETRMAX:        case OP_KETRMAX:
772          case OP_KETRPOS:
773        if (code != end_code)        if (code != end_code)
774          {          {
775          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
# Line 626  for (;;) Line 778  for (;;)
778            ADD_ACTIVE(state_offset - GET(code, 1), 0);            ADD_ACTIVE(state_offset - GET(code, 1), 0);
779            }            }
780          }          }
781        else        else
782          {          {
783          reached_end++;    /* Count branches that reach the end */          if (ptr > current_subject ||
784          if (ptr > current_subject || (md->moptions & PCRE_NOTEMPTY) == 0)              ((md->moptions & PCRE_NOTEMPTY) == 0 &&
785                  ((md->moptions & PCRE_NOTEMPTY_ATSTART) == 0 ||
786                    current_subject > start_subject + md->start_offset)))
787            {            {
788            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
789              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
790                match_count = 0;                match_count = 0;
791            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
792            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
793            if (offsetcount >= 2)            if (offsetcount >= 2)
794              {              {
795              offsets[0] = current_subject - start_subject;              offsets[0] = (int)(current_subject - start_subject);
796              offsets[1] = ptr - start_subject;              offsets[1] = (int)(ptr - start_subject);
797              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
798                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], (char *)current_subject));
799              }              }
800            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)            if ((md->moptions & PCRE_DFA_SHORTEST) != 0)
801              {              {
# Line 650  for (;;) Line 804  for (;;)
804                match_count, rlevel*2-2, SP));                match_count, rlevel*2-2, SP));
805              return match_count;              return match_count;
806              }              }
807            }            }
808          }          }
809        break;        break;
810    
# Line 661  for (;;) Line 815  for (;;)
815        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
816        case OP_ALT:        case OP_ALT:
817        do { code += GET(code, 1); } while (*code == OP_ALT);        do { code += GET(code, 1); } while (*code == OP_ALT);
818        ADD_ACTIVE(code - start_code, 0);        ADD_ACTIVE((int)(code - start_code), 0);
819        break;        break;
820    
821        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 669  for (;;) Line 823  for (;;)
823        case OP_SBRA:        case OP_SBRA:
824        do        do
825          {          {
826          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
827          code += GET(code, 1);          code += GET(code, 1);
828          }          }
829        while (*code == OP_ALT);        while (*code == OP_ALT);
# Line 678  for (;;) Line 832  for (;;)
832        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
833        case OP_CBRA:        case OP_CBRA:
834        case OP_SCBRA:        case OP_SCBRA:
835        ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
836        code += GET(code, 1);        code += GET(code, 1);
837        while (*code == OP_ALT)        while (*code == OP_ALT)
838          {          {
839          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);          ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
840          code += GET(code, 1);          code += GET(code, 1);
841          }          }
842        break;        break;
# Line 693  for (;;) Line 847  for (;;)
847        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
848        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
849        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
850        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
851        break;        break;
852    
853        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
854        case OP_SKIPZERO:        case OP_SKIPZERO:
855        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
856        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
857        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
858        break;        break;
859    
860        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
861        case OP_CIRC:        case OP_CIRC:
862        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
           ((ims & PCRE_MULTILINE) != 0 &&  
             ptr != end_subject &&  
             WAS_NEWLINE(ptr)))  
863          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
864        break;        break;
865    
866        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
867        case OP_EOD:        case OP_CIRCM:
868        if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
869              (ptr != end_subject && WAS_NEWLINE(ptr)))
870            { ADD_ACTIVE(state_offset + 1, 0); }
871        break;        break;
872    
873        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
874        case OP_OPT:        case OP_EOD:
875        ims = code[1];        if (ptr >= end_subject)
876        ADD_ACTIVE(state_offset + 2, 0);          {
877            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
878              could_continue = TRUE;
879            else { ADD_ACTIVE(state_offset + 1, 0); }
880            }
881        break;        break;
882    
883        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 743  for (;;) Line 900  for (;;)
900        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
901        case OP_ANY:        case OP_ANY:
902        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
903          { ADD_NEW(state_offset + 1, 0); }          {
904            if (ptr + 1 >= md->end_subject &&
905                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
906                NLBLOCK->nltype == NLTYPE_FIXED &&
907                NLBLOCK->nllen == 2 &&
908                c == NLBLOCK->nl[0])
909              {
910              could_continue = partial_newline = TRUE;
911              }
912            else
913              {
914              ADD_NEW(state_offset + 1, 0);
915              }
916            }
917        break;        break;
918    
919        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 754  for (;;) Line 924  for (;;)
924    
925        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
926        case OP_EODN:        case OP_EODN:
927        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))        if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
928            could_continue = TRUE;
929          else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
930          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
931        break;        break;
932    
# Line 762  for (;;) Line 934  for (;;)
934        case OP_DOLL:        case OP_DOLL:
935        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
936          {          {
937          if (clen == 0 ||          if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
938              could_continue = TRUE;
939            else if (clen == 0 ||
940              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
941                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
942              ))              ))
943            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
944            else if (ptr + 1 >= md->end_subject &&
945                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
946                     NLBLOCK->nltype == NLTYPE_FIXED &&
947                     NLBLOCK->nllen == 2 &&
948                     c == NLBLOCK->nl[0])
949              {
950              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
951                {
952                reset_could_continue = TRUE;
953                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
954                }
955              else could_continue = partial_newline = TRUE;
956              }
957            }
958          break;
959    
960          /*-----------------------------------------------------------------*/
961          case OP_DOLLM:
962          if ((md->moptions & PCRE_NOTEOL) == 0)
963            {
964            if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
965              could_continue = TRUE;
966            else if (clen == 0 ||
967                ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
968              { ADD_ACTIVE(state_offset + 1, 0); }
969            else if (ptr + 1 >= md->end_subject &&
970                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
971                     NLBLOCK->nltype == NLTYPE_FIXED &&
972                     NLBLOCK->nllen == 2 &&
973                     c == NLBLOCK->nl[0])
974              {
975              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
976                {
977                reset_could_continue = TRUE;
978                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
979                }
980              else could_continue = partial_newline = TRUE;
981              }
982          }          }
983        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
984          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
985        break;        break;
986    
# Line 799  for (;;) Line 1011  for (;;)
1011    
1012          if (ptr > start_subject)          if (ptr > start_subject)
1013            {            {
1014            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1015  #ifdef SUPPORT_UTF8            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1016            if (utf8) BACKCHAR(temp);  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
1017              if (utf) { BACKCHAR(temp); }
1018  #endif  #endif
1019            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1020    #ifdef SUPPORT_UCP
1021              if ((md->poptions & PCRE_UCP) != 0)
1022                {
1023                if (d == '_') left_word = TRUE; else
1024                  {
1025                  int cat = UCD_CATEGORY(d);
1026                  left_word = (cat == ucp_L || cat == ucp_N);
1027                  }
1028                }
1029              else
1030    #endif
1031            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1032            }            }
1033          else left_word = 0;          else left_word = FALSE;
1034    
1035          if (clen > 0)          if (clen > 0)
1036              {
1037    #ifdef SUPPORT_UCP
1038              if ((md->poptions & PCRE_UCP) != 0)
1039                {
1040                if (c == '_') right_word = TRUE; else
1041                  {
1042                  int cat = UCD_CATEGORY(c);
1043                  right_word = (cat == ucp_L || cat == ucp_N);
1044                  }
1045                }
1046              else
1047    #endif
1048            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1049          else              /* This is a fudge to ensure that if this is the */            }
1050            {               /* last item in the pattern, we don't count it as */          else right_word = FALSE;
           reached_end--;  /* reached, thus disabling a partial match. */  
           right_word = 0;  
           }  
1051    
1052          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
1053            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 833  for (;;) Line 1066  for (;;)
1066        if (clen > 0)        if (clen > 0)
1067          {          {
1068          BOOL OK;          BOOL OK;
1069            const pcre_uint32 *cp;
1070          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1071          switch(code[1])          switch(code[1])
1072            {            {
# Line 841  for (;;) Line 1075  for (;;)
1075            break;            break;
1076    
1077            case PT_LAMP:            case PT_LAMP:
1078            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1079                   prop->chartype == ucp_Lt;
1080            break;            break;
1081    
1082            case PT_GC:            case PT_GC:
1083            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1084            break;            break;
1085    
1086            case PT_PC:            case PT_PC:
# Line 856  for (;;) Line 1091  for (;;)
1091            OK = prop->script == code[2];            OK = prop->script == code[2];
1092            break;            break;
1093    
1094              /* These are specials for combination cases. */
1095    
1096              case PT_ALNUM:
1097              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1098                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1099              break;
1100    
1101              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1102              which means that Perl space and POSIX space are now identical. PCRE
1103              was changed at release 8.34. */
1104    
1105              case PT_SPACE:    /* Perl space */
1106              case PT_PXSPACE:  /* POSIX space */
1107              switch(c)
1108                {
1109                HSPACE_CASES:
1110                VSPACE_CASES:
1111                OK = TRUE;
1112                break;
1113    
1114                default:
1115                OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1116                break;
1117                }
1118              break;
1119    
1120              case PT_WORD:
1121              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1122                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1123                   c == CHAR_UNDERSCORE;
1124              break;
1125    
1126              case PT_CLIST:
1127              cp = PRIV(ucd_caseless_sets) + code[2];
1128              for (;;)
1129                {
1130                if (c < *cp) { OK = FALSE; break; }
1131                if (c == *cp++) { OK = TRUE; break; }
1132                }
1133              break;
1134    
1135              case PT_UCNC:
1136              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1137                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1138                   c >= 0xe000;
1139              break;
1140    
1141            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1142    
1143            default:            default:
# Line 883  for (;;) Line 1165  for (;;)
1165        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1166        if (clen > 0)        if (clen > 0)
1167          {          {
1168          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1169                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1170                NLBLOCK->nltype == NLTYPE_FIXED &&
1171                NLBLOCK->nllen == 2 &&
1172                c == NLBLOCK->nl[0])
1173              {
1174              could_continue = partial_newline = TRUE;
1175              }
1176            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1177              (c < 256 &&              (c < 256 &&
1178                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1179                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 906  for (;;) Line 1196  for (;;)
1196        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1197        if (clen > 0)        if (clen > 0)
1198          {          {
1199          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201                NLBLOCK->nltype == NLTYPE_FIXED &&
1202                NLBLOCK->nllen == 2 &&
1203                c == NLBLOCK->nl[0])
1204              {
1205              could_continue = partial_newline = TRUE;
1206              }
1207            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1210                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 928  for (;;) Line 1226  for (;;)
1226        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1227        if (clen > 0)        if (clen > 0)
1228          {          {
1229          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1230                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1231                NLBLOCK->nltype == NLTYPE_FIXED &&
1232                NLBLOCK->nllen == 2 &&
1233                c == NLBLOCK->nl[0])
1234              {
1235              could_continue = partial_newline = TRUE;
1236              }
1237            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1238              (c < 256 &&              (c < 256 &&
1239                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1240                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 948  for (;;) Line 1254  for (;;)
1254        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1255        if (clen > 0)        if (clen > 0)
1256          {          {
1257          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1258                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1259                NLBLOCK->nltype == NLTYPE_FIXED &&
1260                NLBLOCK->nllen == 2 &&
1261                c == NLBLOCK->nl[0])
1262              {
1263              could_continue = partial_newline = TRUE;
1264              }
1265            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1266              (c < 256 &&              (c < 256 &&
1267                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1268                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1269            {            {
1270            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1271              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1272            else            else
1273              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1274            }            }
# Line 965  for (;;) Line 1279  for (;;)
1279        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1280        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1281        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1282        ADD_ACTIVE(state_offset + 4, 0);        ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1283        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1284        if (clen > 0)        if (clen > 0)
1285          {          {
1286          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1287                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1288                NLBLOCK->nltype == NLTYPE_FIXED &&
1289                NLBLOCK->nllen == 2 &&
1290                c == NLBLOCK->nl[0])
1291              {
1292              could_continue = partial_newline = TRUE;
1293              }
1294            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1295              (c < 256 &&              (c < 256 &&
1296                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1297                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 979  for (;;) Line 1301  for (;;)
1301              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1302              next_active_state--;              next_active_state--;
1303              }              }
1304            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1305              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1306            else            else
1307              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1308            }            }
# Line 1002  for (;;) Line 1324  for (;;)
1324        if (clen > 0)        if (clen > 0)
1325          {          {
1326          BOOL OK;          BOOL OK;
1327            const pcre_uint32 *cp;
1328          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1329          switch(code[2])          switch(code[2])
1330            {            {
# Line 1010  for (;;) Line 1333  for (;;)
1333            break;            break;
1334    
1335            case PT_LAMP:            case PT_LAMP:
1336            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1337                prop->chartype == ucp_Lt;
1338            break;            break;
1339    
1340            case PT_GC:            case PT_GC:
1341            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1342            break;            break;
1343    
1344            case PT_PC:            case PT_PC:
# Line 1025  for (;;) Line 1349  for (;;)
1349            OK = prop->script == code[3];            OK = prop->script == code[3];
1350            break;            break;
1351    
1352              /* These are specials for combination cases. */
1353    
1354              case PT_ALNUM:
1355              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1356                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1357              break;
1358    
1359              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1360              which means that Perl space and POSIX space are now identical. PCRE
1361              was changed at release 8.34. */
1362    
1363              case PT_SPACE:    /* Perl space */
1364              case PT_PXSPACE:  /* POSIX space */
1365              switch(c)
1366                {
1367                HSPACE_CASES:
1368                VSPACE_CASES:
1369                OK = TRUE;
1370                break;
1371    
1372                default:
1373                OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1374                break;
1375                }
1376              break;
1377    
1378              case PT_WORD:
1379              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1380                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1381                   c == CHAR_UNDERSCORE;
1382              break;
1383    
1384              case PT_CLIST:
1385              cp = PRIV(ucd_caseless_sets) + code[3];
1386              for (;;)
1387                {
1388                if (c < *cp) { OK = FALSE; break; }
1389                if (c == *cp++) { OK = TRUE; break; }
1390                }
1391              break;
1392    
1393              case PT_UCNC:
1394              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1395                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1396                   c >= 0xe000;
1397              break;
1398    
1399            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1400    
1401            default:            default:
# Line 1051  for (;;) Line 1422  for (;;)
1422        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:        case OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS:
1423        count = current_state->count;  /* Already matched */        count = current_state->count;  /* Already matched */
1424        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1425        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1426          {          {
1427          const uschar *nptr = ptr + clen;          int lgb, rgb;
1428            const pcre_uchar *nptr = ptr + clen;
1429          int ncount = 0;          int ncount = 0;
1430          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1431            {            {
1432            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1433            next_active_state--;            next_active_state--;
1434            }            }
1435            lgb = UCD_GRAPHBREAK(c);
1436          while (nptr < end_subject)          while (nptr < end_subject)
1437            {            {
1438            int nd;            dlen = 1;
1439            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1440            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1441            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1442            ncount++;            ncount++;
1443            nptr += ndlen;            lgb = rgb;
1444              nptr += dlen;
1445            }            }
1446          count++;          count++;
1447          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
# Line 1086  for (;;) Line 1460  for (;;)
1460          int ncount = 0;          int ncount = 0;
1461          switch (c)          switch (c)
1462            {            {
1463            case 0x000b:            case CHAR_VT:
1464            case 0x000c:            case CHAR_FF:
1465            case 0x0085:            case CHAR_NEL:
1466    #ifndef EBCDIC
1467            case 0x2028:            case 0x2028:
1468            case 0x2029:            case 0x2029:
1469    #endif  /* Not EBCDIC */
1470            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1471            goto ANYNL01;            goto ANYNL01;
1472    
1473            case 0x000d:            case CHAR_CR:
1474            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1475            /* Fall through */            /* Fall through */
1476    
1477            ANYNL01:            ANYNL01:
1478            case 0x000a:            case CHAR_LF:
1479            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1480              {              {
1481              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
# Line 1126  for (;;) Line 1502  for (;;)
1502          BOOL OK;          BOOL OK;
1503          switch (c)          switch (c)
1504            {            {
1505            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
1506            OK = TRUE;            OK = TRUE;
1507            break;            break;
1508    
# Line 1165  for (;;) Line 1535  for (;;)
1535          BOOL OK;          BOOL OK;
1536          switch (c)          switch (c)
1537            {            {
1538            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
1539            OK = TRUE;            OK = TRUE;
1540            break;            break;
1541    
# Line 1224  for (;;) Line 1576  for (;;)
1576        if (clen > 0)        if (clen > 0)
1577          {          {
1578          BOOL OK;          BOOL OK;
1579            const pcre_uint32 *cp;
1580          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1581          switch(code[2])          switch(code[2])
1582            {            {
# Line 1232  for (;;) Line 1585  for (;;)
1585            break;            break;
1586    
1587            case PT_LAMP:            case PT_LAMP:
1588            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1589                prop->chartype == ucp_Lt;
1590            break;            break;
1591    
1592            case PT_GC:            case PT_GC:
1593            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1594            break;            break;
1595    
1596            case PT_PC:            case PT_PC:
# Line 1247  for (;;) Line 1601  for (;;)
1601            OK = prop->script == code[3];            OK = prop->script == code[3];
1602            break;            break;
1603    
1604              /* These are specials for combination cases. */
1605    
1606              case PT_ALNUM:
1607              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1608                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1609              break;
1610    
1611              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1612              which means that Perl space and POSIX space are now identical. PCRE
1613              was changed at release 8.34. */
1614    
1615              case PT_SPACE:    /* Perl space */
1616              case PT_PXSPACE:  /* POSIX space */
1617              switch(c)
1618                {
1619                HSPACE_CASES:
1620                VSPACE_CASES:
1621                OK = TRUE;
1622                break;
1623    
1624                default:
1625                OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1626                break;
1627                }
1628              break;
1629    
1630              case PT_WORD:
1631              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1632                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1633                   c == CHAR_UNDERSCORE;
1634              break;
1635    
1636              case PT_CLIST:
1637              cp = PRIV(ucd_caseless_sets) + code[3];
1638              for (;;)
1639                {
1640                if (c < *cp) { OK = FALSE; break; }
1641                if (c == *cp++) { OK = TRUE; break; }
1642                }
1643              break;
1644    
1645              case PT_UCNC:
1646              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1647                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1648                   c >= 0xe000;
1649              break;
1650    
1651            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1652    
1653            default:            default:
# Line 1282  for (;;) Line 1683  for (;;)
1683        QS2:        QS2:
1684    
1685        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1686        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1687          {          {
1688          const uschar *nptr = ptr + clen;          int lgb, rgb;
1689            const pcre_uchar *nptr = ptr + clen;
1690          int ncount = 0;          int ncount = 0;
1691          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1692              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1292  for (;;) Line 1694  for (;;)
1694            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1695            next_active_state--;            next_active_state--;
1696            }            }
1697            lgb = UCD_GRAPHBREAK(c);
1698          while (nptr < end_subject)          while (nptr < end_subject)
1699            {            {
1700            int nd;            dlen = 1;
1701            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1702            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1703            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1704            ncount++;            ncount++;
1705            nptr += ndlen;            lgb = rgb;
1706              nptr += dlen;
1707            }            }
1708          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1709          }          }
# Line 1325  for (;;) Line 1729  for (;;)
1729          int ncount = 0;          int ncount = 0;
1730          switch (c)          switch (c)
1731            {            {
1732            case 0x000b:            case CHAR_VT:
1733            case 0x000c:            case CHAR_FF:
1734            case 0x0085:            case CHAR_NEL:
1735    #ifndef EBCDIC
1736            case 0x2028:            case 0x2028:
1737            case 0x2029:            case 0x2029:
1738    #endif  /* Not EBCDIC */
1739            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1740            goto ANYNL02;            goto ANYNL02;
1741    
1742            case 0x000d:            case CHAR_CR:
1743            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
1744            /* Fall through */            /* Fall through */
1745    
1746            ANYNL02:            ANYNL02:
1747            case 0x000a:            case CHAR_LF:
1748            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1749                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1750              {              {
1751              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1752              next_active_state--;              next_active_state--;
1753              }              }
1754            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + (int)count), 0, ncount);
1755            break;            break;
1756    
1757            default:            default:
# Line 1373  for (;;) Line 1779  for (;;)
1779          BOOL OK;          BOOL OK;
1780          switch (c)          switch (c)
1781            {            {
1782            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
1783            OK = TRUE;            OK = TRUE;
1784            break;            break;
1785    
# Line 1395  for (;;) Line 1795  for (;;)
1795              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1796              next_active_state--;              next_active_state--;
1797              }              }
1798            ADD_NEW_DATA(-(state_offset + count), 0, 0);            ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1799            }            }
1800          }          }
1801        break;        break;
# Line 1419  for (;;) Line 1819  for (;;)
1819          BOOL OK;          BOOL OK;
1820          switch (c)          switch (c)
1821            {            {
1822            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
1823            OK = TRUE;            OK = TRUE;
1824            break;            break;
1825    
# Line 1454  for (;;) Line 1836  for (;;)
1836              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1837              next_active_state--;              next_active_state--;
1838              }              }
1839            ADD_NEW_DATA(-(state_offset + count), 0, 0);            ADD_NEW_DATA(-(state_offset + (int)count), 0, 0);
1840            }            }
1841          }          }
1842        break;        break;
# Line 1466  for (;;) Line 1848  for (;;)
1848        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1849        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1850        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1851          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1852        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1853        if (clen > 0)        if (clen > 0)
1854          {          {
1855          BOOL OK;          BOOL OK;
1856            const pcre_uint32 *cp;
1857          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1858          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1859            {            {
1860            case PT_ANY:            case PT_ANY:
1861            OK = TRUE;            OK = TRUE;
1862            break;            break;
1863    
1864            case PT_LAMP:            case PT_LAMP:
1865            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1866                prop->chartype == ucp_Lt;
1867            break;            break;
1868    
1869            case PT_GC:            case PT_GC:
1870            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1871            break;            break;
1872    
1873            case PT_PC:            case PT_PC:
1874            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1875            break;            break;
1876    
1877            case PT_SC:            case PT_SC:
1878            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1879              break;
1880    
1881              /* These are specials for combination cases. */
1882    
1883              case PT_ALNUM:
1884              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1885                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1886              break;
1887    
1888              /* Perl space used to exclude VT, but from Perl 5.18 it is included,
1889              which means that Perl space and POSIX space are now identical. PCRE
1890              was changed at release 8.34. */
1891    
1892              case PT_SPACE:    /* Perl space */
1893              case PT_PXSPACE:  /* POSIX space */
1894              switch(c)
1895                {
1896                HSPACE_CASES:
1897                VSPACE_CASES:
1898                OK = TRUE;
1899                break;
1900    
1901                default:
1902                OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z;
1903                break;
1904                }
1905              break;
1906    
1907              case PT_WORD:
1908              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1909                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1910                   c == CHAR_UNDERSCORE;
1911              break;
1912    
1913              case PT_CLIST:
1914              cp = PRIV(ucd_caseless_sets) + code[1 + IMM2_SIZE + 2];
1915              for (;;)
1916                {
1917                if (c < *cp) { OK = FALSE; break; }
1918                if (c == *cp++) { OK = TRUE; break; }
1919                }
1920              break;
1921    
1922              case PT_UCNC:
1923              OK = c == CHAR_DOLLAR_SIGN || c == CHAR_COMMERCIAL_AT ||
1924                   c == CHAR_GRAVE_ACCENT || (c >= 0xa0 && c <= 0xd7ff) ||
1925                   c >= 0xe000;
1926            break;            break;
1927    
1928            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1508  for (;;) Line 1939  for (;;)
1939              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1940              next_active_state--;              next_active_state--;
1941              }              }
1942            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
1943              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1944            else            else
1945              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1946            }            }
# Line 1522  for (;;) Line 1953  for (;;)
1953        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1954        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1955        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1956          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1957        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1958        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
1959          {          {
1960          const uschar *nptr = ptr + clen;          int lgb, rgb;
1961            const pcre_uchar *nptr = ptr + clen;
1962          int ncount = 0;          int ncount = 0;
1963          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1964            {            {
1965            active_count--;           /* Remove non-match possibility */            active_count--;           /* Remove non-match possibility */
1966            next_active_state--;            next_active_state--;
1967            }            }
1968            lgb = UCD_GRAPHBREAK(c);
1969          while (nptr < end_subject)          while (nptr < end_subject)
1970            {            {
1971            int nd;            dlen = 1;
1972            int ndlen = 1;            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
1973            GETCHARLEN(nd, nptr, ndlen);            rgb = UCD_GRAPHBREAK(d);
1974            if (UCD_CATEGORY(nd) != ucp_M) break;            if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
1975            ncount++;            ncount++;
1976            nptr += ndlen;            lgb = rgb;
1977              nptr += dlen;
1978            }            }
1979          if (++count >= GET2(code, 1))          if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1980            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              reset_could_continue = TRUE;
1981            if (++count >= (int)GET2(code, 1))
1982              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1983          else          else
1984            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1985          }          }
# Line 1556  for (;;) Line 1992  for (;;)
1992        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1993        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1994        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1995          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1996        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1997        if (clen > 0)        if (clen > 0)
1998          {          {
1999          int ncount = 0;          int ncount = 0;
2000          switch (c)          switch (c)
2001            {            {
2002            case 0x000b:            case CHAR_VT:
2003            case 0x000c:            case CHAR_FF:
2004            case 0x0085:            case CHAR_NEL:
2005    #ifndef EBCDIC
2006            case 0x2028:            case 0x2028:
2007            case 0x2029:            case 0x2029:
2008    #endif  /* Not EBCDIC */
2009            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2010            goto ANYNL03;            goto ANYNL03;
2011    
2012            case 0x000d:            case CHAR_CR:
2013            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && RAWUCHARTEST(ptr + 1) == CHAR_LF) ncount = 1;
2014            /* Fall through */            /* Fall through */
2015    
2016            ANYNL03:            ANYNL03:
2017            case 0x000a:            case CHAR_LF:
2018            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
2019              {              {
2020              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2021              next_active_state--;              next_active_state--;
2022              }              }
2023            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2024              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
2025            else            else
2026              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
2027            break;            break;
# Line 1600  for (;;) Line 2038  for (;;)
2038        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
2039        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
2040        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
2041          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2042        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2043        if (clen > 0)        if (clen > 0)
2044          {          {
2045          BOOL OK;          BOOL OK;
2046          switch (c)          switch (c)
2047            {            {
2048            case 0x000a:            VSPACE_CASES:
           case 0x000b:  
           case 0x000c:  
           case 0x000d:  
           case 0x0085:  
           case 0x2028:  
           case 0x2029:  
2049            OK = TRUE;            OK = TRUE;
2050            break;            break;
2051    
# Line 1628  for (;;) Line 2060  for (;;)
2060              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2061              next_active_state--;              next_active_state--;
2062              }              }
2063            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2064              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2065            else            else
2066              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2067            }            }
# Line 1642  for (;;) Line 2074  for (;;)
2074        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2075        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2076        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2077          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2078        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2079        if (clen > 0)        if (clen > 0)
2080          {          {
2081          BOOL OK;          BOOL OK;
2082          switch (c)          switch (c)
2083            {            {
2084            case 0x09:      /* HT */            HSPACE_CASES:
           case 0x20:      /* SPACE */  
           case 0xa0:      /* NBSP */  
           case 0x1680:    /* OGHAM SPACE MARK */  
           case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
           case 0x2000:    /* EN QUAD */  
           case 0x2001:    /* EM QUAD */  
           case 0x2002:    /* EN SPACE */  
           case 0x2003:    /* EM SPACE */  
           case 0x2004:    /* THREE-PER-EM SPACE */  
           case 0x2005:    /* FOUR-PER-EM SPACE */  
           case 0x2006:    /* SIX-PER-EM SPACE */  
           case 0x2007:    /* FIGURE SPACE */  
           case 0x2008:    /* PUNCTUATION SPACE */  
           case 0x2009:    /* THIN SPACE */  
           case 0x200A:    /* HAIR SPACE */  
           case 0x202f:    /* NARROW NO-BREAK SPACE */  
           case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
           case 0x3000:    /* IDEOGRAPHIC SPACE */  
2085            OK = TRUE;            OK = TRUE;
2086            break;            break;
2087    
# Line 1683  for (;;) Line 2097  for (;;)
2097              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
2098              next_active_state--;              next_active_state--;
2099              }              }
2100            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2101              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2102            else            else
2103              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2104            }            }
# Line 1703  for (;;) Line 2117  for (;;)
2117        break;        break;
2118    
2119        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2120        case OP_CHARNC:        case OP_CHARI:
2121        if (clen == 0) break;        if (clen == 0) break;
2122    
2123  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2124        if (utf8)        if (utf)
2125          {          {
2126          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2127            {            {
2128            unsigned int othercase;            unsigned int othercase;
2129            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2130                othercase = fcc[c];
2131            /* If we have Unicode property support, we can use it to test the            else
2132            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2133                other case of the character. */
2134  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2135            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2136  #else  #else
2137            othercase = NOTACHAR;              othercase = NOTACHAR;
2138  #endif  #endif
2139    
2140            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2141            }            }
2142          }          }
2143        else        else
2144  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2145          /* Not UTF mode */
       /* Non-UTF-8 mode */  
2146          {          {
2147          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2148              { ADD_NEW(state_offset + 2, 0); }
2149          }          }
2150        break;        break;
2151    
# Line 1743  for (;;) Line 2157  for (;;)
2157        to wait for them to pass before continuing. */        to wait for them to pass before continuing. */
2158    
2159        case OP_EXTUNI:        case OP_EXTUNI:
2160        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0)
2161          {          {
2162          const uschar *nptr = ptr + clen;          int lgb, rgb;
2163            const pcre_uchar *nptr = ptr + clen;
2164          int ncount = 0;          int ncount = 0;
2165            lgb = UCD_GRAPHBREAK(c);
2166          while (nptr < end_subject)          while (nptr < end_subject)
2167            {            {
2168            int nclen = 1;            dlen = 1;
2169            GETCHARLEN(c, nptr, nclen);            if (!utf) d = *nptr; else { GETCHARLEN(d, nptr, dlen); }
2170            if (UCD_CATEGORY(c) != ucp_M) break;            rgb = UCD_GRAPHBREAK(d);
2171              if ((PRIV(ucp_gbtable)[lgb] & (1 << rgb)) == 0) break;
2172            ncount++;            ncount++;
2173            nptr += nclen;            lgb = rgb;
2174              nptr += dlen;
2175            }            }
2176            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2177                reset_could_continue = TRUE;
2178          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2179          }          }
2180        break;        break;
# Line 1768  for (;;) Line 2188  for (;;)
2188        case OP_ANYNL:        case OP_ANYNL:
2189        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2190          {          {
2191          case 0x000b:          case CHAR_VT:
2192          case 0x000c:          case CHAR_FF:
2193          case 0x0085:          case CHAR_NEL:
2194    #ifndef EBCDIC
2195          case 0x2028:          case 0x2028:
2196          case 0x2029:          case 0x2029:
2197    #endif  /* Not EBCDIC */
2198          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;          if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
2199    
2200          case 0x000a:          case CHAR_LF:
2201          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2202          break;          break;
2203    
2204          case 0x000d:          case CHAR_CR:
2205          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2206              {
2207              ADD_NEW(state_offset + 1, 0);
2208              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2209                reset_could_continue = TRUE;
2210              }
2211            else if (RAWUCHARTEST(ptr + 1) == CHAR_LF)
2212            {            {
2213            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2214            }            }
# Line 1796  for (;;) Line 2224  for (;;)
2224        case OP_NOT_VSPACE:        case OP_NOT_VSPACE:
2225        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2226          {          {
2227          case 0x000a:          VSPACE_CASES:
         case 0x000b:  
         case 0x000c:  
         case 0x000d:  
         case 0x0085:  
         case 0x2028:  
         case 0x2029:  
2228          break;          break;
2229    
2230          default:          default:
# Line 1815  for (;;) Line 2237  for (;;)
2237        case OP_VSPACE:        case OP_VSPACE:
2238        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2239          {          {
2240          case 0x000a:          VSPACE_CASES:
         case 0x000b:  
         case 0x000c:  
         case 0x000d:  
         case 0x0085:  
         case 0x2028:  
         case 0x2029:  
2241          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2242          break;          break;
2243    
2244          default: break;          default:
2245            break;
2246          }          }
2247        break;        break;
2248    
# Line 1833  for (;;) Line 2250  for (;;)
2250        case OP_NOT_HSPACE:        case OP_NOT_HSPACE:
2251        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2252          {          {
2253          case 0x09:      /* HT */          HSPACE_CASES:
         case 0x20:      /* SPACE */  
         case 0xa0:      /* NBSP */  
         case 0x1680:    /* OGHAM SPACE MARK */  
         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
         case 0x2000:    /* EN QUAD */  
         case 0x2001:    /* EM QUAD */  
         case 0x2002:    /* EN SPACE */  
         case 0x2003:    /* EM SPACE */  
         case 0x2004:    /* THREE-PER-EM SPACE */  
         case 0x2005:    /* FOUR-PER-EM SPACE */  
         case 0x2006:    /* SIX-PER-EM SPACE */  
         case 0x2007:    /* FIGURE SPACE */  
         case 0x2008:    /* PUNCTUATION SPACE */  
         case 0x2009:    /* THIN SPACE */  
         case 0x200A:    /* HAIR SPACE */  
         case 0x202f:    /* NARROW NO-BREAK SPACE */  
         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
         case 0x3000:    /* IDEOGRAPHIC SPACE */  
2254          break;          break;
2255    
2256          default:          default:
# Line 1864  for (;;) Line 2263  for (;;)
2263        case OP_HSPACE:        case OP_HSPACE:
2264        if (clen > 0) switch(c)        if (clen > 0) switch(c)
2265          {          {
2266          case 0x09:      /* HT */          HSPACE_CASES:
         case 0x20:      /* SPACE */  
         case 0xa0:      /* NBSP */  
         case 0x1680:    /* OGHAM SPACE MARK */  
         case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */  
         case 0x2000:    /* EN QUAD */  
         case 0x2001:    /* EM QUAD */  
         case 0x2002:    /* EN SPACE */  
         case 0x2003:    /* EM SPACE */  
         case 0x2004:    /* THREE-PER-EM SPACE */  
         case 0x2005:    /* FOUR-PER-EM SPACE */  
         case 0x2006:    /* SIX-PER-EM SPACE */  
         case 0x2007:    /* FIGURE SPACE */  
         case 0x2008:    /* PUNCTUATION SPACE */  
         case 0x2009:    /* THIN SPACE */  
         case 0x200A:    /* HAIR SPACE */  
         case 0x202f:    /* NARROW NO-BREAK SPACE */  
         case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */  
         case 0x3000:    /* IDEOGRAPHIC SPACE */  
2267          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
2268          break;          break;
2269    
2270            default:
2271            break;
2272          }          }
2273        break;        break;
2274    
2275        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2276        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character casefully. */
       characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2277    
2278        case OP_NOT:        case OP_NOT:
2279          if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2280          break;
2281    
2282          /*-----------------------------------------------------------------*/
2283          /* Match a negated single character caselessly. */
2284    
2285          case OP_NOTI:
2286        if (clen > 0)        if (clen > 0)
2287          {          {
2288          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd;
2289          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }  #ifdef SUPPORT_UTF
2290            if (utf && d >= 128)
2291              {
2292    #ifdef SUPPORT_UCP
2293              otherd = UCD_OTHERCASE(d);
2294    #endif  /* SUPPORT_UCP */
2295              }
2296            else
2297    #endif  /* SUPPORT_UTF */
2298            otherd = TABLE_GET(d, fcc, d);
2299            if (c != d && c != otherd)
2300              { ADD_NEW(state_offset + dlen + 1, 0); }
2301          }          }
2302        break;        break;
2303    
2304        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2305          case OP_PLUSI:
2306          case OP_MINPLUSI:
2307          case OP_POSPLUSI:
2308          case OP_NOTPLUSI:
2309          case OP_NOTMINPLUSI:
2310          case OP_NOTPOSPLUSI:
2311          caseless = TRUE;
2312          codevalue -= OP_STARI - OP_STAR;
2313    
2314          /* Fall through */
2315        case OP_PLUS:        case OP_PLUS:
2316        case OP_MINPLUS:        case OP_MINPLUS:
2317        case OP_POSPLUS:        case OP_POSPLUS:
# Line 1912  for (;;) Line 2322  for (;;)
2322        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + dlen + 1, 0); }
2323        if (clen > 0)        if (clen > 0)
2324          {          {
2325          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2326          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2327            {            {
2328  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2329            if (utf8 && d >= 128)            if (utf && d >= 128)
2330              {              {
2331  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2332              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2333  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2334              }              }
2335            else            else
2336  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2337            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2338            }            }
2339          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2340            {            {
# Line 1941  for (;;) Line 2351  for (;;)
2351        break;        break;
2352    
2353        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2354          case OP_QUERYI:
2355          case OP_MINQUERYI:
2356          case OP_POSQUERYI:
2357          case OP_NOTQUERYI:
2358          case OP_NOTMINQUERYI:
2359          case OP_NOTPOSQUERYI:
2360          caseless = TRUE;
2361          codevalue -= OP_STARI - OP_STAR;
2362          /* Fall through */
2363        case OP_QUERY:        case OP_QUERY:
2364        case OP_MINQUERY:        case OP_MINQUERY:
2365        case OP_POSQUERY:        case OP_POSQUERY:
# Line 1950  for (;;) Line 2369  for (;;)
2369        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2370        if (clen > 0)        if (clen > 0)
2371          {          {
2372          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2373          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2374            {            {
2375  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2376            if (utf8 && d >= 128)            if (utf && d >= 128)
2377              {              {
2378  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2379              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2380  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2381              }              }
2382            else            else
2383  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2384            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2385            }            }
2386          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2387            {            {
# Line 1977  for (;;) Line 2396  for (;;)
2396        break;        break;
2397    
2398        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2399          case OP_STARI:
2400          case OP_MINSTARI:
2401          case OP_POSSTARI:
2402          case OP_NOTSTARI:
2403          case OP_NOTMINSTARI:
2404          case OP_NOTPOSSTARI:
2405          caseless = TRUE;
2406          codevalue -= OP_STARI - OP_STAR;
2407          /* Fall through */
2408        case OP_STAR:        case OP_STAR:
2409        case OP_MINSTAR:        case OP_MINSTAR:
2410        case OP_POSSTAR:        case OP_POSSTAR:
# Line 1986  for (;;) Line 2414  for (;;)
2414        ADD_ACTIVE(state_offset + dlen + 1, 0);        ADD_ACTIVE(state_offset + dlen + 1, 0);
2415        if (clen > 0)        if (clen > 0)
2416          {          {
2417          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2418          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2419            {            {
2420  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2421            if (utf8 && d >= 128)            if (utf && d >= 128)
2422              {              {
2423  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2424              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2425  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2426              }              }
2427            else            else
2428  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2429            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2430            }            }
2431          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2432            {            {
# Line 2013  for (;;) Line 2441  for (;;)
2441        break;        break;
2442    
2443        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2444          case OP_EXACTI:
2445          case OP_NOTEXACTI:
2446          caseless = TRUE;
2447          codevalue -= OP_STARI - OP_STAR;
2448          /* Fall through */
2449        case OP_EXACT:        case OP_EXACT:
2450        case OP_NOTEXACT:        case OP_NOTEXACT:
2451        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2452        if (clen > 0)        if (clen > 0)
2453          {          {
2454          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2455          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2456            {            {
2457  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2458            if (utf8 && d >= 128)            if (utf && d >= 128)
2459              {              {
2460  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2461              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2462  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2463              }              }
2464            else            else
2465  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2466            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2467            }            }
2468          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2469            {            {
2470            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2471              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2472            else            else
2473              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2474            }            }
# Line 2043  for (;;) Line 2476  for (;;)
2476        break;        break;
2477    
2478        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2479          case OP_UPTOI:
2480          case OP_MINUPTOI:
2481          case OP_POSUPTOI:
2482          case OP_NOTUPTOI:
2483          case OP_NOTMINUPTOI:
2484          case OP_NOTPOSUPTOI:
2485          caseless = TRUE;
2486          codevalue -= OP_STARI - OP_STAR;
2487          /* Fall through */
2488        case OP_UPTO:        case OP_UPTO:
2489        case OP_MINUPTO:        case OP_MINUPTO:
2490        case OP_POSUPTO:        case OP_POSUPTO:
2491        case OP_NOTUPTO:        case OP_NOTUPTO:
2492        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2493        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2494        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2495        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2496        if (clen > 0)        if (clen > 0)
2497          {          {
2498          unsigned int otherd = NOTACHAR;          pcre_uint32 otherd = NOTACHAR;
2499          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2500            {            {
2501  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2502            if (utf8 && d >= 128)            if (utf && d >= 128)
2503              {              {
2504  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2505              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2506  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2507              }              }
2508            else            else
2509  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2510            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2511            }            }
2512          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2513            {            {
# Line 2074  for (;;) Line 2516  for (;;)
2516              active_count--;             /* Remove non-match possibility */              active_count--;             /* Remove non-match possibility */
2517              next_active_state--;              next_active_state--;
2518              }              }
2519            if (++count >= GET2(code, 1))            if (++count >= (int)GET2(code, 1))
2520              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2521            else            else
2522              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2523            }            }
# Line 2092  for (;;) Line 2534  for (;;)
2534          {          {
2535          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2536          int next_state_offset;          int next_state_offset;
2537          const uschar *ecode;          const pcre_uchar *ecode;
2538    
2539          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2540          can set isinclass from it. */          can set isinclass from it. */
2541    
2542          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2543            {            {
2544            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2545            if (clen > 0)            if (clen > 0)
2546              {              {
2547              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2548                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2549              }              }
2550            }            }
2551    
# Line 2114  for (;;) Line 2556  for (;;)
2556          else          else
2557           {           {
2558           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2559           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2560           }           }
2561    
2562          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
2563          points to the byte after the end of the class. If there is a          points to the byte after the end of the class. If there is a
2564          quantifier, this is where it will be. */          quantifier, this is where it will be. */
2565    
2566          next_state_offset = ecode - start_code;          next_state_offset = (int)(ecode - start_code);
2567    
2568          switch (*ecode)          switch (*ecode)
2569            {            {
# Line 2147  for (;;) Line 2589  for (;;)
2589            case OP_CRRANGE:            case OP_CRRANGE:
2590            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2591            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2592            if (count >= GET2(ecode, 1))            if (count >= (int)GET2(ecode, 1))
2593              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2594            if (isinclass)            if (isinclass)
2595              {              {
2596              int max = GET2(ecode, 3);              int max = (int)GET2(ecode, 1 + IMM2_SIZE);
2597              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2598                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2599              else              else
2600                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
2601              }              }
# Line 2184  for (;;) Line 2626  for (;;)
2626          int rc;          int rc;
2627          int local_offsets[2];          int local_offsets[2];
2628          int local_workspace[1000];          int local_workspace[1000];
2629          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2630    
2631          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2632    
# Line 2192  for (;;) Line 2634  for (;;)
2634            md,                                   /* static match data */            md,                                   /* static match data */
2635            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2636            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2637            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2638            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2639            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2640            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2641            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2642            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
           rlevel,                               /* function recursion level */  
           recursing);                           /* pass on regex recursion */  
2643    
2644            if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2645          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2646              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2647          }          }
2648        break;        break;
2649    
# Line 2222  for (;;) Line 2663  for (;;)
2663          if (code[LINK_SIZE+1] == OP_CALLOUT)          if (code[LINK_SIZE+1] == OP_CALLOUT)
2664            {            {
2665            rrc = 0;            rrc = 0;
2666            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2667              {              {
2668              pcre_callout_block cb;              PUBL(callout_block) cb;
2669              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2670              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2671              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2672    #if defined COMPILE_PCRE8
2673              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2674              cb.subject_length   = end_subject - start_subject;  #elif defined COMPILE_PCRE16
2675              cb.start_match      = current_subject - start_subject;              cb.subject          = (PCRE_SPTR16)start_subject;
2676              cb.current_position = ptr - start_subject;  #elif defined COMPILE_PCRE32
2677                cb.subject          = (PCRE_SPTR32)start_subject;
2678    #endif
2679                cb.subject_length   = (int)(end_subject - start_subject);
2680                cb.start_match      = (int)(current_subject - start_subject);
2681                cb.current_position = (int)(ptr - start_subject);
2682              cb.pattern_position = GET(code, LINK_SIZE + 3);              cb.pattern_position = GET(code, LINK_SIZE + 3);
2683              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2684              cb.capture_top      = 1;              cb.capture_top      = 1;
2685              cb.capture_last     = -1;              cb.capture_last     = -1;
2686              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2687              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              cb.mark             = NULL;   /* No (*MARK) support */
2688                if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2689              }              }
2690            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2691            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2692            }            }
2693    
2694          condcode = code[LINK_SIZE+1];          condcode = code[LINK_SIZE+1];
2695    
2696          /* Back reference conditions are not supported */          /* Back reference conditions and duplicate named recursion conditions
2697            are not supported */
2698    
2699          if (condcode == OP_CREF) return PCRE_ERROR_DFA_UCOND;          if (condcode == OP_CREF || condcode == OP_DNCREF ||
2700                condcode == OP_DNRREF)
2701              return PCRE_ERROR_DFA_UCOND;
2702    
2703          /* The DEFINE condition is always false */          /* The DEFINE condition is always false */
2704    
# Line 2260  for (;;) Line 2711  for (;;)
2711    
2712          else if (condcode == OP_RREF)          else if (condcode == OP_RREF)
2713            {            {
2714            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2715            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2716            if (recursing > 0)            if (md->recursive != NULL)
2717              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2718            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2719            }            }
2720    
# Line 2272  for (;;) Line 2723  for (;;)
2723          else          else
2724            {            {
2725            int rc;            int rc;
2726            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2727            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2728    
2729            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2730    
# Line 2281  for (;;) Line 2732  for (;;)
2732              md,                                   /* fixed match data */              md,                                   /* fixed match data */
2733              asscode,                              /* this subexpression's code */              asscode,                              /* this subexpression's code */
2734              ptr,                                  /* where we currently are */              ptr,                                  /* where we currently are */
2735              ptr - start_subject,                  /* start offset */              (int)(ptr - start_subject),           /* start offset */
2736              local_offsets,                        /* offset vector */              local_offsets,                        /* offset vector */
2737              sizeof(local_offsets)/sizeof(int),    /* size of same */              sizeof(local_offsets)/sizeof(int),    /* size of same */
2738              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2739              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2740              ims,                                  /* the current ims flags */              rlevel);                              /* function recursion level */
             rlevel,                               /* function recursion level */  
             recursing);                           /* pass on regex recursion */  
2741    
2742              if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2743            if ((rc >= 0) ==            if ((rc >= 0) ==
2744                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2745              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2746            else            else
2747              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2748            }            }
# Line 2302  for (;;) Line 2752  for (;;)
2752        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2753        case OP_RECURSE:        case OP_RECURSE:
2754          {          {
2755            dfa_recursion_info *ri;
2756          int local_offsets[1000];          int local_offsets[1000];
2757          int local_workspace[1000];          int local_workspace[1000];
2758            const pcre_uchar *callpat = start_code + GET(code, 1);
2759            int recno = (callpat == md->start_code)? 0 :
2760              GET2(callpat, 1 + LINK_SIZE);
2761          int rc;          int rc;
2762    
2763          DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2764            recursing + 1));  
2765            /* Check for repeating a recursion without advancing the subject
2766            pointer. This should catch convoluted mutual recursions. (Some simple
2767            cases are caught at compile time.) */
2768    
2769            for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2770              if (recno == ri->group_num && ptr == ri->subject_position)
2771                return PCRE_ERROR_RECURSELOOP;
2772    
2773            /* Remember this recursion and where we started it so as to
2774            catch infinite loops. */
2775    
2776            new_recursive.group_num = recno;
2777            new_recursive.subject_position = ptr;
2778            new_recursive.prevrec = md->recursive;
2779            md->recursive = &new_recursive;
2780    
2781          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2782            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2783            start_code + GET(code, 1),            /* this subexpression's code */            callpat,                              /* this subexpression's code */
2784            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2785            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2786            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2787            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2788            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2789            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2790            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
           rlevel,                               /* function recursion level */  
           recursing + 1);                       /* regex recurse level */  
2791    
2792          DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2793            recursing + 1, rc));  
2794            DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2795              rc));
2796    
2797          /* Ran out of internal offsets */          /* Ran out of internal offsets */
2798    
# Line 2337  for (;;) Line 2806  for (;;)
2806            {            {
2807            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2808              {              {
             const uschar *p = start_subject + local_offsets[rc];  
             const uschar *pp = start_subject + local_offsets[rc+1];  
2809              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2810              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2811                if (utf)
2812                  {
2813                  const pcre_uchar *p = start_subject + local_offsets[rc];
2814                  const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2815                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2816                  }
2817    #endif
2818              if (charcount > 0)              if (charcount > 0)
2819                {                {
2820                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
# Line 2356  for (;;) Line 2830  for (;;)
2830        break;        break;
2831    
2832        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2833          case OP_BRAPOS:
2834          case OP_SBRAPOS:
2835          case OP_CBRAPOS:
2836          case OP_SCBRAPOS:
2837          case OP_BRAPOSZERO:
2838            {
2839            int charcount, matched_count;
2840            const pcre_uchar *local_ptr = ptr;
2841            BOOL allow_zero;
2842    
2843            if (codevalue == OP_BRAPOSZERO)
2844              {
2845              allow_zero = TRUE;
2846              codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2847              }
2848            else allow_zero = FALSE;
2849    
2850            /* Loop to match the subpattern as many times as possible as if it were
2851            a complete pattern. */
2852    
2853            for (matched_count = 0;; matched_count++)
2854              {
2855              int local_offsets[2];
2856              int local_workspace[1000];
2857    
2858              int rc = internal_dfa_exec(
2859                md,                                   /* fixed match data */
2860                code,                                 /* this subexpression's code */
2861                local_ptr,                            /* where we currently are */
2862                (int)(ptr - start_subject),           /* start offset */
2863                local_offsets,                        /* offset vector */
2864                sizeof(local_offsets)/sizeof(int),    /* size of same */
2865                local_workspace,                      /* workspace vector */
2866                sizeof(local_workspace)/sizeof(int),  /* size of same */
2867                rlevel);                              /* function recursion level */
2868    
2869              /* Failed to match */
2870    
2871              if (rc < 0)
2872                {
2873                if (rc != PCRE_ERROR_NOMATCH) return rc;
2874                break;
2875                }
2876    
2877              /* Matched: break the loop if zero characters matched. */
2878    
2879              charcount = local_offsets[1] - local_offsets[0];
2880              if (charcount == 0) break;
2881              local_ptr += charcount;    /* Advance temporary position ptr */
2882              }
2883    
2884            /* At this point we have matched the subpattern matched_count
2885            times, and local_ptr is pointing to the character after the end of the
2886            last match. */
2887    
2888            if (matched_count > 0 || allow_zero)
2889              {
2890              const pcre_uchar *end_subpattern = code;
2891              int next_state_offset;
2892    
2893              do { end_subpattern += GET(end_subpattern, 1); }
2894                while (*end_subpattern == OP_ALT);
2895              next_state_offset =
2896                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2897    
2898              /* Optimization: if there are no more active states, and there
2899              are no new states yet set up, then skip over the subject string
2900              right here, to save looping. Otherwise, set up the new state to swing
2901              into action when the end of the matched substring is reached. */
2902    
2903              if (i + 1 >= active_count && new_count == 0)
2904                {
2905                ptr = local_ptr;
2906                clen = 0;
2907                ADD_NEW(next_state_offset, 0);
2908                }
2909              else
2910                {
2911                const pcre_uchar *p = ptr;
2912                const pcre_uchar *pp = local_ptr;
2913                charcount = (int)(pp - p);
2914    #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2915                if (utf) while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2916    #endif
2917                ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2918                }
2919              }
2920            }
2921          break;
2922    
2923          /*-----------------------------------------------------------------*/
2924        case OP_ONCE:        case OP_ONCE:
2925          case OP_ONCE_NC:
2926          {          {
2927          int local_offsets[2];          int local_offsets[2];
2928          int local_workspace[1000];          int local_workspace[1000];
# Line 2365  for (;;) Line 2931  for (;;)
2931            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2932            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2933            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2934            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2935            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2936            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2937            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2938            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2939            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
           rlevel,                               /* function recursion level */  
           recursing);                           /* pass on regex recursion */  
2940    
2941          if (rc >= 0)          if (rc >= 0)
2942            {            {
2943            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2944            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2945            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2946    
2947            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2948              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
2949            next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;            next_state_offset =
2950                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2951    
2952            /* If the end of this subpattern is KETRMAX or KETRMIN, we must            /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2953            arrange for the repeat state also to be added to the relevant list.            arrange for the repeat state also to be added to the relevant list.
# Line 2390  for (;;) Line 2955  for (;;)
2955    
2956            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2957                                   *end_subpattern == OP_KETRMIN)?                                   *end_subpattern == OP_KETRMIN)?
2958              end_subpattern - start_code - GET(end_subpattern, 1) : -1;              (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2959    
2960            /* If we have matched an empty string, add the next state at the            /* If we have matched an empty string, add the next state at the
2961            current character pointer. This is important so that the duplicate            current character pointer. This is important so that the duplicate
# Line 2405  for (;;) Line 2970  for (;;)
2970            /* Optimization: if there are no more active states, and there            /* Optimization: if there are no more active states, and there
2971            are no new states yet set up, then skip over the subject string            are no new states yet set up, then skip over the subject string
2972            right here, to save looping. Otherwise, set up the new state to swing            right here, to save looping. Otherwise, set up the new state to swing
2973            into action when the end of the substring is reached. */            into action when the end of the matched substring is reached. */
2974    
2975            else if (i + 1 >= active_count && new_count == 0)            else if (i + 1 >= active_count && new_count == 0)
2976              {              {
# Line 2428  for (;;) Line 2993  for (;;)
2993              }              }
2994            else            else
2995              {              {
2996              const uschar *p = start_subject + local_offsets[0];  #if defined SUPPORT_UTF && !defined COMPILE_PCRE32
2997              const uschar *pp = start_subject + local_offsets[1];              if (utf)
2998              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;                {
2999                  const pcre_uchar *p = start_subject + local_offsets[0];
3000                  const pcre_uchar *pp = start_subject + local_offsets[1];
3001                  while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
3002                  }
3003    #endif
3004              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
3005              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
3006                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
3007              }              }
   
3008            }            }
3009          else if (rc != PCRE_ERROR_NOMATCH) return rc;          else if (rc != PCRE_ERROR_NOMATCH) return rc;
3010          }          }
# Line 2447  for (;;) Line 3016  for (;;)
3016    
3017        case OP_CALLOUT:        case OP_CALLOUT:
3018        rrc = 0;        rrc = 0;
3019        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
3020          {          {
3021          pcre_callout_block cb;          PUBL(callout_block) cb;
3022          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
3023          cb.callout_number   = code[1];          cb.callout_number   = code[1];
3024          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
3025    #if defined COMPILE_PCRE8
3026          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
3027          cb.subject_length   = end_subject - start_subject;  #elif defined COMPILE_PCRE16
3028          cb.start_match      = current_subject - start_subject;          cb.subject          = (PCRE_SPTR16)start_subject;
3029          cb.current_position = ptr - start_subject;  #elif defined COMPILE_PCRE32
3030            cb.subject          = (PCRE_SPTR32)start_subject;
3031    #endif
3032            cb.subject_length   = (int)(end_subject - start_subject);
3033            cb.start_match      = (int)(current_subject - start_subject);
3034            cb.current_position = (int)(ptr - start_subject);
3035          cb.pattern_position = GET(code, 2);          cb.pattern_position = GET(code, 2);
3036          cb.next_item_length = GET(code, 2 + LINK_SIZE);          cb.next_item_length = GET(code, 2 + LINK_SIZE);
3037          cb.capture_top      = 1;          cb.capture_top      = 1;
3038          cb.capture_last     = -1;          cb.capture_last     = -1;
3039          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
3040          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          cb.mark             = NULL;   /* No (*MARK) support */
3041            if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
3042          }          }
3043        if (rrc == 0)        if (rrc == 0)
3044          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3045        break;        break;
3046    
3047    
# Line 2481  for (;;) Line 3057  for (;;)
3057    /* We have finished the processing at the current subject character. If no    /* We have finished the processing at the current subject character. If no
3058    new states have been set for the next character, we have found all the    new states have been set for the next character, we have found all the
3059    matches that we are going to find. If we are at the top level and partial    matches that we are going to find. If we are at the top level and partial
3060    matching has been requested, check for appropriate conditions. The "forced_    matching has been requested, check for appropriate conditions.
3061    fail" variable counts the number of (*F) encountered for the character. If it  
3062    is equal to the original active_count (saved in workspace[1]) it means that    The "forced_ fail" variable counts the number of (*F) encountered for the
3063    (*F) was found on every active state. In this case we don't want to give a    character. If it is equal to the original active_count (saved in
3064    partial match. */    workspace[1]) it means that (*F) was found on every active state. In this
3065      case we don't want to give a partial match.
3066    
3067      The "could_continue" variable is true if a state could have continued but
3068      for the fact that the end of the subject was reached. */
3069    
3070    if (new_count <= 0)    if (new_count <= 0)
3071      {      {
3072      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3073          reached_end != workspace[1] &&               /* Not all reached end */          could_continue &&                            /* Some could go on, and */
3074          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3075          (                                            /* either... */          (                                            /* either... */
3076          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2498  for (;;) Line 3078  for (;;)
3078          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3079           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3080          ) &&                                         /* And... */          ) &&                                         /* And... */
3081          ptr >= end_subject &&                     /* Reached end of subject */          (
3082          ptr > current_subject)                    /* Matched non-empty string */          partial_newline ||                           /* Either partial NL */
3083        {            (                                          /* or ... */
3084        if (offsetcount >= 2)            ptr >= end_subject &&                /* End of subject and */
3085          {            ptr > md->start_used_ptr)            /* Inspected non-empty string */
3086          offsets[0] = current_subject - start_subject;            )
3087          offsets[1] = end_subject - start_subject;          )
         }  
3088        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
       }  
   
3089      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"      DPRINTF(("%.*sEnd of internal_dfa_exec %d: returning %d\n"
3090        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,        "%.*s---------------------\n\n", rlevel*2-2, SP, rlevel, match_count,
3091        rlevel*2-2, SP));        rlevel*2-2, SP));
# Line 2558  Returns:          > 0 => number of match Line 3135  Returns:          > 0 => number of match
3135                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3136  */  */
3137    
3138    #if defined COMPILE_PCRE8
3139  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3140  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3141    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3142    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3143    #elif defined COMPILE_PCRE16
3144    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3145    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3146      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3147      int offsetcount, int *workspace, int wscount)
3148    #elif defined COMPILE_PCRE32
3149    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3150    pcre32_dfa_exec(const pcre32 *argument_re, const pcre32_extra *extra_data,
3151      PCRE_SPTR32 subject, int length, int start_offset, int options, int *offsets,
3152      int offsetcount, int *workspace, int wscount)
3153    #endif
3154  {  {
3155  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3156  dfa_match_data match_block;  dfa_match_data match_block;
3157  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3158  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3159  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;
   
 pcre_study_data internal_study;  
3160  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
 real_pcre internal_re;  
3161    
3162  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3163  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3164  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3165  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3166  int first_byte = -1;  pcre_uchar first_char = 0;
3167  int req_byte = -1;  pcre_uchar first_char2 = 0;
3168  int req_byte2 = -1;  pcre_uchar req_char = 0;
3169    pcre_uchar req_char2 = 0;
3170  int newline;  int newline;
3171    
3172  /* Plausibility checks */  /* Plausibility checks */
# Line 2589  if (re == NULL || subject == NULL || wor Line 3176  if (re == NULL || subject == NULL || wor
3176     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3177  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3178  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3179    if (length < 0) return PCRE_ERROR_BADLENGTH;
3180    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3181    
3182  /* We need to find the pointer to any study data before we test for byte  /* Check that the first field in the block is the magic number. If it is not,
3183  flipping, so we scan the extra_data block first. This may set two fields in the  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3184  match block, so we must initialize them beforehand. However, the other fields  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3185  in the match block must not be set until after the byte flipping. */  means that the pattern is likely compiled with different endianness. */
3186    
3187    if (re->magic_number != MAGIC_NUMBER)
3188      return re->magic_number == REVERSED_MAGIC_NUMBER?
3189        PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3190    if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
3191    
3192    /* If restarting after a partial match, do some sanity checks on the contents
3193    of the workspace. */
3194    
3195    if ((options & PCRE_DFA_RESTART) != 0)
3196      {
3197      if ((workspace[0] & (-2)) != 0 || workspace[1] < 1 ||
3198        workspace[1] > (wscount - 2)/INTS_PER_STATEBLOCK)
3199          return PCRE_ERROR_DFA_BADRESTART;
3200      }
3201    
3202    /* Set up study, callout, and table data */
3203    
3204  md->tables = re->tables;  md->tables = re->tables;
3205  md->callout_data = NULL;  md->callout_data = NULL;
# Line 2612  if (extra_data != NULL) Line 3218  if (extra_data != NULL)
3218      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3219    }    }
3220    
 /* Check that the first field in the block is the magic number. If it is not,  
 test for a regex that was compiled on a host of opposite endianness. If this is  
 the case, flipped values are put in internal_re and internal_study if there was  
 study data too. */  
   
 if (re->magic_number != MAGIC_NUMBER)  
   {  
   re = _pcre_try_flipped(re, &internal_re, study, &internal_study);  
   if (re == NULL) return PCRE_ERROR_BADMAGIC;  
   if (study != NULL) study = &internal_study;  
   }  
   
3221  /* Set some local values */  /* Set some local values */
3222    
3223  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3224  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3225  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3226    
3227  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3228  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
3229    utf = (re->options & PCRE_UTF8) != 0;
3230  #else  #else
3231  utf8 = FALSE;  utf = FALSE;
3232  #endif  #endif
3233    
3234  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 2641  anchored = (options & (PCRE_ANCHORED|PCR Line 3236  anchored = (options & (PCRE_ANCHORED|PCR
3236    
3237  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3238    
3239  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3240      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3241  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3242  md->end_subject = end_subject;  md->end_subject = end_subject;
3243    md->start_offset = start_offset;
3244  md->moptions = options;  md->moptions = options;
3245  md->poptions = re->options;  md->poptions = re->options;
3246    
# Line 2703  else Line 3299  else
3299  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3300  back the character offset. */  back the character offset. */
3301    
3302  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3303  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3304    {    {
3305    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    int erroroffset;
3306      return PCRE_ERROR_BADUTF8;    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3307    if (start_offset > 0 && start_offset < length)    if (errorcode != 0)
3308      {      {
3309      int tb = ((uschar *)subject)[start_offset];      if (offsetcount >= 2)
     if (tb > 127)  
3310        {        {
3311        tb &= 0xc0;        offsets[0] = erroroffset;
3312        if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;        offsets[1] = errorcode;
3313        }        }
3314    #if defined COMPILE_PCRE8
3315        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0) ?
3316          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3317    #elif defined COMPILE_PCRE16
3318        return (errorcode <= PCRE_UTF16_ERR1 && (options & PCRE_PARTIAL_HARD) != 0) ?
3319          PCRE_ERROR_SHORTUTF16 : PCRE_ERROR_BADUTF16;
3320    #elif defined COMPILE_PCRE32
3321        return PCRE_ERROR_BADUTF32;
3322    #endif
3323      }      }
3324    #if defined COMPILE_PCRE8 || defined COMPILE_PCRE16
3325      if (start_offset > 0 && start_offset < length &&
3326            NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3327        return PCRE_ERROR_BADUTF8_OFFSET;
3328    #endif
3329    }    }
3330  #endif  #endif
3331    
# Line 2724  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3333  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3333  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3334  in other programs later. */  in other programs later. */
3335    
3336  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3337    
3338  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3339  used in a loop when finding where to start. */  where to start. */
3340    
 lcc = md->tables + lcc_offset;  
3341  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3342  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3343    
# Line 2743  if (!anchored) Line 3351  if (!anchored)
3351    {    {
3352    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3353      {      {
3354      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3355      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3356        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3357          {
3358          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3359    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3360          if (utf && first_char > 127)
3361            first_char2 = UCD_OTHERCASE(first_char);
3362    #endif
3363          }
3364      }      }
3365    else    else
3366      {      {
3367      if (startline && study != NULL &&      if (!startline && study != NULL &&
3368           (study->options & PCRE_STUDY_MAPPED) != 0)           (study->flags & PCRE_STUDY_MAPPED) != 0)
3369        start_bits = study->start_bits;        start_bits = study->start_bits;
3370      }      }
3371    }    }
# Line 2760  character" set. */ Line 3375  character" set. */
3375    
3376  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3377    {    {
3378    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3379    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3380    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3381        {
3382        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3383    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3384        if (utf && req_char > 127)
3385          req_char2 = UCD_OTHERCASE(req_char);
3386    #endif
3387        }
3388    }    }
3389    
3390  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 2775  for (;;) Line 3397  for (;;)
3397    
3398    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3399      {      {
3400      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3401    
3402      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3403      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 2784  for (;;) Line 3406  for (;;)
3406    
3407      if (firstline)      if (firstline)
3408        {        {
3409        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3410  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3411        if (utf8)        if (utf)
3412          {          {
3413          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3414            {            {
3415            t++;            t++;
3416            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3417            }            }
3418          }          }
3419        else        else
# Line 2801  for (;;) Line 3423  for (;;)
3423        }        }
3424    
3425      /* There are some optimizations that avoid running the match if a known      /* There are some optimizations that avoid running the match if a known
3426      starting point is not found, or if a known later character is not present.      starting point is not found. However, there is an option that disables
3427      However, there is an option that disables these, for testing and for      these, for testing and for ensuring that all callouts do actually occur.
3428      ensuring that all callouts do actually occur. */      The option can be set in the regex by (*NO_START_OPT) or passed in
3429        match-time options. */
3430    
3431      if ((options & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3432        {        {
3433          /* Advance to a known first char. */
3434    
3435        /* Advance to a known first byte. */        if (has_first_char)
   
       if (first_byte >= 0)  
3436          {          {
3437          if (first_byte_caseless)          if (first_char != first_char2)
3438              {
3439              pcre_uchar csc;
3440            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3441                   lcc[*current_subject] != first_byte)                   (csc = RAWUCHARTEST(current_subject)) != first_char && csc != first_char2)
3442              current_subject++;              current_subject++;
3443              }
3444          else          else
3445            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3446                   *current_subject != first_byte)                   RAWUCHARTEST(current_subject) != first_char)
3447              current_subject++;              current_subject++;
3448          }          }
3449    
# Line 2828  for (;;) Line 3453  for (;;)
3453          {          {
3454          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3455            {            {
3456  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3457            if (utf8)            if (utf)
3458              {              {
3459              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3460                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3461                {                {
3462                current_subject++;                current_subject++;
3463                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3464                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
                 current_subject++;  
3465                }                }
3466              }              }
3467            else            else
# Line 2849  for (;;) Line 3473  for (;;)
3473            ANYCRLF, and we are now at a LF, advance the match position by one            ANYCRLF, and we are now at a LF, advance the match position by one
3474            more character. */            more character. */
3475    
3476            if (current_subject[-1] == CHAR_CR &&            if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3477                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&                 (md->nltype == NLTYPE_ANY || md->nltype == NLTYPE_ANYCRLF) &&
3478                 current_subject < end_subject &&                 current_subject < end_subject &&
3479                 *current_subject == CHAR_NL)                 RAWUCHARTEST(current_subject) == CHAR_NL)
3480              current_subject++;              current_subject++;
3481            }            }
3482          }          }
# Line 2863  for (;;) Line 3487  for (;;)
3487          {          {
3488          while (current_subject < end_subject)          while (current_subject < end_subject)
3489            {            {
3490            register unsigned int c = *current_subject;            register pcre_uint32 c = RAWUCHARTEST(current_subject);
3491            if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;  #ifndef COMPILE_PCRE8
3492              else break;            if (c > 255) c = 255;
3493    #endif
3494              if ((start_bits[c/8] & (1 << (c&7))) == 0)
3495                {
3496                current_subject++;
3497    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3498                /* In non 8-bit mode, the iteration will stop for
3499                characters > 255 at the beginning or not stop at all. */
3500                if (utf)
3501                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3502                    current_subject++);
3503    #endif
3504                }
3505              else break;
3506            }            }
3507          }          }
3508        }        }
# Line 2873  for (;;) Line 3510  for (;;)
3510      /* Restore fudged end_subject */      /* Restore fudged end_subject */
3511    
3512      end_subject = save_end_subject;      end_subject = save_end_subject;
     }  
3513    
3514    /* If req_byte is set, we know that that character must appear in the subject      /* The following two optimizations are disabled for partial matching or if
3515    for the match to succeed. If the first character is set, req_byte must be      disabling is explicitly requested (and of course, by the test above, this
3516    later in the subject; otherwise the test starts at the match point. This      code is not obeyed when restarting after a partial match). */
   optimization can save a huge amount of work in patterns with nested unlimited  
   repeats that aren't going to match. Writing separate code for cased/caseless  
   versions makes it go faster, as does using an autoincrement and backing off  
   on a match.  
   
   HOWEVER: when the subject string is very, very long, searching to its end can  
   take a long time, and give bad performance on quite ordinary patterns. This  
   showed up when somebody was matching /^C/ on a 32-megabyte string... so we  
   don't do this when the string is sufficiently long.  
   
   ALSO: this processing is disabled when partial matching is requested, and can  
   also be explicitly deactivated. Furthermore, we have to disable when  
   restarting after a partial match, because the required character may have  
   already been matched. */  
   
   if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&  
       req_byte >= 0 &&  
       end_subject - current_subject < REQ_BYTE_MAX &&  
       (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT|PCRE_DFA_RESTART)) == 0)  
     {  
     register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);  
3517    
3518      /* We don't need to repeat the search if we haven't yet reached the      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3519      place we found it at last time. */          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
   
     if (p > req_byte_ptr)  
3520        {        {
3521        if (req_byte_caseless)        /* If the pattern was studied, a minimum subject length may be set. This
3522          {        is a lower bound; no actual string of that length may actually match the
3523          while (p < end_subject)        pattern. Although the value is, strictly, in characters, we treat it as
3524            {        bytes to avoid spending too much time in this optimization. */
3525            register int pp = *p++;  
3526            if (pp == req_byte || pp == req_byte2) { p--; break; }        if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
3527            }            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3528          }          return PCRE_ERROR_NOMATCH;
3529        else  
3530          /* If req_char is set, we know that that character must appear in the
3531          subject for the match to succeed. If the first character is set, req_char
3532          must be later in the subject; otherwise the test starts at the match
3533          point. This optimization can save a huge amount of work in patterns with
3534          nested unlimited repeats that aren't going to match. Writing separate
3535          code for cased/caseless versions makes it go faster, as does using an
3536          autoincrement and backing off on a match.
3537    
3538          HOWEVER: when the subject string is very, very long, searching to its end
3539          can take a long time, and give bad performance on quite ordinary
3540          patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3541          string... so we don't do this when the string is sufficiently long. */
3542    
3543          if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3544          {          {
3545          while (p < end_subject)          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3546    
3547            /* We don't need to repeat the search if we haven't yet reached the
3548            place we found it at last time. */
3549    
3550            if (p > req_char_ptr)
3551            {            {
3552            if (*p++ == req_byte) { p--; break; }            if (req_char != req_char2)
3553            }              {
3554          }              while (p < end_subject)
3555                  {
3556                  register pcre_uint32 pp = RAWUCHARINCTEST(p);
3557                  if (pp == req_char || pp == req_char2) { p--; break; }
3558                  }
3559                }
3560              else
3561                {
3562                while (p < end_subject)
3563                  {
3564                  if (RAWUCHARINCTEST(p) == req_char) { p--; break; }
3565                  }
3566                }
3567    
3568        /* If we can't find the required character, break the matching loop,            /* If we can't find the required character, break the matching loop,
3569        which will cause a return of PCRE_ERROR_NOMATCH. */            which will cause a return of PCRE_ERROR_NOMATCH. */
3570    
3571        if (p >= end_subject) break;            if (p >= end_subject) break;
3572    
3573        /* If we have found the required character, save the point where we            /* If we have found the required character, save the point where we
3574        found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3575        the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3576    
3577        req_byte_ptr = p;            req_char_ptr = p;
3578              }
3579            }
3580        }        }
3581      }      }   /* End of optimizations that are done when not restarting */
3582    
3583    /* OK, now we can do the business */    /* OK, now we can do the business */
3584    
3585      md->start_used_ptr = current_subject;
3586      md->recursive = NULL;
3587    
3588    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3589      md,                                /* fixed match data */      md,                                /* fixed match data */
3590      md->start_code,                    /* this subexpression's code */      md->start_code,                    /* this subexpression's code */
# Line 2945  for (;;) Line 3594  for (;;)
3594      offsetcount,                       /* size of same */      offsetcount,                       /* size of same */
3595      workspace,                         /* workspace vector */      workspace,                         /* workspace vector */
3596      wscount,                           /* size of same */      wscount,                           /* size of same */
3597      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      0);                                /* function recurse level */
     0,                                 /* function recurse level */  
     0);                                /* regex recurse level */  
3598    
3599    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3600    on only if not anchored. */    on only if not anchored. */
3601    
3602    if (rc != PCRE_ERROR_NOMATCH || anchored) return rc;    if (rc != PCRE_ERROR_NOMATCH || anchored)
3603        {
3604        if (rc == PCRE_ERROR_PARTIAL && offsetcount >= 2)
3605          {
3606          offsets[0] = (int)(md->start_used_ptr - (PCRE_PUCHAR)subject);
3607          offsets[1] = (int)(end_subject - (PCRE_PUCHAR)subject);
3608          if (offsetcount > 2)
3609            offsets[2] = (int)(current_subject - (PCRE_PUCHAR)subject);
3610          }
3611        return rc;
3612        }
3613    
3614    /* Advance to the next subject character unless we are at the end of a line    /* Advance to the next subject character unless we are at the end of a line
3615    and firstline is set. */    and firstline is set. */
3616    
3617    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3618    current_subject++;    current_subject++;
3619    if (utf8)  #ifdef SUPPORT_UTF
3620      if (utf)
3621      {      {
3622      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3623        current_subject++;        current_subject++);
3624      }      }
3625    #endif
3626    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3627    
3628    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does
3629    not contain any explicit matches for \r or \n, and the newline option is CRLF    not contain any explicit matches for \r or \n, and the newline option is CRLF
3630    or ANY or ANYCRLF, advance the match position by one more character. */    or ANY or ANYCRLF, advance the match position by one more character. */
3631    
3632    if (current_subject[-1] == CHAR_CR &&    if (RAWUCHARTEST(current_subject - 1) == CHAR_CR &&
3633        current_subject < end_subject &&        current_subject < end_subject &&
3634        *current_subject == CHAR_NL &&        RAWUCHARTEST(current_subject) == CHAR_NL &&
3635        (re->flags & PCRE_HASCRORLF) == 0 &&        (re->flags & PCRE_HASCRORLF) == 0 &&
3636          (md->nltype == NLTYPE_ANY ||          (md->nltype == NLTYPE_ANY ||
3637           md->nltype == NLTYPE_ANYCRLF ||           md->nltype == NLTYPE_ANYCRLF ||

Legend:
Removed from v.428  
changed lines
  Added in v.1376

  ViewVC Help
Powered by ViewVC 1.1.5