/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 473 by ph10, Sat Jan 2 12:40:07 2010 UTC | revision 925 by ph10, Wed Feb 22 14:24:56 2012 UTC
# Line 7  and semantics are as close as possible t Line 7  and semantics are as close as possible t
7  below for why this module is different).  below for why this module is different).
8    
9                         Written by Philip Hazel                         Written by Philip Hazel
10             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
11    
12  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
13  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 106  never stored, so we push them well clear Line 106  never stored, so we push them well clear
106    
107    
108  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
109  character that is to be tested in some way. This makes is possible to  character that is to be tested in some way. This makes it possible to
110  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
111  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
112  small value. Non-zero values in the table are the offsets from the opcode where  small value. Non-zero values in the table are the offsets from the opcode where
113  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
114  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
115    
116  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
117    0,                             /* End                                    */    0,                             /* End                                    */
118    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
119    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
120    0, 0, 0,                       /* Any, AllAny, Anybyte                   */    0, 0, 0,                       /* Any, AllAny, Anybyte                   */
121    0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */    0, 0,                          /* \P, \p                                 */
122    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */    0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
123    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0,                             /* \X                                     */
124      0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
125    1,                             /* Char                                   */    1,                             /* Char                                   */
126    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
127    1,                             /* not                                    */    1,                             /* not                                    */
128      1,                             /* noti                                   */
129    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
130    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
131    3, 3, 3,                       /* upto, minupto, exact                   */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto, minupto                          */
132    1, 1, 1, 3,                    /* *+, ++, ?+, upto+                      */    1+IMM2_SIZE,                   /* exact                                  */
133      1, 1, 1, 1+IMM2_SIZE,          /* *+, ++, ?+, upto+                      */
134      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
135      1+IMM2_SIZE, 1+IMM2_SIZE,      /* upto I, minupto I                      */
136      1+IMM2_SIZE,                   /* exact I                                */
137      1, 1, 1, 1+IMM2_SIZE,          /* *+I, ++I, ?+I, upto+I                  */
138    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
139    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
140    3, 3, 3,                       /* NOT upto, minupto, exact               */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto, minupto                      */
141    1, 1, 1, 3,                    /* NOT *+, ++, ?+, updo+                  */    1+IMM2_SIZE,                   /* NOT exact                              */
142      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+, ++, ?+, upto+                  */
143      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
144      1+IMM2_SIZE, 1+IMM2_SIZE,      /* NOT upto I, minupto I                  */
145      1+IMM2_SIZE,                   /* NOT exact I                            */
146      1, 1, 1, 1+IMM2_SIZE,          /* NOT *+I, ++I, ?+I, upto+I              */
147    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
148    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
149    3, 3, 3,                       /* Type upto, minupto, exact              */    1+IMM2_SIZE, 1+IMM2_SIZE,      /* Type upto, minupto                     */
150    1, 1, 1, 3,                    /* Type *+, ++, ?+, upto+                 */    1+IMM2_SIZE,                   /* Type exact                             */
151      1, 1, 1, 1+IMM2_SIZE,          /* Type *+, ++, ?+, upto+                 */
152    /* Character class & ref repeats                                         */    /* Character class & ref repeats                                         */
153    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */    0, 0, 0, 0, 0, 0,              /* *, *?, +, +?, ?, ??                    */
154    0, 0,                          /* CRRANGE, CRMINRANGE                    */    0, 0,                          /* CRRANGE, CRMINRANGE                    */
# Line 143  static const uschar coptable[] = { Line 156  static const uschar coptable[] = {
156    0,                             /* NCLASS                                 */    0,                             /* NCLASS                                 */
157    0,                             /* XCLASS - variable length               */    0,                             /* XCLASS - variable length               */
158    0,                             /* REF                                    */    0,                             /* REF                                    */
159      0,                             /* REFI                                   */
160    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
161    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
162    0,                             /* Alt                                    */    0,                             /* Alt                                    */
163    0,                             /* Ket                                    */    0,                             /* Ket                                    */
164    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
165    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
166      0,                             /* KetRpos                                */
167      0,                             /* Reverse                                */
168    0,                             /* Assert                                 */    0,                             /* Assert                                 */
169    0,                             /* Assert not                             */    0,                             /* Assert not                             */
170    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
171    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
172    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
173    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
174    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
175    0,                             /* CREF                                   */    0, 0,                          /* CREF, NCREF                            */
176    0,                             /* RREF                                   */    0, 0,                          /* RREF, NRREF                            */
177    0,                             /* DEF                                    */    0,                             /* DEF                                    */
178    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
179    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
180    0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
181      0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
182      0, 0                           /* CLOSE, SKIPZERO  */
183  };  };
184    
185  /* This table identifies those opcodes that inspect a character. It is used to  /* This table identifies those opcodes that inspect a character. It is used to
# Line 169  remember the fact that a character could Line 187  remember the fact that a character could
187  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
188  two tables that follow must also be modified. */  two tables that follow must also be modified. */
189    
190  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
191    0,                             /* End                                    */    0,                             /* End                                    */
192    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
193    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
194    1, 1, 1,                       /* Any, AllAny, Anybyte                   */    1, 1, 1,                       /* Any, AllAny, Anybyte                   */
195    1, 1, 1,                       /* NOTPROP, PROP, EXTUNI                  */    1, 1,                          /* \P, \p                                 */
196    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */    1, 1, 1, 1, 1,                 /* \R, \H, \h, \V, \v                     */
197    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    1,                             /* \X                                     */
198      0, 0, 0, 0, 0, 0,              /* \Z, \z, ^, ^M, $, $M                   */
199    1,                             /* Char                                   */    1,                             /* Char                                   */
200    1,                             /* Charnc                                 */    1,                             /* Chari                                  */
201    1,                             /* not                                    */    1,                             /* not                                    */
202      1,                             /* noti                                   */
203    /* Positive single-char repeats                                          */    /* Positive single-char repeats                                          */
204    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */    1, 1, 1, 1, 1, 1,              /* *, *?, +, +?, ?, ??                    */
205    1, 1, 1,                       /* upto, minupto, exact                   */    1, 1, 1,                       /* upto, minupto, exact                   */
206    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */    1, 1, 1, 1,                    /* *+, ++, ?+, upto+                      */
207      1, 1, 1, 1, 1, 1,              /* *I, *?I, +I, +?I, ?I, ??I              */
208      1, 1, 1,                       /* upto I, minupto I, exact I             */
209      1, 1, 1, 1,                    /* *+I, ++I, ?+I, upto+I                  */
210    /* Negative single-char repeats - only for chars < 256                   */    /* Negative single-char repeats - only for chars < 256                   */
211    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */    1, 1, 1, 1, 1, 1,              /* NOT *, *?, +, +?, ?, ??                */
212    1, 1, 1,                       /* NOT upto, minupto, exact               */    1, 1, 1,                       /* NOT upto, minupto, exact               */
213    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */    1, 1, 1, 1,                    /* NOT *+, ++, ?+, upto+                  */
214      1, 1, 1, 1, 1, 1,              /* NOT *I, *?I, +I, +?I, ?I, ??I          */
215      1, 1, 1,                       /* NOT upto I, minupto I, exact I         */
216      1, 1, 1, 1,                    /* NOT *+I, ++I, ?+I, upto+I              */
217    /* Positive type repeats                                                 */    /* Positive type repeats                                                 */
218    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */    1, 1, 1, 1, 1, 1,              /* Type *, *?, +, +?, ?, ??               */
219    1, 1, 1,                       /* Type upto, minupto, exact              */    1, 1, 1,                       /* Type upto, minupto, exact              */
# Line 199  static const uschar poptable[] = { Line 225  static const uschar poptable[] = {
225    1,                             /* NCLASS                                 */    1,                             /* NCLASS                                 */
226    1,                             /* XCLASS - variable length               */    1,                             /* XCLASS - variable length               */
227    0,                             /* REF                                    */    0,                             /* REF                                    */
228      0,                             /* REFI                                   */
229    0,                             /* RECURSE                                */    0,                             /* RECURSE                                */
230    0,                             /* CALLOUT                                */    0,                             /* CALLOUT                                */
231    0,                             /* Alt                                    */    0,                             /* Alt                                    */
232    0,                             /* Ket                                    */    0,                             /* Ket                                    */
233    0,                             /* KetRmax                                */    0,                             /* KetRmax                                */
234    0,                             /* KetRmin                                */    0,                             /* KetRmin                                */
235      0,                             /* KetRpos                                */
236      0,                             /* Reverse                                */
237    0,                             /* Assert                                 */    0,                             /* Assert                                 */
238    0,                             /* Assert not                             */    0,                             /* Assert not                             */
239    0,                             /* Assert behind                          */    0,                             /* Assert behind                          */
240    0,                             /* Assert behind not                      */    0,                             /* Assert behind not                      */
241    0,                             /* Reverse                                */    0, 0,                          /* ONCE, ONCE_NC                          */
242    0, 0, 0, 0,                    /* ONCE, BRA, CBRA, COND                  */    0, 0, 0, 0, 0,                 /* BRA, BRAPOS, CBRA, CBRAPOS, COND       */
243    0, 0, 0,                       /* SBRA, SCBRA, SCOND                     */    0, 0, 0, 0, 0,                 /* SBRA, SBRAPOS, SCBRA, SCBRAPOS, SCOND  */
244    0,                             /* CREF                                   */    0, 0,                          /* CREF, NCREF                            */
245    0,                             /* RREF                                   */    0, 0,                          /* RREF, NRREF                            */
246    0,                             /* DEF                                    */    0,                             /* DEF                                    */
247    0, 0,                          /* BRAZERO, BRAMINZERO                    */    0, 0, 0,                       /* BRAZERO, BRAMINZERO, BRAPOSZERO        */
248    0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */    0, 0, 0,                       /* MARK, PRUNE, PRUNE_ARG                 */
249    0, 0, 0, 0                     /* FAIL, ACCEPT, CLOSE, SKIPZERO          */    0, 0, 0, 0,                    /* SKIP, SKIP_ARG, THEN, THEN_ARG         */
250      0, 0, 0, 0,                    /* COMMIT, FAIL, ACCEPT, ASSERT_ACCEPT    */
251      0, 0                           /* CLOSE, SKIPZERO                        */
252  };  };
253    
254  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
255  and \w */  and \w */
256    
257  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
258    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
259    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
260    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 231  static const uschar toptable1[] = { Line 262  static const uschar toptable1[] = {
262    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
263  };  };
264    
265  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
266    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
267    ctype_digit, 0,    ctype_digit, 0,
268    ctype_space, 0,    ctype_space, 0,
# Line 248  these structures in, is a vector of ints Line 279  these structures in, is a vector of ints
279  typedef struct stateblock {  typedef struct stateblock {
280    int offset;                     /* Offset to opcode */    int offset;                     /* Offset to opcode */
281    int count;                      /* Count for repeats */    int count;                      /* Count for repeats */
   int ims;                        /* ims flag bits */  
282    int data;                       /* Some use extra data */    int data;                       /* Some use extra data */
283  } stateblock;  } stateblock;
284    
285  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))  #define INTS_PER_STATEBLOCK  (sizeof(stateblock)/sizeof(int))
286    
287    
288  #ifdef DEBUG  #ifdef PCRE_DEBUG
289  /*************************************************  /*************************************************
290  *             Print character string             *  *             Print character string             *
291  *************************************************/  *************************************************/
# Line 271  Returns:       nothing Line 301  Returns:       nothing
301  */  */
302    
303  static void  static void
304  pchars(unsigned char *p, int length, FILE *f)  pchars(const pcre_uchar *p, int length, FILE *f)
305  {  {
306  int c;  int c;
307  while (length-- > 0)  while (length-- > 0)
# Line 304  Arguments: Line 334  Arguments:
334    offsetcount       size of same    offsetcount       size of same
335    workspace         vector of workspace    workspace         vector of workspace
336    wscount           size of same    wscount           size of same
   ims               the current ims flags  
337    rlevel            function call recursion level    rlevel            function call recursion level
   recursing         regex recursive call level  
338    
339  Returns:            > 0 => number of match offset pairs placed in offsets  Returns:            > 0 => number of match offset pairs placed in offsets
340                      = 0 => offsets overflowed; longest matches are present                      = 0 => offsets overflowed; longest matches are present
# Line 321  for the current character, one for the f Line 349  for the current character, one for the f
349      { \      { \
350      next_active_state->offset = (x); \      next_active_state->offset = (x); \
351      next_active_state->count  = (y); \      next_active_state->count  = (y); \
     next_active_state->ims    = ims; \  
352      next_active_state++; \      next_active_state++; \
353      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_ACTIVE(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
354      } \      } \
# Line 332  for the current character, one for the f Line 359  for the current character, one for the f
359      { \      { \
360      next_active_state->offset = (x); \      next_active_state->offset = (x); \
361      next_active_state->count  = (y); \      next_active_state->count  = (y); \
     next_active_state->ims    = ims; \  
362      next_active_state->data   = (z); \      next_active_state->data   = (z); \
363      next_active_state++; \      next_active_state++; \
364      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_ACTIVE_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
# Line 344  for the current character, one for the f Line 370  for the current character, one for the f
370      { \      { \
371      next_new_state->offset = (x); \      next_new_state->offset = (x); \
372      next_new_state->count  = (y); \      next_new_state->count  = (y); \
     next_new_state->ims    = ims; \  
373      next_new_state++; \      next_new_state++; \
374      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \      DPRINTF(("%.*sADD_NEW(%d,%d)\n", rlevel*2-2, SP, (x), (y))); \
375      } \      } \
# Line 355  for the current character, one for the f Line 380  for the current character, one for the f
380      { \      { \
381      next_new_state->offset = (x); \      next_new_state->offset = (x); \
382      next_new_state->count  = (y); \      next_new_state->count  = (y); \
     next_new_state->ims    = ims; \  
383      next_new_state->data   = (z); \      next_new_state->data   = (z); \
384      next_new_state++; \      next_new_state++; \
385      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \      DPRINTF(("%.*sADD_NEW_DATA(%d,%d,%d)\n", rlevel*2-2, SP, (x), (y), (z))); \
# Line 367  for the current character, one for the f Line 391  for the current character, one for the f
391  static int  static int
392  internal_dfa_exec(  internal_dfa_exec(
393    dfa_match_data *md,    dfa_match_data *md,
394    const uschar *this_start_code,    const pcre_uchar *this_start_code,
395    const uschar *current_subject,    const pcre_uchar *current_subject,
396    int start_offset,    int start_offset,
397    int *offsets,    int *offsets,
398    int offsetcount,    int offsetcount,
399    int *workspace,    int *workspace,
400    int wscount,    int wscount,
401    int ims,    int  rlevel)
   int  rlevel,  
   int  recursing)  
402  {  {
403  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
404  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
405    
406  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
407  const uschar *ptr;  const pcre_uchar *ptr;
408  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
409    
410    dfa_recursion_info new_recursive;
411    
412  int active_count, new_count, match_count;  int active_count, new_count, match_count;
413    
414  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
415  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
416    
417  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
418  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
419  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
420    
421  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
422  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf = (md->poptions & PCRE_UTF8) != 0;
423  #else  #else
424  BOOL utf8 = FALSE;  BOOL utf = FALSE;
425  #endif  #endif
426    
427    BOOL reset_could_continue = FALSE;
428    
429  rlevel++;  rlevel++;
430  offsetcount &= (-2);  offsetcount &= (-2);
431    
# Line 408  wscount = (wscount - (wscount % (INTS_PE Line 434  wscount = (wscount - (wscount % (INTS_PE
434            (2 * INTS_PER_STATEBLOCK);            (2 * INTS_PER_STATEBLOCK);
435    
436  DPRINTF(("\n%.*s---------------------\n"  DPRINTF(("\n%.*s---------------------\n"
437    "%.*sCall to internal_dfa_exec f=%d r=%d\n",    "%.*sCall to internal_dfa_exec f=%d\n",
438    rlevel*2-2, SP, rlevel*2-2, SP, rlevel, recursing));    rlevel*2-2, SP, rlevel*2-2, SP, rlevel));
439    
440  ctypes = md->tables + ctypes_offset;  ctypes = md->tables + ctypes_offset;
441  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
# Line 422  next_new_state = new_states = active_sta Line 448  next_new_state = new_states = active_sta
448  new_count = 0;  new_count = 0;
449    
450  first_op = this_start_code + 1 + LINK_SIZE +  first_op = this_start_code + 1 + LINK_SIZE +
451    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);    ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
452        *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
453        ? IMM2_SIZE:0);
454    
455  /* The first thing in any (sub) pattern is a bracket of some sort. Push all  /* The first thing in any (sub) pattern is a bracket of some sort. Push all
456  the alternative states onto the list, and find out where the end is. This  the alternative states onto the list, and find out where the end is. This
# Line 450  if (*first_op == OP_REVERSE) Line 478  if (*first_op == OP_REVERSE)
478    /* If we can't go back the amount required for the longest lookbehind    /* If we can't go back the amount required for the longest lookbehind
479    pattern, go back as far as we can; some alternatives may still be viable. */    pattern, go back as far as we can; some alternatives may still be viable. */
480    
481  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
482    /* In character mode we have to step back character by character */    /* In character mode we have to step back character by character */
483    
484    if (utf8)    if (utf)
485      {      {
486      for (gone_back = 0; gone_back < max_back; gone_back++)      for (gone_back = 0; gone_back < max_back; gone_back++)
487        {        {
488        if (current_subject <= start_subject) break;        if (current_subject <= start_subject) break;
489        current_subject--;        current_subject--;
490        while (current_subject > start_subject &&        ACROSSCHAR(current_subject > start_subject, *current_subject, current_subject--);
              (*current_subject & 0xc0) == 0x80)  
         current_subject--;  
491        }        }
492      }      }
493    else    else
# Line 471  if (*first_op == OP_REVERSE) Line 497  if (*first_op == OP_REVERSE)
497    
498      {      {
499      gone_back = (current_subject - max_back < start_subject)?      gone_back = (current_subject - max_back < start_subject)?
500        current_subject - start_subject : max_back;        (int)(current_subject - start_subject) : max_back;
501      current_subject -= gone_back;      current_subject -= gone_back;
502      }      }
503    
# Line 488  if (*first_op == OP_REVERSE) Line 514  if (*first_op == OP_REVERSE)
514      int back = GET(end_code, 2+LINK_SIZE);      int back = GET(end_code, 2+LINK_SIZE);
515      if (back <= gone_back)      if (back <= gone_back)
516        {        {
517        int bstate = end_code - start_code + 2 + 2*LINK_SIZE;        int bstate = (int)(end_code - start_code + 2 + 2*LINK_SIZE);
518        ADD_NEW_DATA(-bstate, 0, gone_back - back);        ADD_NEW_DATA(-bstate, 0, gone_back - back);
519        }        }
520      end_code += GET(end_code, 1);      end_code += GET(end_code, 1);
# Line 521  else Line 547  else
547    else    else
548      {      {
549      int length = 1 + LINK_SIZE +      int length = 1 + LINK_SIZE +
550        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA)? 2:0);        ((*this_start_code == OP_CBRA || *this_start_code == OP_SCBRA ||
551            *this_start_code == OP_CBRAPOS || *this_start_code == OP_SCBRAPOS)
552            ? IMM2_SIZE:0);
553      do      do
554        {        {
555        ADD_NEW(end_code - start_code + length, 0);        ADD_NEW((int)(end_code - start_code + length), 0);
556        end_code += GET(end_code, 1);        end_code += GET(end_code, 1);
557        length = 1 + LINK_SIZE;        length = 1 + LINK_SIZE;
558        }        }
# Line 534  else Line 562  else
562    
563  workspace[0] = 0;    /* Bit indicating which vector is current */  workspace[0] = 0;    /* Bit indicating which vector is current */
564    
565  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, end_code - start_code));  DPRINTF(("%.*sEnd state = %d\n", rlevel*2-2, SP, (int)(end_code - start_code)));
566    
567  /* Loop for scanning the subject */  /* Loop for scanning the subject */
568    
# Line 545  for (;;) Line 573  for (;;)
573    int clen, dlen;    int clen, dlen;
574    unsigned int c, d;    unsigned int c, d;
575    int forced_fail = 0;    int forced_fail = 0;
576    BOOL could_continue = FALSE;    BOOL partial_newline = FALSE;
577      BOOL could_continue = reset_could_continue;
578      reset_could_continue = FALSE;
579    
580    /* Make the new state list into the active state list and empty the    /* Make the new state list into the active state list and empty the
581    new state list. */    new state list. */
582    
# Line 559  for (;;) Line 589  for (;;)
589    workspace[0] ^= 1;              /* Remember for the restarting feature */    workspace[0] ^= 1;              /* Remember for the restarting feature */
590    workspace[1] = active_count;    workspace[1] = active_count;
591    
592  #ifdef DEBUG  #ifdef PCRE_DEBUG
593    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
594    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars(ptr, STRLEN_UC(ptr), stdout);
595    printf("\"\n");    printf("\"\n");
596    
597    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 582  for (;;) Line 612  for (;;)
612    if (ptr < end_subject)    if (ptr < end_subject)
613      {      {
614      clen = 1;        /* Number of bytes in the character */      clen = 1;        /* Number of bytes in the character */
615  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
616      if (utf8) { GETCHARLEN(c, ptr, clen); } else      if (utf) { GETCHARLEN(c, ptr, clen); } else
617  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
618      c = *ptr;      c = *ptr;
619      }      }
620    else    else
# Line 601  for (;;) Line 631  for (;;)
631    for (i = 0; i < active_count; i++)    for (i = 0; i < active_count; i++)
632      {      {
633      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
634      const uschar *code;      BOOL caseless = FALSE;
635        const pcre_uchar *code;
636      int state_offset = current_state->offset;      int state_offset = current_state->offset;
637      int count, codevalue, rrc;      int count, codevalue, rrc;
638    
639  #ifdef DEBUG  #ifdef PCRE_DEBUG
640      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
641      if (clen == 0) printf("EOL\n");      if (clen == 0) printf("EOL\n");
642        else if (c > 32 && c < 127) printf("'%c'\n", c);        else if (c > 32 && c < 127) printf("'%c'\n", c);
643          else printf("0x%02x\n", c);          else printf("0x%02x\n", c);
644  #endif  #endif
645    
     /* This variable is referred to implicity in the ADD_xxx macros. */  
   
     ims = current_state->ims;  
   
646      /* A negative offset is a special case meaning "hold off going to this      /* A negative offset is a special case meaning "hold off going to this
647      (negated) state until the number of characters in the data field have      (negated) state until the number of characters in the data field have
648      been skipped". */      been skipped". If the could_continue flag was passed over from a previous
649        state, arrange for it to be passed on. */
650    
651      if (state_offset < 0)      if (state_offset < 0)
652        {        {
# Line 627  for (;;) Line 655  for (;;)
655          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));          DPRINTF(("%.*sSkipping this character\n", rlevel*2-2, SP));
656          ADD_NEW_DATA(state_offset, current_state->count,          ADD_NEW_DATA(state_offset, current_state->count,
657            current_state->data - 1);            current_state->data - 1);
658            if (could_continue) reset_could_continue = TRUE;
659          continue;          continue;
660          }          }
661        else        else
# Line 666  for (;;) Line 695  for (;;)
695      permitted.      permitted.
696    
697      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
698      argument that is not a data character - but is always one byte long. We      argument that is not a data character - but is always one byte long because
699      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in      the values are small. We have to take special action to deal with  \P, \p,
700      this case. To keep the other cases fast, convert these ones to new opcodes.      \H, \h, \V, \v and \X in this case. To keep the other cases fast, convert
701      */      these ones to new opcodes. */
702    
703      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
704        {        {
705        dlen = 1;        dlen = 1;
706  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
707        if (utf8) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else        if (utf) { GETCHARLEN(d, (code + coptable[codevalue]), dlen); } else
708  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
709        d = code[coptable[codevalue]];        d = code[coptable[codevalue]];
710        if (codevalue >= OP_TYPESTAR)        if (codevalue >= OP_TYPESTAR)
711          {          {
# Line 706  for (;;) Line 735  for (;;)
735    
736      switch (codevalue)      switch (codevalue)
737        {        {
738    /* ========================================================================== */
739          /* These cases are never obeyed. This is a fudge that causes a compile-
740          time error if the vectors coptable or poptable, which are indexed by
741          opcode, are not the correct length. It seems to be the only way to do
742          such a check at compile time, as the sizeof() operator does not work
743          in the C preprocessor. */
744    
745          case OP_TABLE_LENGTH:
746          case OP_TABLE_LENGTH +
747            ((sizeof(coptable) == OP_TABLE_LENGTH) &&
748             (sizeof(poptable) == OP_TABLE_LENGTH)):
749          break;
750    
751  /* ========================================================================== */  /* ========================================================================== */
752        /* Reached a closing bracket. If not at the end of the pattern, carry        /* Reached a closing bracket. If not at the end of the pattern, carry
753        on with the next opcode. Otherwise, unless we have an empty string and        on with the next opcode. For repeating opcodes, also add the repeat
754          state. Note that KETRPOS will always be encountered at the end of the
755          subpattern, because the possessive subpattern repeats are always handled
756          using recursive calls. Thus, it never adds any new states.
757    
758          At the end of the (sub)pattern, unless we have an empty string and
759        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the        PCRE_NOTEMPTY is set, or PCRE_NOTEMPTY_ATSTART is set and we are at the
760        start of the subject, save the match data, shifting up all previous        start of the subject, save the match data, shifting up all previous
761        matches so we always have the longest first. */        matches so we always have the longest first. */
# Line 717  for (;;) Line 763  for (;;)
763        case OP_KET:        case OP_KET:
764        case OP_KETRMIN:        case OP_KETRMIN:
765        case OP_KETRMAX:        case OP_KETRMAX:
766          case OP_KETRPOS:
767        if (code != end_code)        if (code != end_code)
768          {          {
769          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);          ADD_ACTIVE(state_offset + 1 + LINK_SIZE, 0);
# Line 733  for (;;) Line 780  for (;;)
780                  current_subject > start_subject + md->start_offset)))                  current_subject > start_subject + md->start_offset)))
781            {            {
782            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;            if (match_count < 0) match_count = (offsetcount >= 2)? 1 : 0;
783              else if (match_count > 0 && ++match_count * 2 >= offsetcount)              else if (match_count > 0 && ++match_count * 2 > offsetcount)
784                match_count = 0;                match_count = 0;
785            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;            count = ((match_count == 0)? offsetcount : match_count * 2) - 2;
786            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));            if (count > 0) memmove(offsets + 2, offsets, count * sizeof(int));
787            if (offsetcount >= 2)            if (offsetcount >= 2)
788              {              {
789              offsets[0] = current_subject - start_subject;              offsets[0] = (int)(current_subject - start_subject);
790              offsets[1] = ptr - start_subject;              offsets[1] = (int)(ptr - start_subject);
791              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,              DPRINTF(("%.*sSet matched string = \"%.*s\"\n", rlevel*2-2, SP,
792                offsets[1] - offsets[0], current_subject));                offsets[1] - offsets[0], current_subject));
793              }              }
# Line 762  for (;;) Line 809  for (;;)
809        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
810        case OP_ALT:        case OP_ALT:
811        do { code += GET(code, 1); } while (*code == OP_ALT);        do { code += GET(code, 1); } while (*code == OP_ALT);
812        ADD_ACTIVE(code - start_code, 0);        ADD_ACTIVE((int)(code - start_code), 0);
813        break;        break;
814    
815        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 770  for (;;) Line 817  for (;;)
817        case OP_SBRA:        case OP_SBRA:
818        do        do
819          {          {
820          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);          ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
821          code += GET(code, 1);          code += GET(code, 1);
822          }          }
823        while (*code == OP_ALT);        while (*code == OP_ALT);
# Line 779  for (;;) Line 826  for (;;)
826        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
827        case OP_CBRA:        case OP_CBRA:
828        case OP_SCBRA:        case OP_SCBRA:
829        ADD_ACTIVE(code - start_code + 3 + LINK_SIZE,  0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE + IMM2_SIZE),  0);
830        code += GET(code, 1);        code += GET(code, 1);
831        while (*code == OP_ALT)        while (*code == OP_ALT)
832          {          {
833          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE,  0);          ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE),  0);
834          code += GET(code, 1);          code += GET(code, 1);
835          }          }
836        break;        break;
# Line 794  for (;;) Line 841  for (;;)
841        ADD_ACTIVE(state_offset + 1, 0);        ADD_ACTIVE(state_offset + 1, 0);
842        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
843        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
844        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
845        break;        break;
846    
847        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
848        case OP_SKIPZERO:        case OP_SKIPZERO:
849        code += 1 + GET(code, 2);        code += 1 + GET(code, 2);
850        while (*code == OP_ALT) code += GET(code, 1);        while (*code == OP_ALT) code += GET(code, 1);
851        ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);        ADD_ACTIVE((int)(code - start_code + 1 + LINK_SIZE), 0);
852        break;        break;
853    
854        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
855        case OP_CIRC:        case OP_CIRC:
856        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if (ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0)
           ((ims & PCRE_MULTILINE) != 0 &&  
             ptr != end_subject &&  
             WAS_NEWLINE(ptr)))  
857          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
858        break;        break;
859    
860        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
861        case OP_EOD:        case OP_CIRCM:
862        if (ptr >= end_subject) { ADD_ACTIVE(state_offset + 1, 0); }        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
863              (ptr != end_subject && WAS_NEWLINE(ptr)))
864            { ADD_ACTIVE(state_offset + 1, 0); }
865        break;        break;
866    
867        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
868        case OP_OPT:        case OP_EOD:
869        ims = code[1];        if (ptr >= end_subject)
870        ADD_ACTIVE(state_offset + 2, 0);          {
871            if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
872              could_continue = TRUE;
873            else { ADD_ACTIVE(state_offset + 1, 0); }
874            }
875        break;        break;
876    
877        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 844  for (;;) Line 894  for (;;)
894        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
895        case OP_ANY:        case OP_ANY:
896        if (clen > 0 && !IS_NEWLINE(ptr))        if (clen > 0 && !IS_NEWLINE(ptr))
897          { ADD_NEW(state_offset + 1, 0); }          {
898            if (ptr + 1 >= md->end_subject &&
899                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
900                NLBLOCK->nltype == NLTYPE_FIXED &&
901                NLBLOCK->nllen == 2 &&
902                c == NLBLOCK->nl[0])
903              {
904              could_continue = partial_newline = TRUE;
905              }
906            else
907              {
908              ADD_NEW(state_offset + 1, 0);
909              }
910            }
911        break;        break;
912    
913        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
# Line 855  for (;;) Line 918  for (;;)
918    
919        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
920        case OP_EODN:        case OP_EODN:
921        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))        if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
922            could_continue = TRUE;
923          else if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
924          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
925        break;        break;
926    
# Line 863  for (;;) Line 928  for (;;)
928        case OP_DOLL:        case OP_DOLL:
929        if ((md->moptions & PCRE_NOTEOL) == 0)        if ((md->moptions & PCRE_NOTEOL) == 0)
930          {          {
931          if (clen == 0 ||          if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
932              could_continue = TRUE;
933            else if (clen == 0 ||
934              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&              ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr) &&
935                 ((ims & PCRE_MULTILINE) != 0 || ptr == end_subject - md->nllen)                 (ptr == end_subject - md->nllen)
936              ))              ))
937            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
938            else if (ptr + 1 >= md->end_subject &&
939                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
940                     NLBLOCK->nltype == NLTYPE_FIXED &&
941                     NLBLOCK->nllen == 2 &&
942                     c == NLBLOCK->nl[0])
943              {
944              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
945                {
946                reset_could_continue = TRUE;
947                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
948                }
949              else could_continue = partial_newline = TRUE;
950              }
951            }
952          break;
953    
954          /*-----------------------------------------------------------------*/
955          case OP_DOLLM:
956          if ((md->moptions & PCRE_NOTEOL) == 0)
957            {
958            if (clen == 0 && (md->moptions & PCRE_PARTIAL_HARD) != 0)
959              could_continue = TRUE;
960            else if (clen == 0 ||
961                ((md->poptions & PCRE_DOLLAR_ENDONLY) == 0 && IS_NEWLINE(ptr)))
962              { ADD_ACTIVE(state_offset + 1, 0); }
963            else if (ptr + 1 >= md->end_subject &&
964                     (md->moptions & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) != 0 &&
965                     NLBLOCK->nltype == NLTYPE_FIXED &&
966                     NLBLOCK->nllen == 2 &&
967                     c == NLBLOCK->nl[0])
968              {
969              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
970                {
971                reset_could_continue = TRUE;
972                ADD_NEW_DATA(-(state_offset + 1), 0, 1);
973                }
974              else could_continue = partial_newline = TRUE;
975              }
976          }          }
977        else if ((ims & PCRE_MULTILINE) != 0 && IS_NEWLINE(ptr))        else if (IS_NEWLINE(ptr))
978          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
979        break;        break;
980    
# Line 900  for (;;) Line 1005  for (;;)
1005    
1006          if (ptr > start_subject)          if (ptr > start_subject)
1007            {            {
1008            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
1009            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
1010  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
1011            if (utf8) BACKCHAR(temp);            if (utf) { BACKCHAR(temp); }
1012  #endif  #endif
1013            GETCHARTEST(d, temp);            GETCHARTEST(d, temp);
1014    #ifdef SUPPORT_UCP
1015              if ((md->poptions & PCRE_UCP) != 0)
1016                {
1017                if (d == '_') left_word = TRUE; else
1018                  {
1019                  int cat = UCD_CATEGORY(d);
1020                  left_word = (cat == ucp_L || cat == ucp_N);
1021                  }
1022                }
1023              else
1024    #endif
1025            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;            left_word = d < 256 && (ctypes[d] & ctype_word) != 0;
1026            }            }
1027          else left_word = 0;          else left_word = FALSE;
1028    
1029          if (clen > 0)          if (clen > 0)
1030              {
1031    #ifdef SUPPORT_UCP
1032              if ((md->poptions & PCRE_UCP) != 0)
1033                {
1034                if (c == '_') right_word = TRUE; else
1035                  {
1036                  int cat = UCD_CATEGORY(c);
1037                  right_word = (cat == ucp_L || cat == ucp_N);
1038                  }
1039                }
1040              else
1041    #endif
1042            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;            right_word = c < 256 && (ctypes[c] & ctype_word) != 0;
1043          else right_word = 0;            }
1044            else right_word = FALSE;
1045    
1046          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))          if ((left_word == right_word) == (codevalue == OP_NOT_WORD_BOUNDARY))
1047            { ADD_ACTIVE(state_offset + 1, 0); }            { ADD_ACTIVE(state_offset + 1, 0); }
# Line 939  for (;;) Line 1068  for (;;)
1068            break;            break;
1069    
1070            case PT_LAMP:            case PT_LAMP:
1071            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1072                   prop->chartype == ucp_Lt;
1073            break;            break;
1074    
1075            case PT_GC:            case PT_GC:
1076            OK = _pcre_ucp_gentype[prop->chartype] == code[2];            OK = PRIV(ucp_gentype)[prop->chartype] == code[2];
1077            break;            break;
1078    
1079            case PT_PC:            case PT_PC:
# Line 954  for (;;) Line 1084  for (;;)
1084            OK = prop->script == code[2];            OK = prop->script == code[2];
1085            break;            break;
1086    
1087              /* These are specials for combination cases. */
1088    
1089              case PT_ALNUM:
1090              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1091                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1092              break;
1093    
1094              case PT_SPACE:    /* Perl space */
1095              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1096                   c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1097              break;
1098    
1099              case PT_PXSPACE:  /* POSIX space */
1100              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1101                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1102                   c == CHAR_FF || c == CHAR_CR;
1103              break;
1104    
1105              case PT_WORD:
1106              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1107                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1108                   c == CHAR_UNDERSCORE;
1109              break;
1110    
1111            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1112    
1113            default:            default:
# Line 981  for (;;) Line 1135  for (;;)
1135        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1136        if (clen > 0)        if (clen > 0)
1137          {          {
1138          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1139                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1140                NLBLOCK->nltype == NLTYPE_FIXED &&
1141                NLBLOCK->nllen == 2 &&
1142                c == NLBLOCK->nl[0])
1143              {
1144              could_continue = partial_newline = TRUE;
1145              }
1146            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1147              (c < 256 &&              (c < 256 &&
1148                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1149                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1004  for (;;) Line 1166  for (;;)
1166        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1167        if (clen > 0)        if (clen > 0)
1168          {          {
1169          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1170                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1171                NLBLOCK->nltype == NLTYPE_FIXED &&
1172                NLBLOCK->nllen == 2 &&
1173                c == NLBLOCK->nl[0])
1174              {
1175              could_continue = partial_newline = TRUE;
1176              }
1177            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1178              (c < 256 &&              (c < 256 &&
1179                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1180                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1026  for (;;) Line 1196  for (;;)
1196        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1197        if (clen > 0)        if (clen > 0)
1198          {          {
1199          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1200                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1201                NLBLOCK->nltype == NLTYPE_FIXED &&
1202                NLBLOCK->nllen == 2 &&
1203                c == NLBLOCK->nl[0])
1204              {
1205              could_continue = partial_newline = TRUE;
1206              }
1207            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1208              (c < 256 &&              (c < 256 &&
1209                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1210                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1046  for (;;) Line 1224  for (;;)
1224        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1225        if (clen > 0)        if (clen > 0)
1226          {          {
1227          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1228                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1229                NLBLOCK->nltype == NLTYPE_FIXED &&
1230                NLBLOCK->nllen == 2 &&
1231                c == NLBLOCK->nl[0])
1232              {
1233              could_continue = partial_newline = TRUE;
1234              }
1235            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1236              (c < 256 &&              (c < 256 &&
1237                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1238                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
1239            {            {
1240            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1241              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 1, 0); }
1242            else            else
1243              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1244            }            }
# Line 1063  for (;;) Line 1249  for (;;)
1249        case OP_TYPEUPTO:        case OP_TYPEUPTO:
1250        case OP_TYPEMINUPTO:        case OP_TYPEMINUPTO:
1251        case OP_TYPEPOSUPTO:        case OP_TYPEPOSUPTO:
1252        ADD_ACTIVE(state_offset + 4, 0);        ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0);
1253        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1254        if (clen > 0)        if (clen > 0)
1255          {          {
1256          if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||          if (d == OP_ANY && ptr + 1 >= md->end_subject &&
1257                (md->moptions & (PCRE_PARTIAL_HARD)) != 0 &&
1258                NLBLOCK->nltype == NLTYPE_FIXED &&
1259                NLBLOCK->nllen == 2 &&
1260                c == NLBLOCK->nl[0])
1261              {
1262              could_continue = partial_newline = TRUE;
1263              }
1264            else if ((c >= 256 && d != OP_DIGIT && d != OP_WHITESPACE && d != OP_WORDCHAR) ||
1265              (c < 256 &&              (c < 256 &&
1266                (d != OP_ANY || !IS_NEWLINE(ptr)) &&                (d != OP_ANY || !IS_NEWLINE(ptr)) &&
1267                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))                ((ctypes[c] & toptable1[d]) ^ toptable2[d]) != 0))
# Line 1078  for (;;) Line 1272  for (;;)
1272              next_active_state--;              next_active_state--;
1273              }              }
1274            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1275              { ADD_NEW(state_offset + 4, 0); }              { ADD_NEW(state_offset + 2 + IMM2_SIZE, 0); }
1276            else            else
1277              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1278            }            }
# Line 1108  for (;;) Line 1302  for (;;)
1302            break;            break;
1303    
1304            case PT_LAMP:            case PT_LAMP:
1305            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1306                prop->chartype == ucp_Lt;
1307            break;            break;
1308    
1309            case PT_GC:            case PT_GC:
1310            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1311            break;            break;
1312    
1313            case PT_PC:            case PT_PC:
# Line 1123  for (;;) Line 1318  for (;;)
1318            OK = prop->script == code[3];            OK = prop->script == code[3];
1319            break;            break;
1320    
1321              /* These are specials for combination cases. */
1322    
1323              case PT_ALNUM:
1324              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1325                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1326              break;
1327    
1328              case PT_SPACE:    /* Perl space */
1329              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1330                   c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1331              break;
1332    
1333              case PT_PXSPACE:  /* POSIX space */
1334              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1335                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1336                   c == CHAR_FF || c == CHAR_CR;
1337              break;
1338    
1339              case PT_WORD:
1340              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1341                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1342                   c == CHAR_UNDERSCORE;
1343              break;
1344    
1345            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1346    
1347            default:            default:
# Line 1151  for (;;) Line 1370  for (;;)
1370        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1371        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1372          {          {
1373          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1374          int ncount = 0;          int ncount = 0;
1375          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1376            {            {
# Line 1330  for (;;) Line 1549  for (;;)
1549            break;            break;
1550    
1551            case PT_LAMP:            case PT_LAMP:
1552            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1553                prop->chartype == ucp_Lt;
1554            break;            break;
1555    
1556            case PT_GC:            case PT_GC:
1557            OK = _pcre_ucp_gentype[prop->chartype] == code[3];            OK = PRIV(ucp_gentype)[prop->chartype] == code[3];
1558            break;            break;
1559    
1560            case PT_PC:            case PT_PC:
# Line 1345  for (;;) Line 1565  for (;;)
1565            OK = prop->script == code[3];            OK = prop->script == code[3];
1566            break;            break;
1567    
1568              /* These are specials for combination cases. */
1569    
1570              case PT_ALNUM:
1571              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1572                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1573              break;
1574    
1575              case PT_SPACE:    /* Perl space */
1576              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1577                   c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1578              break;
1579    
1580              case PT_PXSPACE:  /* POSIX space */
1581              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1582                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1583                   c == CHAR_FF || c == CHAR_CR;
1584              break;
1585    
1586              case PT_WORD:
1587              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1588                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1589                   c == CHAR_UNDERSCORE;
1590              break;
1591    
1592            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
1593    
1594            default:            default:
# Line 1382  for (;;) Line 1626  for (;;)
1626        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1627        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1628          {          {
1629          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1630          int ncount = 0;          int ncount = 0;
1631          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1632              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1564  for (;;) Line 1808  for (;;)
1808        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
1809        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:        case OP_PROP_EXTRA + OP_TYPEPOSUPTO:
1810        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_PROP_EXTRA + OP_TYPEEXACT)
1811          { ADD_ACTIVE(state_offset + 6, 0); }          { ADD_ACTIVE(state_offset + 1 + IMM2_SIZE + 3, 0); }
1812        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1813        if (clen > 0)        if (clen > 0)
1814          {          {
1815          BOOL OK;          BOOL OK;
1816          const ucd_record * prop = GET_UCD(c);          const ucd_record * prop = GET_UCD(c);
1817          switch(code[4])          switch(code[1 + IMM2_SIZE + 1])
1818            {            {
1819            case PT_ANY:            case PT_ANY:
1820            OK = TRUE;            OK = TRUE;
1821            break;            break;
1822    
1823            case PT_LAMP:            case PT_LAMP:
1824            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll || prop->chartype == ucp_Lt;            OK = prop->chartype == ucp_Lu || prop->chartype == ucp_Ll ||
1825                prop->chartype == ucp_Lt;
1826            break;            break;
1827    
1828            case PT_GC:            case PT_GC:
1829            OK = _pcre_ucp_gentype[prop->chartype] == code[5];            OK = PRIV(ucp_gentype)[prop->chartype] == code[1 + IMM2_SIZE + 2];
1830            break;            break;
1831    
1832            case PT_PC:            case PT_PC:
1833            OK = prop->chartype == code[5];            OK = prop->chartype == code[1 + IMM2_SIZE + 2];
1834            break;            break;
1835    
1836            case PT_SC:            case PT_SC:
1837            OK = prop->script == code[5];            OK = prop->script == code[1 + IMM2_SIZE + 2];
1838              break;
1839    
1840              /* These are specials for combination cases. */
1841    
1842              case PT_ALNUM:
1843              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1844                   PRIV(ucp_gentype)[prop->chartype] == ucp_N;
1845              break;
1846    
1847              case PT_SPACE:    /* Perl space */
1848              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1849                   c == CHAR_HT || c == CHAR_NL || c == CHAR_FF || c == CHAR_CR;
1850              break;
1851    
1852              case PT_PXSPACE:  /* POSIX space */
1853              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_Z ||
1854                   c == CHAR_HT || c == CHAR_NL || c == CHAR_VT ||
1855                   c == CHAR_FF || c == CHAR_CR;
1856              break;
1857    
1858              case PT_WORD:
1859              OK = PRIV(ucp_gentype)[prop->chartype] == ucp_L ||
1860                   PRIV(ucp_gentype)[prop->chartype] == ucp_N ||
1861                   c == CHAR_UNDERSCORE;
1862            break;            break;
1863    
1864            /* Should never occur, but keep compilers from grumbling. */            /* Should never occur, but keep compilers from grumbling. */
# Line 1607  for (;;) Line 1876  for (;;)
1876              next_active_state--;              next_active_state--;
1877              }              }
1878            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1879              { ADD_NEW(state_offset + 6, 0); }              { ADD_NEW(state_offset + 1 + IMM2_SIZE + 3, 0); }
1880            else            else
1881              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
1882            }            }
# Line 1620  for (;;) Line 1889  for (;;)
1889        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEMINUPTO:
1890        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:        case OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO:
1891        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_EXTUNI_EXTRA + OP_TYPEEXACT)
1892          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1893        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1894        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1895          {          {
1896          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1897          int ncount = 0;          int ncount = 0;
1898          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1899            {            {
# Line 1640  for (;;) Line 1909  for (;;)
1909            ncount++;            ncount++;
1910            nptr += ndlen;            nptr += ndlen;
1911            }            }
1912            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
1913                reset_could_continue = TRUE;
1914          if (++count >= GET2(code, 1))          if (++count >= GET2(code, 1))
1915            { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }            { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1916          else          else
1917            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1918          }          }
# Line 1654  for (;;) Line 1925  for (;;)
1925        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:        case OP_ANYNL_EXTRA + OP_TYPEMINUPTO:
1926        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:        case OP_ANYNL_EXTRA + OP_TYPEPOSUPTO:
1927        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_ANYNL_EXTRA + OP_TYPEEXACT)
1928          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1929        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1930        if (clen > 0)        if (clen > 0)
1931          {          {
# Line 1681  for (;;) Line 1952  for (;;)
1952              next_active_state--;              next_active_state--;
1953              }              }
1954            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1955              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, ncount); }
1956            else            else
1957              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, ncount); }
1958            break;            break;
# Line 1698  for (;;) Line 1969  for (;;)
1969        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1970        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1971        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1972          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
1973        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1974        if (clen > 0)        if (clen > 0)
1975          {          {
# Line 1727  for (;;) Line 1998  for (;;)
1998              next_active_state--;              next_active_state--;
1999              }              }
2000            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2001              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2002            else            else
2003              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2004            }            }
# Line 1740  for (;;) Line 2011  for (;;)
2011        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:        case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
2012        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:        case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
2013        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)        if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
2014          { ADD_ACTIVE(state_offset + 4, 0); }          { ADD_ACTIVE(state_offset + 2 + IMM2_SIZE, 0); }
2015        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2016        if (clen > 0)        if (clen > 0)
2017          {          {
# Line 1782  for (;;) Line 2053  for (;;)
2053              next_active_state--;              next_active_state--;
2054              }              }
2055            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2056              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }              { ADD_NEW_DATA(-(state_offset + 2 + IMM2_SIZE), 0, 0); }
2057            else            else
2058              { ADD_NEW_DATA(-state_offset, count, 0); }              { ADD_NEW_DATA(-state_offset, count, 0); }
2059            }            }
# Line 1801  for (;;) Line 2072  for (;;)
2072        break;        break;
2073    
2074        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2075        case OP_CHARNC:        case OP_CHARI:
2076        if (clen == 0) break;        if (clen == 0) break;
2077    
2078  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2079        if (utf8)        if (utf)
2080          {          {
2081          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else          if (c == d) { ADD_NEW(state_offset + dlen + 1, 0); } else
2082            {            {
2083            unsigned int othercase;            unsigned int othercase;
2084            if (c < 128) othercase = fcc[c]; else            if (c < 128)
2085                othercase = fcc[c];
2086            /* If we have Unicode property support, we can use it to test the            else
2087            other case of the character. */              /* If we have Unicode property support, we can use it to test the
2088                other case of the character. */
2089  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2090            othercase = UCD_OTHERCASE(c);              othercase = UCD_OTHERCASE(c);
2091  #else  #else
2092            othercase = NOTACHAR;              othercase = NOTACHAR;
2093  #endif  #endif
2094    
2095            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }            if (d == othercase) { ADD_NEW(state_offset + dlen + 1, 0); }
2096            }            }
2097          }          }
2098        else        else
2099  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2100          /* Not UTF mode */
       /* Non-UTF-8 mode */  
2101          {          {
2102          if (lcc[c] == lcc[d]) { ADD_NEW(state_offset + 2, 0); }          if (TABLE_GET(c, lcc, c) == TABLE_GET(d, lcc, d))
2103              { ADD_NEW(state_offset + 2, 0); }
2104          }          }
2105        break;        break;
2106    
# Line 1843  for (;;) Line 2114  for (;;)
2114        case OP_EXTUNI:        case OP_EXTUNI:
2115        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
2116          {          {
2117          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2118          int ncount = 0;          int ncount = 0;
2119          while (nptr < end_subject)          while (nptr < end_subject)
2120            {            {
# Line 1853  for (;;) Line 2124  for (;;)
2124            ncount++;            ncount++;
2125            nptr += nclen;            nptr += nclen;
2126            }            }
2127            if (nptr >= end_subject && (md->moptions & PCRE_PARTIAL_HARD) != 0)
2128                reset_could_continue = TRUE;
2129          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);          ADD_NEW_DATA(-(state_offset + 1), 0, ncount);
2130          }          }
2131        break;        break;
# Line 1878  for (;;) Line 2151  for (;;)
2151          break;          break;
2152    
2153          case 0x000d:          case 0x000d:
2154          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 >= end_subject)
2155              {
2156              ADD_NEW(state_offset + 1, 0);
2157              if ((md->moptions & PCRE_PARTIAL_HARD) != 0)
2158                reset_could_continue = TRUE;
2159              }
2160            else if (ptr[1] == 0x0a)
2161            {            {
2162            ADD_NEW_DATA(-(state_offset + 1), 0, 1);            ADD_NEW_DATA(-(state_offset + 1), 0, 1);
2163            }            }
2164          else          else
2165            {            {
2166            ADD_NEW(state_offset + 1, 0);            ADD_NEW(state_offset + 1, 0);
2167            }            }
2168          break;          break;
2169          }          }
2170        break;        break;
# Line 1987  for (;;) Line 2266  for (;;)
2266        break;        break;
2267    
2268        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2269        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character casefully. */
       characters, that is, we know that d < 256. The character we are  
       checking (c) can be multibyte. */  
2270    
2271        case OP_NOT:        case OP_NOT:
2272          if (clen > 0 && c != d) { ADD_NEW(state_offset + dlen + 1, 0); }
2273          break;
2274    
2275          /*-----------------------------------------------------------------*/
2276          /* Match a negated single character caselessly. */
2277    
2278          case OP_NOTI:
2279        if (clen > 0)        if (clen > 0)
2280          {          {
2281          unsigned int otherd = ((ims & PCRE_CASELESS) != 0)? fcc[d] : d;          unsigned int otherd;
2282          if (c != d && c != otherd) { ADD_NEW(state_offset + dlen + 1, 0); }  #ifdef SUPPORT_UTF
2283          }          if (utf && d >= 128)
2284              {
2285    #ifdef SUPPORT_UCP
2286              otherd = UCD_OTHERCASE(d);
2287    #endif  /* SUPPORT_UCP */
2288              }
2289            else
2290    #endif  /* SUPPORT_UTF */
2291            otherd = TABLE_GET(d, fcc, d);
2292            if (c != d && c != otherd)
2293              { ADD_NEW(state_offset + dlen + 1, 0); }
2294            }
2295        break;        break;
2296    
2297        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2298          case OP_PLUSI:
2299          case OP_MINPLUSI:
2300          case OP_POSPLUSI:
2301          case OP_NOTPLUSI:
2302          case OP_NOTMINPLUSI:
2303          case OP_NOTPOSPLUSI:
2304          caseless = TRUE;
2305          codevalue -= OP_STARI - OP_STAR;
2306    
2307          /* Fall through */
2308        case OP_PLUS:        case OP_PLUS:
2309        case OP_MINPLUS:        case OP_MINPLUS:
2310        case OP_POSPLUS:        case OP_POSPLUS:
# Line 2011  for (;;) Line 2316  for (;;)
2316        if (clen > 0)        if (clen > 0)
2317          {          {
2318          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2319          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2320            {            {
2321  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2322            if (utf8 && d >= 128)            if (utf && d >= 128)
2323              {              {
2324  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2325              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2326  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2327              }              }
2328            else            else
2329  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2330            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2331            }            }
2332          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2333            {            {
# Line 2039  for (;;) Line 2344  for (;;)
2344        break;        break;
2345    
2346        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2347          case OP_QUERYI:
2348          case OP_MINQUERYI:
2349          case OP_POSQUERYI:
2350          case OP_NOTQUERYI:
2351          case OP_NOTMINQUERYI:
2352          case OP_NOTPOSQUERYI:
2353          caseless = TRUE;
2354          codevalue -= OP_STARI - OP_STAR;
2355          /* Fall through */
2356        case OP_QUERY:        case OP_QUERY:
2357        case OP_MINQUERY:        case OP_MINQUERY:
2358        case OP_POSQUERY:        case OP_POSQUERY:
# Line 2049  for (;;) Line 2363  for (;;)
2363        if (clen > 0)        if (clen > 0)
2364          {          {
2365          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2366          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2367            {            {
2368  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2369            if (utf8 && d >= 128)            if (utf && d >= 128)
2370              {              {
2371  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2372              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2373  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2374              }              }
2375            else            else
2376  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2377            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2378            }            }
2379          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2380            {            {
# Line 2075  for (;;) Line 2389  for (;;)
2389        break;        break;
2390    
2391        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2392          case OP_STARI:
2393          case OP_MINSTARI:
2394          case OP_POSSTARI:
2395          case OP_NOTSTARI:
2396          case OP_NOTMINSTARI:
2397          case OP_NOTPOSSTARI:
2398          caseless = TRUE;
2399          codevalue -= OP_STARI - OP_STAR;
2400          /* Fall through */
2401        case OP_STAR:        case OP_STAR:
2402        case OP_MINSTAR:        case OP_MINSTAR:
2403        case OP_POSSTAR:        case OP_POSSTAR:
# Line 2085  for (;;) Line 2408  for (;;)
2408        if (clen > 0)        if (clen > 0)
2409          {          {
2410          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2411          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2412            {            {
2413  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2414            if (utf8 && d >= 128)            if (utf && d >= 128)
2415              {              {
2416  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2417              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2418  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2419              }              }
2420            else            else
2421  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2422            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2423            }            }
2424          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2425            {            {
# Line 2111  for (;;) Line 2434  for (;;)
2434        break;        break;
2435    
2436        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2437          case OP_EXACTI:
2438          case OP_NOTEXACTI:
2439          caseless = TRUE;
2440          codevalue -= OP_STARI - OP_STAR;
2441          /* Fall through */
2442        case OP_EXACT:        case OP_EXACT:
2443        case OP_NOTEXACT:        case OP_NOTEXACT:
2444        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2445        if (clen > 0)        if (clen > 0)
2446          {          {
2447          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2448          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2449            {            {
2450  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2451            if (utf8 && d >= 128)            if (utf && d >= 128)
2452              {              {
2453  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2454              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2455  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2456              }              }
2457            else            else
2458  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2459            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2460            }            }
2461          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2462            {            {
2463            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2464              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2465            else            else
2466              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2467            }            }
# Line 2141  for (;;) Line 2469  for (;;)
2469        break;        break;
2470    
2471        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2472          case OP_UPTOI:
2473          case OP_MINUPTOI:
2474          case OP_POSUPTOI:
2475          case OP_NOTUPTOI:
2476          case OP_NOTMINUPTOI:
2477          case OP_NOTPOSUPTOI:
2478          caseless = TRUE;
2479          codevalue -= OP_STARI - OP_STAR;
2480          /* Fall through */
2481        case OP_UPTO:        case OP_UPTO:
2482        case OP_MINUPTO:        case OP_MINUPTO:
2483        case OP_POSUPTO:        case OP_POSUPTO:
2484        case OP_NOTUPTO:        case OP_NOTUPTO:
2485        case OP_NOTMINUPTO:        case OP_NOTMINUPTO:
2486        case OP_NOTPOSUPTO:        case OP_NOTPOSUPTO:
2487        ADD_ACTIVE(state_offset + dlen + 3, 0);        ADD_ACTIVE(state_offset + dlen + 1 + IMM2_SIZE, 0);
2488        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
2489        if (clen > 0)        if (clen > 0)
2490          {          {
2491          unsigned int otherd = NOTACHAR;          unsigned int otherd = NOTACHAR;
2492          if ((ims & PCRE_CASELESS) != 0)          if (caseless)
2493            {            {
2494  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2495            if (utf8 && d >= 128)            if (utf && d >= 128)
2496              {              {
2497  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
2498              otherd = UCD_OTHERCASE(d);              otherd = UCD_OTHERCASE(d);
2499  #endif  /* SUPPORT_UCP */  #endif  /* SUPPORT_UCP */
2500              }              }
2501            else            else
2502  #endif  /* SUPPORT_UTF8 */  #endif  /* SUPPORT_UTF */
2503            otherd = fcc[d];            otherd = TABLE_GET(d, fcc, d);
2504            }            }
2505          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))          if ((c == d || c == otherd) == (codevalue < OP_NOTSTAR))
2506            {            {
# Line 2173  for (;;) Line 2510  for (;;)
2510              next_active_state--;              next_active_state--;
2511              }              }
2512            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
2513              { ADD_NEW(state_offset + dlen + 3, 0); }              { ADD_NEW(state_offset + dlen + 1 + IMM2_SIZE, 0); }
2514            else            else
2515              { ADD_NEW(state_offset, count); }              { ADD_NEW(state_offset, count); }
2516            }            }
# Line 2190  for (;;) Line 2527  for (;;)
2527          {          {
2528          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2529          int next_state_offset;          int next_state_offset;
2530          const uschar *ecode;          const pcre_uchar *ecode;
2531    
2532          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2533          can set isinclass from it. */          can set isinclass from it. */
2534    
2535          if (codevalue != OP_XCLASS)          if (codevalue != OP_XCLASS)
2536            {            {
2537            ecode = code + 33;            ecode = code + 1 + (32 / sizeof(pcre_uchar));
2538            if (clen > 0)            if (clen > 0)
2539              {              {
2540              isinclass = (c > 255)? (codevalue == OP_NCLASS) :              isinclass = (c > 255)? (codevalue == OP_NCLASS) :
2541                ((code[1 + c/8] & (1 << (c&7))) != 0);                ((((pcre_uint8 *)(code + 1))[c/8] & (1 << (c&7))) != 0);
2542              }              }
2543            }            }
2544    
# Line 2212  for (;;) Line 2549  for (;;)
2549          else          else
2550           {           {
2551           ecode = code + GET(code, 1);           ecode = code + GET(code, 1);
2552           if (clen > 0) isinclass = _pcre_xclass(c, code + 1 + LINK_SIZE);           if (clen > 0) isinclass = PRIV(xclass)(c, code + 1 + LINK_SIZE, utf);
2553           }           }
2554    
2555          /* At this point, isinclass is set for all kinds of class, and ecode          /* At this point, isinclass is set for all kinds of class, and ecode
2556          points to the byte after the end of the class. If there is a          points to the byte after the end of the class. If there is a
2557          quantifier, this is where it will be. */          quantifier, this is where it will be. */
2558    
2559          next_state_offset = ecode - start_code;          next_state_offset = (int)(ecode - start_code);
2560    
2561          switch (*ecode)          switch (*ecode)
2562            {            {
# Line 2246  for (;;) Line 2583  for (;;)
2583            case OP_CRMINRANGE:            case OP_CRMINRANGE:
2584            count = current_state->count;  /* Already matched */            count = current_state->count;  /* Already matched */
2585            if (count >= GET2(ecode, 1))            if (count >= GET2(ecode, 1))
2586              { ADD_ACTIVE(next_state_offset + 5, 0); }              { ADD_ACTIVE(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2587            if (isinclass)            if (isinclass)
2588              {              {
2589              int max = GET2(ecode, 3);              int max = GET2(ecode, 1 + IMM2_SIZE);
2590              if (++count >= max && max != 0)   /* Max 0 => no limit */              if (++count >= max && max != 0)   /* Max 0 => no limit */
2591                { ADD_NEW(next_state_offset + 5, 0); }                { ADD_NEW(next_state_offset + 1 + 2 * IMM2_SIZE, 0); }
2592              else              else
2593                { ADD_NEW(state_offset, count); }                { ADD_NEW(state_offset, count); }
2594              }              }
# Line 2282  for (;;) Line 2619  for (;;)
2619          int rc;          int rc;
2620          int local_offsets[2];          int local_offsets[2];
2621          int local_workspace[1000];          int local_workspace[1000];
2622          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2623    
2624          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2625    
# Line 2290  for (;;) Line 2627  for (;;)
2627            md,                                   /* static match data */            md,                                   /* static match data */
2628            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2629            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2630            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2631            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2632            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2633            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2634            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2635            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
2636            rlevel,                               /* function recursion level */  
           recursing);                           /* pass on regex recursion */  
   
2637          if (rc == PCRE_ERROR_DFA_UITEM) return rc;          if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2638          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))          if ((rc >= 0) == (codevalue == OP_ASSERT || codevalue == OP_ASSERTBACK))
2639              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2640          }          }
2641        break;        break;
2642    
# Line 2321  for (;;) Line 2656  for (;;)
2656          if (code[LINK_SIZE+1] == OP_CALLOUT)          if (code[LINK_SIZE+1] == OP_CALLOUT)
2657            {            {
2658            rrc = 0;            rrc = 0;
2659            if (pcre_callout != NULL)            if (PUBL(callout) != NULL)
2660              {              {
2661              pcre_callout_block cb;              PUBL(callout_block) cb;
2662              cb.version          = 1;   /* Version 1 of the callout block */              cb.version          = 1;   /* Version 1 of the callout block */
2663              cb.callout_number   = code[LINK_SIZE+2];              cb.callout_number   = code[LINK_SIZE+2];
2664              cb.offset_vector    = offsets;              cb.offset_vector    = offsets;
2665    #ifdef COMPILE_PCRE8
2666              cb.subject          = (PCRE_SPTR)start_subject;              cb.subject          = (PCRE_SPTR)start_subject;
2667              cb.subject_length   = end_subject - start_subject;  #else
2668              cb.start_match      = current_subject - start_subject;              cb.subject          = (PCRE_SPTR16)start_subject;
2669              cb.current_position = ptr - start_subject;  #endif
2670                cb.subject_length   = (int)(end_subject - start_subject);
2671                cb.start_match      = (int)(current_subject - start_subject);
2672                cb.current_position = (int)(ptr - start_subject);
2673              cb.pattern_position = GET(code, LINK_SIZE + 3);              cb.pattern_position = GET(code, LINK_SIZE + 3);
2674              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);              cb.next_item_length = GET(code, 3 + 2*LINK_SIZE);
2675              cb.capture_top      = 1;              cb.capture_top      = 1;
2676              cb.capture_last     = -1;              cb.capture_last     = -1;
2677              cb.callout_data     = md->callout_data;              cb.callout_data     = md->callout_data;
2678              if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */              cb.mark             = NULL;   /* No (*MARK) support */
2679                if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
2680              }              }
2681            if (rrc > 0) break;                      /* Fail this thread */            if (rrc > 0) break;                      /* Fail this thread */
2682            code += _pcre_OP_lengths[OP_CALLOUT];    /* Skip callout data */            code += PRIV(OP_lengths)[OP_CALLOUT];    /* Skip callout data */
2683            }            }
2684    
2685          condcode = code[LINK_SIZE+1];          condcode = code[LINK_SIZE+1];
# Line 2360  for (;;) Line 2700  for (;;)
2700    
2701          else if (condcode == OP_RREF || condcode == OP_NRREF)          else if (condcode == OP_RREF || condcode == OP_NRREF)
2702            {            {
2703            int value = GET2(code, LINK_SIZE+2);            int value = GET2(code, LINK_SIZE + 2);
2704            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;            if (value != RREF_ANY) return PCRE_ERROR_DFA_UCOND;
2705            if (recursing > 0)            if (md->recursive != NULL)
2706              { ADD_ACTIVE(state_offset + LINK_SIZE + 4, 0); }              { ADD_ACTIVE(state_offset + LINK_SIZE + 2 + IMM2_SIZE, 0); }
2707            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }            else { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2708            }            }
2709    
# Line 2372  for (;;) Line 2712  for (;;)
2712          else          else
2713            {            {
2714            int rc;            int rc;
2715            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2716            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2717    
2718            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2719    
# Line 2381  for (;;) Line 2721  for (;;)
2721              md,                                   /* fixed match data */              md,                                   /* fixed match data */
2722              asscode,                              /* this subexpression's code */              asscode,                              /* this subexpression's code */
2723              ptr,                                  /* where we currently are */              ptr,                                  /* where we currently are */
2724              ptr - start_subject,                  /* start offset */              (int)(ptr - start_subject),           /* start offset */
2725              local_offsets,                        /* offset vector */              local_offsets,                        /* offset vector */
2726              sizeof(local_offsets)/sizeof(int),    /* size of same */              sizeof(local_offsets)/sizeof(int),    /* size of same */
2727              local_workspace,                      /* workspace vector */              local_workspace,                      /* workspace vector */
2728              sizeof(local_workspace)/sizeof(int),  /* size of same */              sizeof(local_workspace)/sizeof(int),  /* size of same */
2729              ims,                                  /* the current ims flags */              rlevel);                              /* function recursion level */
             rlevel,                               /* function recursion level */  
             recursing);                           /* pass on regex recursion */  
2730    
2731            if (rc == PCRE_ERROR_DFA_UITEM) return rc;            if (rc == PCRE_ERROR_DFA_UITEM) return rc;
2732            if ((rc >= 0) ==            if ((rc >= 0) ==
2733                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))                  (condcode == OP_ASSERT || condcode == OP_ASSERTBACK))
2734              { ADD_ACTIVE(endasscode + LINK_SIZE + 1 - start_code, 0); }              { ADD_ACTIVE((int)(endasscode + LINK_SIZE + 1 - start_code), 0); }
2735            else            else
2736              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }              { ADD_ACTIVE(state_offset + codelink + LINK_SIZE + 1, 0); }
2737            }            }
# Line 2403  for (;;) Line 2741  for (;;)
2741        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2742        case OP_RECURSE:        case OP_RECURSE:
2743          {          {
2744            dfa_recursion_info *ri;
2745          int local_offsets[1000];          int local_offsets[1000];
2746          int local_workspace[1000];          int local_workspace[1000];
2747            const pcre_uchar *callpat = start_code + GET(code, 1);
2748            int recno = (callpat == md->start_code)? 0 :
2749              GET2(callpat, 1 + LINK_SIZE);
2750          int rc;          int rc;
2751    
2752          DPRINTF(("%.*sStarting regex recursion %d\n", rlevel*2-2, SP,          DPRINTF(("%.*sStarting regex recursion\n", rlevel*2-2, SP));
2753            recursing + 1));  
2754            /* Check for repeating a recursion without advancing the subject
2755            pointer. This should catch convoluted mutual recursions. (Some simple
2756            cases are caught at compile time.) */
2757    
2758            for (ri = md->recursive; ri != NULL; ri = ri->prevrec)
2759              if (recno == ri->group_num && ptr == ri->subject_position)
2760                return PCRE_ERROR_RECURSELOOP;
2761    
2762            /* Remember this recursion and where we started it so as to
2763            catch infinite loops. */
2764    
2765            new_recursive.group_num = recno;
2766            new_recursive.subject_position = ptr;
2767            new_recursive.prevrec = md->recursive;
2768            md->recursive = &new_recursive;
2769    
2770          rc = internal_dfa_exec(          rc = internal_dfa_exec(
2771            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2772            start_code + GET(code, 1),            /* this subexpression's code */            callpat,                              /* this subexpression's code */
2773            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2774            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2775            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2776            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2777            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2778            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2779            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
           rlevel,                               /* function recursion level */  
           recursing + 1);                       /* regex recurse level */  
2780    
2781          DPRINTF(("%.*sReturn from regex recursion %d: rc=%d\n", rlevel*2-2, SP,          md->recursive = new_recursive.prevrec;  /* Done this recursion */
2782            recursing + 1, rc));  
2783            DPRINTF(("%.*sReturn from regex recursion: rc=%d\n", rlevel*2-2, SP,
2784              rc));
2785    
2786          /* Ran out of internal offsets */          /* Ran out of internal offsets */
2787    
# Line 2438  for (;;) Line 2795  for (;;)
2795            {            {
2796            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2797              {              {
             const uschar *p = start_subject + local_offsets[rc];  
             const uschar *pp = start_subject + local_offsets[rc+1];  
2798              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2799              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;  #ifdef SUPPORT_UTF
2800                const pcre_uchar *p = start_subject + local_offsets[rc];
2801                const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2802                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2803    #endif
2804              if (charcount > 0)              if (charcount > 0)
2805                {                {
2806                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));                ADD_NEW_DATA(-(state_offset + LINK_SIZE + 1), 0, (charcount - 1));
# Line 2457  for (;;) Line 2816  for (;;)
2816        break;        break;
2817    
2818        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
2819          case OP_BRAPOS:
2820          case OP_SBRAPOS:
2821          case OP_CBRAPOS:
2822          case OP_SCBRAPOS:
2823          case OP_BRAPOSZERO:
2824            {
2825            int charcount, matched_count;
2826            const pcre_uchar *local_ptr = ptr;
2827            BOOL allow_zero;
2828    
2829            if (codevalue == OP_BRAPOSZERO)
2830              {
2831              allow_zero = TRUE;
2832              codevalue = *(++code);  /* Codevalue will be one of above BRAs */
2833              }
2834            else allow_zero = FALSE;
2835    
2836            /* Loop to match the subpattern as many times as possible as if it were
2837            a complete pattern. */
2838    
2839            for (matched_count = 0;; matched_count++)
2840              {
2841              int local_offsets[2];
2842              int local_workspace[1000];
2843    
2844              int rc = internal_dfa_exec(
2845                md,                                   /* fixed match data */
2846                code,                                 /* this subexpression's code */
2847                local_ptr,                            /* where we currently are */
2848                (int)(ptr - start_subject),           /* start offset */
2849                local_offsets,                        /* offset vector */
2850                sizeof(local_offsets)/sizeof(int),    /* size of same */
2851                local_workspace,                      /* workspace vector */
2852                sizeof(local_workspace)/sizeof(int),  /* size of same */
2853                rlevel);                              /* function recursion level */
2854    
2855              /* Failed to match */
2856    
2857              if (rc < 0)
2858                {
2859                if (rc != PCRE_ERROR_NOMATCH) return rc;
2860                break;
2861                }
2862    
2863              /* Matched: break the loop if zero characters matched. */
2864    
2865              charcount = local_offsets[1] - local_offsets[0];
2866              if (charcount == 0) break;
2867              local_ptr += charcount;    /* Advance temporary position ptr */
2868              }
2869    
2870            /* At this point we have matched the subpattern matched_count
2871            times, and local_ptr is pointing to the character after the end of the
2872            last match. */
2873    
2874            if (matched_count > 0 || allow_zero)
2875              {
2876              const pcre_uchar *end_subpattern = code;
2877              int next_state_offset;
2878    
2879              do { end_subpattern += GET(end_subpattern, 1); }
2880                while (*end_subpattern == OP_ALT);
2881              next_state_offset =
2882                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2883    
2884              /* Optimization: if there are no more active states, and there
2885              are no new states yet set up, then skip over the subject string
2886              right here, to save looping. Otherwise, set up the new state to swing
2887              into action when the end of the matched substring is reached. */
2888    
2889              if (i + 1 >= active_count && new_count == 0)
2890                {
2891                ptr = local_ptr;
2892                clen = 0;
2893                ADD_NEW(next_state_offset, 0);
2894                }
2895              else
2896                {
2897                const pcre_uchar *p = ptr;
2898                const pcre_uchar *pp = local_ptr;
2899                charcount = (int)(pp - p);
2900    #ifdef SUPPORT_UTF
2901                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2902    #endif
2903                ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2904                }
2905              }
2906            }
2907          break;
2908    
2909          /*-----------------------------------------------------------------*/
2910        case OP_ONCE:        case OP_ONCE:
2911          case OP_ONCE_NC:
2912          {          {
2913          int local_offsets[2];          int local_offsets[2];
2914          int local_workspace[1000];          int local_workspace[1000];
# Line 2466  for (;;) Line 2917  for (;;)
2917            md,                                   /* fixed match data */            md,                                   /* fixed match data */
2918            code,                                 /* this subexpression's code */            code,                                 /* this subexpression's code */
2919            ptr,                                  /* where we currently are */            ptr,                                  /* where we currently are */
2920            ptr - start_subject,                  /* start offset */            (int)(ptr - start_subject),           /* start offset */
2921            local_offsets,                        /* offset vector */            local_offsets,                        /* offset vector */
2922            sizeof(local_offsets)/sizeof(int),    /* size of same */            sizeof(local_offsets)/sizeof(int),    /* size of same */
2923            local_workspace,                      /* workspace vector */            local_workspace,                      /* workspace vector */
2924            sizeof(local_workspace)/sizeof(int),  /* size of same */            sizeof(local_workspace)/sizeof(int),  /* size of same */
2925            ims,                                  /* the current ims flags */            rlevel);                              /* function recursion level */
           rlevel,                               /* function recursion level */  
           recursing);                           /* pass on regex recursion */  
2926    
2927          if (rc >= 0)          if (rc >= 0)
2928            {            {
2929            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2930            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2931            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2932    
2933            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
2934              while (*end_subpattern == OP_ALT);              while (*end_subpattern == OP_ALT);
2935            next_state_offset = end_subpattern - start_code + LINK_SIZE + 1;            next_state_offset =
2936                (int)(end_subpattern - start_code + LINK_SIZE + 1);
2937    
2938            /* If the end of this subpattern is KETRMAX or KETRMIN, we must            /* If the end of this subpattern is KETRMAX or KETRMIN, we must
2939            arrange for the repeat state also to be added to the relevant list.            arrange for the repeat state also to be added to the relevant list.
# Line 2491  for (;;) Line 2941  for (;;)
2941    
2942            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||            repeat_state_offset = (*end_subpattern == OP_KETRMAX ||
2943                                   *end_subpattern == OP_KETRMIN)?                                   *end_subpattern == OP_KETRMIN)?
2944              end_subpattern - start_code - GET(end_subpattern, 1) : -1;              (int)(end_subpattern - start_code - GET(end_subpattern, 1)) : -1;
2945    
2946            /* If we have matched an empty string, add the next state at the            /* If we have matched an empty string, add the next state at the
2947            current character pointer. This is important so that the duplicate            current character pointer. This is important so that the duplicate
# Line 2506  for (;;) Line 2956  for (;;)
2956            /* Optimization: if there are no more active states, and there            /* Optimization: if there are no more active states, and there
2957            are no new states yet set up, then skip over the subject string            are no new states yet set up, then skip over the subject string
2958            right here, to save looping. Otherwise, set up the new state to swing            right here, to save looping. Otherwise, set up the new state to swing
2959            into action when the end of the substring is reached. */            into action when the end of the matched substring is reached. */
2960    
2961            else if (i + 1 >= active_count && new_count == 0)            else if (i + 1 >= active_count && new_count == 0)
2962              {              {
# Line 2529  for (;;) Line 2979  for (;;)
2979              }              }
2980            else            else
2981              {              {
2982              const uschar *p = start_subject + local_offsets[0];  #ifdef SUPPORT_UTF
2983              const uschar *pp = start_subject + local_offsets[1];              const pcre_uchar *p = start_subject + local_offsets[0];
2984              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;              const pcre_uchar *pp = start_subject + local_offsets[1];
2985                while (p < pp) if (NOT_FIRSTCHAR(*p++)) charcount--;
2986    #endif
2987              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2988              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
2989                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }                { ADD_NEW_DATA(-repeat_state_offset, 0, (charcount - 1)); }
2990              }              }
   
2991            }            }
2992          else if (rc != PCRE_ERROR_NOMATCH) return rc;          else if (rc != PCRE_ERROR_NOMATCH) return rc;
2993          }          }
# Line 2548  for (;;) Line 2999  for (;;)
2999    
3000        case OP_CALLOUT:        case OP_CALLOUT:
3001        rrc = 0;        rrc = 0;
3002        if (pcre_callout != NULL)        if (PUBL(callout) != NULL)
3003          {          {
3004          pcre_callout_block cb;          PUBL(callout_block) cb;
3005          cb.version          = 1;   /* Version 1 of the callout block */          cb.version          = 1;   /* Version 1 of the callout block */
3006          cb.callout_number   = code[1];          cb.callout_number   = code[1];
3007          cb.offset_vector    = offsets;          cb.offset_vector    = offsets;
3008    #ifdef COMPILE_PCRE8
3009          cb.subject          = (PCRE_SPTR)start_subject;          cb.subject          = (PCRE_SPTR)start_subject;
3010          cb.subject_length   = end_subject - start_subject;  #else
3011          cb.start_match      = current_subject - start_subject;          cb.subject          = (PCRE_SPTR16)start_subject;
3012          cb.current_position = ptr - start_subject;  #endif
3013            cb.subject_length   = (int)(end_subject - start_subject);
3014            cb.start_match      = (int)(current_subject - start_subject);
3015            cb.current_position = (int)(ptr - start_subject);
3016          cb.pattern_position = GET(code, 2);          cb.pattern_position = GET(code, 2);
3017          cb.next_item_length = GET(code, 2 + LINK_SIZE);          cb.next_item_length = GET(code, 2 + LINK_SIZE);
3018          cb.capture_top      = 1;          cb.capture_top      = 1;
3019          cb.capture_last     = -1;          cb.capture_last     = -1;
3020          cb.callout_data     = md->callout_data;          cb.callout_data     = md->callout_data;
3021          if ((rrc = (*pcre_callout)(&cb)) < 0) return rrc;   /* Abandon */          cb.mark             = NULL;   /* No (*MARK) support */
3022            if ((rrc = (*PUBL(callout))(&cb)) < 0) return rrc;   /* Abandon */
3023          }          }
3024        if (rrc == 0)        if (rrc == 0)
3025          { ADD_ACTIVE(state_offset + _pcre_OP_lengths[OP_CALLOUT], 0); }          { ADD_ACTIVE(state_offset + PRIV(OP_lengths)[OP_CALLOUT], 0); }
3026        break;        break;
3027    
3028    
# Line 2591  for (;;) Line 3047  for (;;)
3047    
3048    The "could_continue" variable is true if a state could have continued but    The "could_continue" variable is true if a state could have continued but
3049    for the fact that the end of the subject was reached. */    for the fact that the end of the subject was reached. */
3050    
3051    if (new_count <= 0)    if (new_count <= 0)
3052      {      {
3053      if (rlevel == 1 &&                               /* Top level, and */      if (rlevel == 1 &&                               /* Top level, and */
3054          could_continue &&                            /* Some could go on */          could_continue &&                            /* Some could go on, and */
3055          forced_fail != workspace[1] &&               /* Not all forced fail & */          forced_fail != workspace[1] &&               /* Not all forced fail & */
3056          (                                            /* either... */          (                                            /* either... */
3057          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */          (md->moptions & PCRE_PARTIAL_HARD) != 0      /* Hard partial */
# Line 2603  for (;;) Line 3059  for (;;)
3059          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */          ((md->moptions & PCRE_PARTIAL_SOFT) != 0 &&  /* Soft partial and */
3060           match_count < 0)                            /* no matches */           match_count < 0)                            /* no matches */
3061          ) &&                                         /* And... */          ) &&                                         /* And... */
3062          ptr >= end_subject &&                     /* Reached end of subject */          (
3063          ptr > current_subject)                    /* Matched non-empty string */          partial_newline ||                           /* Either partial NL */
3064              (                                          /* or ... */
3065              ptr >= end_subject &&                /* End of subject and */
3066              ptr > md->start_used_ptr)            /* Inspected non-empty string */
3067              )
3068            )
3069        {        {
3070        if (offsetcount >= 2)        if (offsetcount >= 2)
3071          {          {
3072          offsets[0] = md->start_used_ptr - start_subject;          offsets[0] = (int)(md->start_used_ptr - start_subject);
3073          offsets[1] = end_subject - start_subject;          offsets[1] = (int)(end_subject - start_subject);
3074          }          }
3075        match_count = PCRE_ERROR_PARTIAL;        match_count = PCRE_ERROR_PARTIAL;
3076        }        }
# Line 2663  Returns:          > 0 => number of match Line 3124  Returns:          > 0 => number of match
3124                   < -1 => some kind of unexpected problem                   < -1 => some kind of unexpected problem
3125  */  */
3126    
3127    #ifdef COMPILE_PCRE8
3128  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION  PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3129  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,  pcre_dfa_exec(const pcre *argument_re, const pcre_extra *extra_data,
3130    const char *subject, int length, int start_offset, int options, int *offsets,    const char *subject, int length, int start_offset, int options, int *offsets,
3131    int offsetcount, int *workspace, int wscount)    int offsetcount, int *workspace, int wscount)
3132    #else
3133    PCRE_EXP_DEFN int PCRE_CALL_CONVENTION
3134    pcre16_dfa_exec(const pcre16 *argument_re, const pcre16_extra *extra_data,
3135      PCRE_SPTR16 subject, int length, int start_offset, int options, int *offsets,
3136      int offsetcount, int *workspace, int wscount)
3137    #endif
3138  {  {
3139  real_pcre *re = (real_pcre *)argument_re;  REAL_PCRE *re = (REAL_PCRE *)argument_re;
3140  dfa_match_data match_block;  dfa_match_data match_block;
3141  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3142  BOOL utf8, anchored, startline, firstline;  BOOL utf, anchored, startline, firstline;
3143  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;
   
 pcre_study_data internal_study;  
3144  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
 real_pcre internal_re;  
3145    
3146  const uschar *req_byte_ptr;  const pcre_uchar *req_char_ptr;
3147  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3148  BOOL first_byte_caseless = FALSE;  BOOL has_first_char = FALSE;
3149  BOOL req_byte_caseless = FALSE;  BOOL has_req_char = FALSE;
3150  int first_byte = -1;  pcre_uchar first_char = 0;
3151  int req_byte = -1;  pcre_uchar first_char2 = 0;
3152  int req_byte2 = -1;  pcre_uchar req_char = 0;
3153    pcre_uchar req_char2 = 0;
3154  int newline;  int newline;
3155    
3156  /* Plausibility checks */  /* Plausibility checks */
# Line 2694  if (re == NULL || subject == NULL || wor Line 3160  if (re == NULL || subject == NULL || wor
3160     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;     (offsets == NULL && offsetcount > 0)) return PCRE_ERROR_NULL;
3161  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;  if (offsetcount < 0) return PCRE_ERROR_BADCOUNT;
3162  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;  if (wscount < 20) return PCRE_ERROR_DFA_WSSIZE;
3163    if (start_offset < 0 || start_offset > length) return PCRE_ERROR_BADOFFSET;
3164    
3165  /* We need to find the pointer to any study data before we test for byte  /* We need to find the pointer to any study data before we test for byte
3166  flipping, so we scan the extra_data block first. This may set two fields in the  flipping, so we scan the extra_data block first. This may set two fields in the
# Line 2715  if (extra_data != NULL) Line 3182  if (extra_data != NULL)
3182      md->callout_data = extra_data->callout_data;      md->callout_data = extra_data->callout_data;
3183    if ((flags & PCRE_EXTRA_TABLES) != 0)    if ((flags & PCRE_EXTRA_TABLES) != 0)
3184      md->tables = extra_data->tables;      md->tables = extra_data->tables;
3185      ((pcre_extra *)extra_data)->flags &= ~PCRE_EXTRA_USED_JIT;  /* No JIT support here */
3186    }    }
3187    
3188  /* Check that the first field in the block is the magic number. If it is not,  /* Check that the first field in the block is the magic number. If it is not,
3189  test for a regex that was compiled on a host of opposite endianness. If this is  return with PCRE_ERROR_BADMAGIC. However, if the magic number is equal to
3190  the case, flipped values are put in internal_re and internal_study if there was  REVERSED_MAGIC_NUMBER we return with PCRE_ERROR_BADENDIANNESS, which
3191  study data too. */  means that the pattern is likely compiled with different endianness. */
3192    
3193  if (re->magic_number != MAGIC_NUMBER)  if (re->magic_number != MAGIC_NUMBER)
3194    {    return re->magic_number == REVERSED_MAGIC_NUMBER?
3195    re = _pcre_try_flipped(re, &internal_re, study, &internal_study);      PCRE_ERROR_BADENDIANNESS:PCRE_ERROR_BADMAGIC;
3196    if (re == NULL) return PCRE_ERROR_BADMAGIC;  if ((re->flags & PCRE_MODE) == 0) return PCRE_ERROR_BADMODE;
   if (study != NULL) study = &internal_study;  
   }  
3197    
3198  /* Set some local values */  /* Set some local values */
3199    
3200  current_subject = (const unsigned char *)subject + start_offset;  current_subject = (const pcre_uchar *)subject + start_offset;
3201  end_subject = (const unsigned char *)subject + length;  end_subject = (const pcre_uchar *)subject + length;
3202  req_byte_ptr = current_subject - 1;  req_char_ptr = current_subject - 1;
3203    
3204  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3205  utf8 = (re->options & PCRE_UTF8) != 0;  /* PCRE_UTF16 has the same value as PCRE_UTF8. */
3206    utf = (re->options & PCRE_UTF8) != 0;
3207  #else  #else
3208  utf8 = FALSE;  utf = FALSE;
3209  #endif  #endif
3210    
3211  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||  anchored = (options & (PCRE_ANCHORED|PCRE_DFA_RESTART)) != 0 ||
# Line 2746  anchored = (options & (PCRE_ANCHORED|PCR Line 3213  anchored = (options & (PCRE_ANCHORED|PCR
3213    
3214  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3215    
3216  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3217      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3218  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const pcre_uchar *)subject;
3219  md->end_subject = end_subject;  md->end_subject = end_subject;
3220  md->start_offset = start_offset;  md->start_offset = start_offset;
3221  md->moptions = options;  md->moptions = options;
# Line 2809  else Line 3276  else
3276  /* Check a UTF-8 string if required. Unfortunately there's no way of passing  /* Check a UTF-8 string if required. Unfortunately there's no way of passing
3277  back the character offset. */  back the character offset. */
3278    
3279  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3280  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf && (options & PCRE_NO_UTF8_CHECK) == 0)
3281    {    {
3282    if (_pcre_valid_utf8((uschar *)subject, length) >= 0)    int erroroffset;
3283      return PCRE_ERROR_BADUTF8;    int errorcode = PRIV(valid_utf)((pcre_uchar *)subject, length, &erroroffset);
3284    if (start_offset > 0 && start_offset < length)    if (errorcode != 0)
3285      {      {
3286      int tb = ((uschar *)subject)[start_offset];      if (offsetcount >= 2)
     if (tb > 127)  
3287        {        {
3288        tb &= 0xc0;        offsets[0] = erroroffset;
3289        if (tb != 0 && tb != 0xc0) return PCRE_ERROR_BADUTF8_OFFSET;        offsets[1] = errorcode;
3290        }        }
3291        return (errorcode <= PCRE_UTF8_ERR5 && (options & PCRE_PARTIAL_HARD) != 0)?
3292          PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3293      }      }
3294      if (start_offset > 0 && start_offset < length &&
3295            NOT_FIRSTCHAR(((PCRE_PUCHAR)subject)[start_offset]))
3296        return PCRE_ERROR_BADUTF8_OFFSET;
3297    }    }
3298  #endif  #endif
3299    
# Line 2830  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3301  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3301  is a feature that makes it possible to save compiled regex and re-use them  is a feature that makes it possible to save compiled regex and re-use them
3302  in other programs later. */  in other programs later. */
3303    
3304  if (md->tables == NULL) md->tables = _pcre_default_tables;  if (md->tables == NULL) md->tables = PRIV(default_tables);
3305    
3306  /* The lower casing table and the "must be at the start of a line" flag are  /* The "must be at the start of a line" flags are used in a loop when finding
3307  used in a loop when finding where to start. */  where to start. */
3308    
 lcc = md->tables + lcc_offset;  
3309  startline = (re->flags & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
3310  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
3311    
# Line 2849  if (!anchored) Line 3319  if (!anchored)
3319    {    {
3320    if ((re->flags & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
3321      {      {
3322      first_byte = re->first_byte & 255;      has_first_char = TRUE;
3323      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      first_char = first_char2 = (pcre_uchar)(re->first_char);
3324        first_byte = lcc[first_byte];      if ((re->flags & PCRE_FCH_CASELESS) != 0)
3325          {
3326          first_char2 = TABLE_GET(first_char, md->tables + fcc_offset, first_char);
3327    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3328          if (utf && first_char > 127)
3329            first_char2 = UCD_OTHERCASE(first_char);
3330    #endif
3331          }
3332      }      }
3333    else    else
3334      {      {
# Line 2866  character" set. */ Line 3343  character" set. */
3343    
3344  if ((re->flags & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
3345    {    {
3346    req_byte = re->req_byte & 255;    has_req_char = TRUE;
3347    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_char = req_char2 = (pcre_uchar)(re->req_char);
3348    req_byte2 = (md->tables + fcc_offset)[req_byte];  /* case flipped */    if ((re->flags & PCRE_RCH_CASELESS) != 0)
3349        {
3350        req_char2 = TABLE_GET(req_char, md->tables + fcc_offset, req_char);
3351    #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
3352        if (utf && req_char > 127)
3353          req_char2 = UCD_OTHERCASE(req_char);
3354    #endif
3355        }
3356    }    }
3357    
3358  /* Call the main matching function, looping for a non-anchored regex after a  /* Call the main matching function, looping for a non-anchored regex after a
# Line 2881  for (;;) Line 3365  for (;;)
3365    
3366    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3367      {      {
3368      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3369    
3370      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3371      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 2890  for (;;) Line 3374  for (;;)
3374    
3375      if (firstline)      if (firstline)
3376        {        {
3377        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3378  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3379        if (utf8)        if (utf)
3380          {          {
3381          while (t < md->end_subject && !IS_NEWLINE(t))          while (t < md->end_subject && !IS_NEWLINE(t))
3382            {            {
3383            t++;            t++;
3384            while (t < end_subject && (*t & 0xc0) == 0x80) t++;            ACROSSCHAR(t < end_subject, *t, t++);
3385            }            }
3386          }          }
3387        else        else
# Line 2908  for (;;) Line 3392  for (;;)
3392    
3393      /* There are some optimizations that avoid running the match if a known      /* There are some optimizations that avoid running the match if a known
3394      starting point is not found. However, there is an option that disables      starting point is not found. However, there is an option that disables
3395      these, for testing and for ensuring that all callouts do actually occur. */      these, for testing and for ensuring that all callouts do actually occur.
3396        The option can be set in the regex by (*NO_START_OPT) or passed in
3397        match-time options. */
3398    
3399      if ((options & PCRE_NO_START_OPTIMIZE) == 0)      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0)
3400        {        {
3401        /* Advance to a known first byte. */        /* Advance to a known first char. */
3402    
3403        if (first_byte >= 0)        if (has_first_char)
3404          {          {
3405          if (first_byte_caseless)          if (first_char != first_char2)
3406            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3407                   lcc[*current_subject] != first_byte)                *current_subject != first_char && *current_subject != first_char2)
3408              current_subject++;              current_subject++;
3409          else          else
3410            while (current_subject < end_subject &&            while (current_subject < end_subject &&
3411                   *current_subject != first_byte)                   *current_subject != first_char)
3412              current_subject++;              current_subject++;
3413          }          }
3414    
# Line 2932  for (;;) Line 3418  for (;;)
3418          {          {
3419          if (current_subject > md->start_subject + start_offset)          if (current_subject > md->start_subject + start_offset)
3420            {            {
3421  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
3422            if (utf8)            if (utf)
3423              {              {
3424              while (current_subject < end_subject &&              while (current_subject < end_subject &&
3425                     !WAS_NEWLINE(current_subject))                     !WAS_NEWLINE(current_subject))
3426                {                {
3427                current_subject++;                current_subject++;
3428                while(current_subject < end_subject &&                ACROSSCHAR(current_subject < end_subject, *current_subject,
3429                      (*current_subject & 0xc0) == 0x80)                  current_subject++);
                 current_subject++;  
3430                }                }
3431              }              }
3432            else            else
# Line 2968  for (;;) Line 3453  for (;;)
3453          while (current_subject < end_subject)          while (current_subject < end_subject)
3454            {            {
3455            register unsigned int c = *current_subject;            register unsigned int c = *current_subject;
3456            if ((start_bits[c/8] & (1 << (c&7))) == 0) current_subject++;  #ifndef COMPILE_PCRE8
3457              else break;            if (c > 255) c = 255;
3458    #endif
3459              if ((start_bits[c/8] & (1 << (c&7))) == 0)
3460                {
3461                current_subject++;
3462    #if defined SUPPORT_UTF && defined COMPILE_PCRE8
3463                /* In non 8-bit mode, the iteration will stop for
3464                characters > 255 at the beginning or not stop at all. */
3465                if (utf)
3466                  ACROSSCHAR(current_subject < end_subject, *current_subject,
3467                    current_subject++);
3468    #endif
3469                }
3470              else break;
3471            }            }
3472          }          }
3473        }        }
# Line 2982  for (;;) Line 3480  for (;;)
3480      disabling is explicitly requested (and of course, by the test above, this      disabling is explicitly requested (and of course, by the test above, this
3481      code is not obeyed when restarting after a partial match). */      code is not obeyed when restarting after a partial match). */
3482    
3483      if ((options & PCRE_NO_START_OPTIMIZE) == 0 &&      if (((options | re->options) & PCRE_NO_START_OPTIMIZE) == 0 &&
3484          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)          (options & (PCRE_PARTIAL_HARD|PCRE_PARTIAL_SOFT)) == 0)
3485        {        {
3486        /* If the pattern was studied, a minimum subject length may be set. This        /* If the pattern was studied, a minimum subject length may be set. This
# Line 2991  for (;;) Line 3489  for (;;)
3489        bytes to avoid spending too much time in this optimization. */        bytes to avoid spending too much time in this optimization. */
3490    
3491        if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&        if (study != NULL && (study->flags & PCRE_STUDY_MINLEN) != 0 &&
3492            end_subject - current_subject < study->minlength)            (pcre_uint32)(end_subject - current_subject) < study->minlength)
3493          return PCRE_ERROR_NOMATCH;          return PCRE_ERROR_NOMATCH;
3494    
3495        /* If req_byte is set, we know that that character must appear in the        /* If req_char is set, we know that that character must appear in the
3496        subject for the match to succeed. If the first character is set, req_byte        subject for the match to succeed. If the first character is set, req_char
3497        must be later in the subject; otherwise the test starts at the match        must be later in the subject; otherwise the test starts at the match
3498        point. This optimization can save a huge amount of work in patterns with        point. This optimization can save a huge amount of work in patterns with
3499        nested unlimited repeats that aren't going to match. Writing separate        nested unlimited repeats that aren't going to match. Writing separate
# Line 3007  for (;;) Line 3505  for (;;)
3505        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte        patterns. This showed up when somebody was matching /^C/ on a 32-megabyte
3506        string... so we don't do this when the string is sufficiently long. */        string... so we don't do this when the string is sufficiently long. */
3507    
3508        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (has_req_char && end_subject - current_subject < REQ_BYTE_MAX)
3509          {          {
3510          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register PCRE_PUCHAR p = current_subject + (has_first_char? 1:0);
3511    
3512          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3513          place we found it at last time. */          place we found it at last time. */
3514    
3515          if (p > req_byte_ptr)          if (p > req_char_ptr)
3516            {            {
3517            if (req_byte_caseless)            if (req_char != req_char2)
3518              {              {
3519              while (p < end_subject)              while (p < end_subject)
3520                {                {
3521                register int pp = *p++;                register int pp = *p++;
3522                if (pp == req_byte || pp == req_byte2) { p--; break; }                if (pp == req_char || pp == req_char2) { p--; break; }
3523                }                }
3524              }              }
3525            else            else
3526              {              {
3527              while (p < end_subject)              while (p < end_subject)
3528                {                {
3529                if (*p++ == req_byte) { p--; break; }                if (*p++ == req_char) { p--; break; }
3530                }                }
3531              }              }
3532    
# Line 3041  for (;;) Line 3539  for (;;)
3539            found it, so that we don't search again next time round the loop if            found it, so that we don't search again next time round the loop if
3540            the start hasn't passed this character yet. */            the start hasn't passed this character yet. */
3541    
3542            req_byte_ptr = p;            req_char_ptr = p;
3543            }            }
3544          }          }
3545        }        }
# Line 3050  for (;;) Line 3548  for (;;)
3548    /* OK, now we can do the business */    /* OK, now we can do the business */
3549    
3550    md->start_used_ptr = current_subject;    md->start_used_ptr = current_subject;
3551      md->recursive = NULL;
3552    
3553    rc = internal_dfa_exec(    rc = internal_dfa_exec(
3554      md,                                /* fixed match data */      md,                                /* fixed match data */
# Line 3060  for (;;) Line 3559  for (;;)
3559      offsetcount,                       /* size of same */      offsetcount,                       /* size of same */
3560      workspace,                         /* workspace vector */      workspace,                         /* workspace vector */
3561      wscount,                           /* size of same */      wscount,                           /* size of same */
3562      re->options & (PCRE_CASELESS|PCRE_MULTILINE|PCRE_DOTALL), /* ims flags */      0);                                /* function recurse level */
     0,                                 /* function recurse level */  
     0);                                /* regex recurse level */  
3563    
3564    /* Anything other than "no match" means we are done, always; otherwise, carry    /* Anything other than "no match" means we are done, always; otherwise, carry
3565    on only if not anchored. */    on only if not anchored. */
# Line 3074  for (;;) Line 3571  for (;;)
3571    
3572    if (firstline && IS_NEWLINE(current_subject)) break;    if (firstline && IS_NEWLINE(current_subject)) break;
3573    current_subject++;    current_subject++;
3574    if (utf8)  #ifdef SUPPORT_UTF
3575      if (utf)
3576      {      {
3577      while (current_subject < end_subject && (*current_subject & 0xc0) == 0x80)      ACROSSCHAR(current_subject < end_subject, *current_subject,
3578        current_subject++;        current_subject++);
3579      }      }
3580    #endif
3581    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
3582    
3583    /* If we have just passed a CR and we are now at a LF, and the pattern does    /* If we have just passed a CR and we are now at a LF, and the pattern does

Legend:
Removed from v.473  
changed lines
  Added in v.925

  ViewVC Help
Powered by ViewVC 1.1.5