/[pcre]/code/branches/pcre16/pcretest.c
ViewVC logotype

Diff of /code/branches/pcre16/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 805 by ph10, Wed Dec 14 16:49:20 2011 UTC revision 811 by zherczeg, Mon Dec 19 14:05:44 2011 UTC
# Line 4  Line 4 
4    
5  /* This program was hacked up as a tester for PCRE. I really should have  /* This program was hacked up as a tester for PCRE. I really should have
6  written it more tidily in the first place. Will I ever learn? It has grown and  written it more tidily in the first place. Will I ever learn? It has grown and
7  been extended and consequently is now rather, er, *very* untidy in places.  been extended and consequently is now rather, er, *very* untidy in places. The
8    addition of 16-bit support has made it even worse. :-(
9    
10  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
11  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 107  appropriately for an application, not fo Line 108  appropriately for an application, not fo
108  #include "pcre.h"  #include "pcre.h"
109  #include "pcre_internal.h"  #include "pcre_internal.h"
110    
111  /* The pcre_printint() function, which prints the internal form of a compiled  /* The pcre_printint() function, which prints the internal form of a compiled
112  regex, is held in a separate file so that (a) it can be compiled in either  regex, is held in a separate file so that (a) it can be compiled in either
113  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c  8-bit or 16-bit mode, and (b) it can be #included directly in pcre_compile.c
114  when that is compiled in debug mode. */  when that is compiled in debug mode. */
115    
116  #ifdef SUPPORT_PCRE8  #ifdef SUPPORT_PCRE8
# Line 149  that differ in their output from isprint Line 150  that differ in their output from isprint
150  #define PRINTABLE(c) ((c) >= 32 && (c) < 127)  #define PRINTABLE(c) ((c) >= 32 && (c) < 127)
151  #endif  #endif
152    
153  #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))  #define PRINTOK(c) (locale_set? isprint(c) : PRINTABLE(c))
154    
155  /* It is possible to compile this test program without including support for  /* It is possible to compile this test program without including support for
156  testing the POSIX interface, though this is not available via the standard  testing the POSIX interface, though this is not available via the standard
# Line 159  Makefile. */ Line 160  Makefile. */
160  #include "pcreposix.h"  #include "pcreposix.h"
161  #endif  #endif
162    
163  /* It is also possible, for the benefit of the version currently imported into  /* It is also possible, originally for the benefit of a version that was
164  Exim, to build pcretest without support for UTF8 (define NOUTF8), without the  imported into Exim, to build pcretest without support for UTF8 (define NOUTF8),
165  interface to the DFA matcher (NODFA), and without the doublecheck of the old  without the interface to the DFA matcher (NODFA), and without the doublecheck
166  "info" function (define NOINFOCHECK). In fact, we automatically cut out the  of the old "info" function (define NOINFOCHECK). In fact, we automatically cut
167  UTF8 support if PCRE is built without it. */  out the UTF8 support if PCRE is built without it. */
168    
169  #ifndef SUPPORT_UTF8  #ifndef SUPPORT_UTF8
170  #ifndef NOUTF8  #ifndef NOUTF8
# Line 171  UTF8 support if PCRE is built without it Line 172  UTF8 support if PCRE is built without it
172  #endif  #endif
173  #endif  #endif
174    
175    /* To make the code a bit tidier for 8-bit and 16-bit support, we define macros
176    for all the pcre[16]_xxx functions (except pcre16_fullinfo, which is called
177    only from one place and is handled differently). I couldn't dream up any way of
178    using a single macro to do this in a generic way, because of the many different
179    argument requirements. We know that at least one of SUPPORT_PCRE8 and
180    SUPPORT_PCRE16 must be set. First define macros for each individual mode; then
181    use these in the definitions of generic macros. */
182    
183    #ifdef SUPPORT_PCRE8
184    #define PCHARS8(lv, p, len, f) \
185      lv = pchars((pcre_uint8 *)p, len, f)
186    
187    #define PCHARSV8(p, len, f) \
188      (void)pchars((pcre_uint8 *)p, len, f)
189    
190    #define PCRE_COMPILE8(re, pat, options, error, erroffset, tables) \
191      re = pcre_compile((char *)pat, options, error, erroffset, tables)
192    
193    #define PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
194        offsets, size_offsets) \
195      count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options, \
196        offsets, size_offsets)
197    
198    #define PCRE_STUDY8(extra, re, options, error) \
199      extra = pcre_study(re, options, error)
200    
201    #define PCRE_FREE_STUDY8(extra) \
202      pcre_free_study(extra)
203    
204    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables) \
205      pcre_pattern_to_host_byte_order(re, extra, tables)
206    
207    #endif /* SUPPORT_PCRE8 */
208    
209    
210    #ifdef SUPPORT_PCRE16
211    #define PCHARS16(lv, p, len, f) \
212      lv = pchars16((PCRE_SPTR16)p, len, f)
213    
214    #define PCHARSV16(p, len, f) \
215      (void)pchars16((PCRE_SPTR16)p, len, f)
216    
217    #define PCRE_COMPILE16(re, pat, options, error, erroffset, tables) \
218      re = pcre16_compile((PCRE_SPTR16)pat, options, error, erroffset, tables)
219    
220    #define PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
221        offsets, size_offsets) \
222      count = pcre16_exec(re, extra, (PCRE_SPTR16)bptr, len, start_offset, \
223        options, offsets, size_offsets)
224    
225    #define PCRE_FREE_STUDY16(extra) \
226      pcre16_free_study(extra)
227    
228    #define PCRE_STUDY16(extra, re, options, error) \
229      extra = pcre16_study(re, options, error)
230    
231    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables) \
232      pcre16_pattern_to_host_byte_order(re, extra, tables)
233    
234    #endif /* SUPPORT_PCRE16 */
235    
236    
237    /* ----- Both modes are supported; a runtime test is needed ----- */
238    
239    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
240    
241    #define PCHARS(lv, p, len, f) \
242      if (use_pcre16) \
243        PCHARS16(lv, p, len, f); \
244      else \
245        PCHARS8(lv, p, len, f)
246    
247    #define PCHARSV(p, len, f) \
248      if (use_pcre16) \
249        PCHARSV16(p, len, f); \
250      else \
251        PCHARSV8(p, len, f)
252    
253    #define PCRE_COMPILE(re, pat, options, error, erroffset, tables) \
254      if (use_pcre16) \
255        PCRE_COMPILE16(re, pat, options, error, erroffset, tables); \
256      else \
257        PCRE_COMPILE8(re, pat, options, error, erroffset, tables)
258    
259    #define PCRE_EXEC(count, re, extra, bptr, len, start_offset, options, \
260        offsets, size_offsets) \
261      if (use_pcre16) \
262        PCRE_EXEC16(count, re, extra, bptr, len, start_offset, options, \
263          offsets, size_offsets); \
264      else \
265        PCRE_EXEC8(count, re, extra, bptr, len, start_offset, options, \
266          offsets, size_offsets)
267    
268    #define PCRE_FREE_STUDY(extra) \
269      if (use_pcre16) \
270        PCRE_FREE_STUDY16(extra); \
271      else \
272        PCRE_FREE_STUDY8(extra)
273    
274    #define PCRE_STUDY(extra, re, options, error) \
275      if (use_pcre16) \
276        PCRE_STUDY16(extra, re, options, error); \
277      else \
278        PCRE_STUDY8(extra, re, options, error)
279    
280    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, tables) \
281      if (use_pcre16) \
282        PCRE_PATTERN_TO_HOST_BYTE_ORDER16(re, extra, tables); \
283      else \
284        PCRE_PATTERN_TO_HOST_BYTE_ORDER8(re, extra, tables)
285    
286    /* ----- Only 8-bit mode is supported ----- */
287    
288    #elif defined SUPPORT_PCRE8
289    #define PCHARS           PCHARS8
290    #define PCHARSV          PCHARSV8
291    #define PCRE_COMPILE     PCRE_COMPILE8
292    #define PCRE_EXEC        PCRE_EXEC8
293    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY8
294    #define PCRE_STUDY       PCRE_STUDY8
295    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER8
296    
297    /* ----- Only 16-bit mode is supported ----- */
298    
299    #else
300    #define PCHARS           PCHARS16
301    #define PCHARSV          PCHARSV16
302    #define PCRE_COMPILE     PCRE_COMPILE16
303    #define PCRE_EXEC        PCRE_EXEC16
304    #define PCRE_FREE_STUDY  PCRE_FREE_STUDY16
305    #define PCRE_STUDY       PCRE_STUDY16
306    #define PCRE_PATTERN_TO_HOST_BYTE_ORDER PCRE_PATTERN_TO_HOST_BYTE_ORDER16
307    #endif
308    
309    /* ----- End of mode-specific function call macros ----- */
310    
311    
312  /* Other parameters */  /* Other parameters */
313    
# Line 198  static int debug_lengths; Line 335  static int debug_lengths;
335  static int first_callout;  static int first_callout;
336  static int locale_set = 0;  static int locale_set = 0;
337  static int show_malloc;  static int show_malloc;
338  static int use_utf8;  static int use_utf;
339  static size_t gotten_store;  static size_t gotten_store;
340  static size_t first_gotten_store = 0;  static size_t first_gotten_store = 0;
341  static const unsigned char *last_callout_mark = NULL;  static const unsigned char *last_callout_mark = NULL;
342    
 static int (*fullinfo)(const pcre *, const pcre_extra *, int, void *);  
   
343  /* The buffers grow automatically if very long input lines are encountered. */  /* The buffers grow automatically if very long input lines are encountered. */
344    
345  static int buffer_size = 50000;  static int buffer_size = 50000;
# Line 217  static int buffer16_size = 0; Line 352  static int buffer16_size = 0;
352  static pcre_uint16 *buffer16 = NULL;  static pcre_uint16 *buffer16 = NULL;
353  #endif  #endif
354    
355    /* If we have 8-bit support, default use_pcre16 to false; if there is also
356    16-bit support, it can be changed by an option. If there is no 8-bit support,
357    there must be 16-bit support, so default it to 1. */
358    
359    #ifdef SUPPORT_PCRE8
360    static int use_pcre16 = 0;
361    #else
362    static int use_pcre16 = 1;
363    #endif
364    
365  /* Textual explanations for runtime error codes */  /* Textual explanations for runtime error codes */
366    
367  static const char *errtexts[] = {  static const char *errtexts[] = {
# Line 248  static const char *errtexts[] = { Line 393  static const char *errtexts[] = {
393    NULL,  /* SHORTUTF8 is handled specially */    NULL,  /* SHORTUTF8 is handled specially */
394    "nested recursion at the same subject position",    "nested recursion at the same subject position",
395    "JIT stack limit reached",    "JIT stack limit reached",
396    "pattern compiled in wrong mode (8-bit/16-bit error)"    "pattern compiled in wrong mode (8-bit/16-bit error)"
397  };  };
398    
399    
# Line 264  the L (locale) option also adjusts the t Line 409  the L (locale) option also adjusts the t
409  /* This is the set of tables distributed as default with PCRE. It recognizes  /* This is the set of tables distributed as default with PCRE. It recognizes
410  only ASCII characters. */  only ASCII characters. */
411    
412  static const unsigned char tables0[] = {  static const pcre_uint8 tables0[] = {
413    
414  /* This table is a lower casing table. */  /* This table is a lower casing table. */
415    
# Line 437  graph, print, punct, and cntrl. Other cl Line 582  graph, print, punct, and cntrl. Other cl
582  be at least an approximation of ISO 8859. In particular, there are characters  be at least an approximation of ISO 8859. In particular, there are characters
583  greater than 128 that are marked as spaces, letters, etc. */  greater than 128 that are marked as spaces, letters, etc. */
584    
585  static const unsigned char tables1[] = {  static const pcre_uint8 tables1[] = {
586  0,1,2,3,4,5,6,7,  0,1,2,3,4,5,6,7,
587  8,9,10,11,12,13,14,15,  8,9,10,11,12,13,14,15,
588  16,17,18,19,20,21,22,23,  16,17,18,19,20,21,22,23,
# Line 610  return (pcre_jit_stack *)arg; Line 755  return (pcre_jit_stack *)arg;
755  }  }
756    
757    
758    /*************************************************
759    *            Convert UTF-8 string to value       *
760    *************************************************/
761    
762    /* This function takes one or more bytes that represents a UTF-8 character,
763    and returns the value of the character.
764    
765    Argument:
766      utf8bytes   a pointer to the byte vector
767      vptr        a pointer to an int to receive the value
768    
769    Returns:      >  0 => the number of bytes consumed
770                  -6 to 0 => malformed UTF-8 character at offset = (-return)
771    */
772    
773    #if !defined NOUTF8
774    
775    static int
776    utf82ord(pcre_uint8 *utf8bytes, int *vptr)
777    {
778    int c = *utf8bytes++;
779    int d = c;
780    int i, j, s;
781    
782    for (i = -1; i < 6; i++)               /* i is number of additional bytes */
783      {
784      if ((d & 0x80) == 0) break;
785      d <<= 1;
786      }
787    
788    if (i == -1) { *vptr = c; return 1; }  /* ascii character */
789    if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */
790    
791    /* i now has a value in the range 1-5 */
792    
793    s = 6*i;
794    d = (c & utf8_table3[i]) << s;
795    
796    for (j = 0; j < i; j++)
797      {
798      c = *utf8bytes++;
799      if ((c & 0xc0) != 0x80) return -(j+1);
800      s -= 6;
801      d |= (c & 0x3f) << s;
802      }
803    
804    /* Check that encoding was the correct unique one */
805    
806    for (j = 0; j < utf8_table1_size; j++)
807      if (d <= utf8_table1[j]) break;
808    if (j != i) return -(i+1);
809    
810    /* Valid value */
811    
812    *vptr = d;
813    return i+1;
814    }
815    
816    #endif
817    
818    
819    
820    /*************************************************
821    *       Convert character value to UTF-8         *
822    *************************************************/
823    
824    /* This function takes an integer value in the range 0 - 0x7fffffff
825    and encodes it as a UTF-8 character in 1 to 6 bytes.
826    
827    Arguments:
828      cvalue     the character value
829      utf8bytes  pointer to buffer for result - at least 6 bytes long
830    
831    Returns:     number of bytes placed in the buffer
832    */
833    
834    #if !defined NOUTF8
835    
836    static int
837    ord2utf8(int cvalue, pcre_uint8 *utf8bytes)
838    {
839    register int i, j;
840    for (i = 0; i < utf8_table1_size; i++)
841      if (cvalue <= utf8_table1[i]) break;
842    utf8bytes += i;
843    for (j = i; j > 0; j--)
844     {
845     *utf8bytes-- = 0x80 | (cvalue & 0x3f);
846     cvalue >>= 6;
847     }
848    *utf8bytes = utf8_table2[i] | cvalue;
849    return i + 1;
850    }
851    
852    #endif
853    
854    
855    
856  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
857  /*************************************************  /*************************************************
858  *         Convert a string to 16-bit             *  *         Convert a string to 16-bit             *
859  *************************************************/  *************************************************/
860    
861  /* The result is always left in buffer16. */  /* In non-UTF mode, the space needed for a 16-bit string is exactly double the
862    8-bit size. For a UTF-8 string, the size needed for UTF-16 is no more than
863    double, because values up to 0xffff use 1 to 3 bytes in UTF-8 and exactly 2
864    bytes in UTF-16. Higher values use 4 bytes in UTF-8 and 4 bytes in UTF-16. The
865    result is always left in buffer16.
866    
867    Arguments:
868      p          points to a byte string
869      utf        true if UTF-8 (to be converted to UTF-16)
870      len        number of bytes in the string (excluding trailing zero)
871    
872    Returns:     number of 16-bit data items used (excluding trailing zero)
873                 OR -1 if a UTF-8 string is malformed
874    */
875    
876  static int  static int
877  to16(unsigned char *p, int utf)  to16(pcre_uint8 *p, int utf, int len)
878  {  {
879  pcre_uint16 *pp;  pcre_uint16 *pp;
 int len = (int)strlen((char *)p) + 1;  
880    
881  if (buffer16_size < 2*len)  if (buffer16_size < 2*len + 2)
882    {    {
883    if (buffer16 != NULL) free(buffer16);    if (buffer16 != NULL) free(buffer16);
884    buffer16_size = 2*len;    buffer16_size = 2*len + 2;
885    buffer16 = (pcre_uint16 *)malloc(buffer16_size);    buffer16 = (pcre_uint16 *)malloc(buffer16_size);
886    if (buffer16 == NULL)    if (buffer16 == NULL)
887      {      {
888      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);      fprintf(stderr, "pcretest: malloc(%d) failed for buffer16\n", buffer16_size);
889      exit(1);      exit(1);
890      }      }
891    }    }
892    
893  pp = buffer16;  pp = buffer16;
894    
895  if (!utf)  if (!utf)
896    {    {
897    while (*p != 0) *pp++ = *p++;    while (len-- > 0) *pp++ = *p++;
   *pp++ = 0;  
898    }    }
899    
900  else  else
901    {    {
902  fprintf(stderr, "pcretest: no support yet for UTF-16\n");    int c;
903  exit(1);    while (len > 0)
904    }      {
905        int chlen = utf82ord(p, &c);
906        if (chlen <= 0) return -1;
907        p += chlen;
908        len -= chlen;
909        if (c < 0x10000) *pp++ = c; else
910          {
911          c -= 0x10000;
912          *pp++ = 0xD800 | (c >> 10);
913          *pp++ = 0xDC00 | (c & 0x3ff);
914          }
915        }
916      }
917    
918    *pp = 0;
919  return pp - buffer16;  return pp - buffer16;
920  }  }
921  #endif  #endif
922    
923    
# Line 727  for (;;) Line 994  for (;;)
994    else    else
995      {      {
996      int new_buffer_size = 2*buffer_size;      int new_buffer_size = 2*buffer_size;
997      pcre_uint8 *new_buffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_buffer = (pcre_uint8 *)malloc(new_buffer_size);
998      pcre_uint8 *new_dbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_dbuffer = (pcre_uint8 *)malloc(new_buffer_size);
999      pcre_uint8 *new_pbuffer = (unsigned char *)malloc(new_buffer_size);      pcre_uint8 *new_pbuffer = (pcre_uint8 *)malloc(new_buffer_size);
1000    
1001      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)      if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
1002        {        {
# Line 760  return NULL;  /* Control never gets here Line 1027  return NULL;  /* Control never gets here
1027    
1028    
1029    
   
   
   
   
1030  /*************************************************  /*************************************************
1031  *          Read number from string               *  *          Read number from string               *
1032  *************************************************/  *************************************************/
# Line 780  Returns:        the unsigned long Line 1043  Returns:        the unsigned long
1043  */  */
1044    
1045  static int  static int
1046  get_value(unsigned char *str, unsigned char **endptr)  get_value(pcre_uint8 *str, pcre_uint8 **endptr)
1047  {  {
1048  int result = 0;  int result = 0;
1049  while(*str != 0 && isspace(*str)) str++;  while(*str != 0 && isspace(*str)) str++;
# Line 791  return(result); Line 1054  return(result);
1054    
1055    
1056    
   
1057  /*************************************************  /*************************************************
1058  *            Convert UTF-8 string to value       *  *             Print one character                *
1059  *************************************************/  *************************************************/
1060    
1061  /* This function takes one or more bytes that represents a UTF-8 character,  /* Print a single character either literally, or as a hex escape. */
 and returns the value of the character.  
1062    
1063  Argument:  static int pchar(int c, FILE *f)
   utf8bytes   a pointer to the byte vector  
   vptr        a pointer to an int to receive the value  
   
 Returns:      >  0 => the number of bytes consumed  
               -6 to 0 => malformed UTF-8 character at offset = (-return)  
 */  
   
 #if !defined NOUTF8  
   
 static int  
 utf82ord(unsigned char *utf8bytes, int *vptr)  
1064  {  {
1065  int c = *utf8bytes++;  if (PRINTOK(c))
 int d = c;  
 int i, j, s;  
   
 for (i = -1; i < 6; i++)               /* i is number of additional bytes */  
1066    {    {
1067    if ((d & 0x80) == 0) break;    if (f != NULL) fprintf(f, "%c", c);
1068    d <<= 1;    return 1;
1069    }    }
1070    
1071  if (i == -1) { *vptr = c; return 1; }  /* ascii character */  if (c < 0x100)
 if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */  
   
 /* i now has a value in the range 1-5 */  
   
 s = 6*i;  
 d = (c & utf8_table3[i]) << s;  
   
 for (j = 0; j < i; j++)  
1072    {    {
1073    c = *utf8bytes++;    if (use_utf)
1074    if ((c & 0xc0) != 0x80) return -(j+1);      {
1075    s -= 6;      if (f != NULL) fprintf(f, "\\x{%02x}", c);
1076    d |= (c & 0x3f) << s;      return 6;
1077        }
1078      else
1079        {
1080        if (f != NULL) fprintf(f, "\\x%02x", c);
1081        return 4;
1082        }
1083    }    }
1084    
1085  /* Check that encoding was the correct unique one */  if (f != NULL) fprintf(f, "\\x{%02x}", c);
1086    return (c <= 0x000000ff)? 6 :
1087  for (j = 0; j < utf8_table1_size; j++)         (c <= 0x00000fff)? 7 :
1088    if (d <= utf8_table1[j]) break;         (c <= 0x0000ffff)? 8 :
1089  if (j != i) return -(i+1);         (c <= 0x000fffff)? 9 : 10;
   
 /* Valid value */  
   
 *vptr = d;  
 return i+1;  
1090  }  }
1091    
 #endif  
   
1092    
1093    
1094    #ifdef SUPPORT_PCRE8
1095  /*************************************************  /*************************************************
1096  *       Convert character value to UTF-8         *  *         Print 8-bit character string           *
1097  *************************************************/  *************************************************/
1098    
1099  /* This function takes an integer value in the range 0 - 0x7fffffff  /* Must handle UTF-8 strings in utf8 mode. Yields number of characters printed.
1100  and encodes it as a UTF-8 character in 0 to 6 bytes.  If handed a NULL file, just counts chars without printing. */
   
 Arguments:  
   cvalue     the character value  
   utf8bytes  pointer to buffer for result - at least 6 bytes long  
1101    
1102  Returns:     number of characters placed in the buffer  static int pchars(pcre_uint8 *p, int length, FILE *f)
1103  */  {
1104    int c = 0;
1105    int yield = 0;
1106    
1107    while (length-- > 0)
1108      {
1109  #if !defined NOUTF8  #if !defined NOUTF8
1110      if (use_utf)
1111        {
1112        int rc = utf82ord(p, &c);
1113        if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */
1114          {
1115          length -= rc - 1;
1116          p += rc;
1117          yield += pchar(c, f);
1118          continue;
1119          }
1120        }
1121    #endif
1122      c = *p++;
1123      yield += pchar(c, f);
1124      }
1125    
1126  static int  return yield;
 ord2utf8(int cvalue, pcre_uint8 *utf8bytes)  
 {  
 register int i, j;  
 for (i = 0; i < utf8_table1_size; i++)  
   if (cvalue <= utf8_table1[i]) break;  
 utf8bytes += i;  
 for (j = i; j > 0; j--)  
  {  
  *utf8bytes-- = 0x80 | (cvalue & 0x3f);  
  cvalue >>= 6;  
  }  
 *utf8bytes = utf8_table2[i] | cvalue;  
 return i + 1;  
1127  }  }
   
1128  #endif  #endif
1129    
1130    
1131    
1132    #ifdef SUPPORT_PCRE16
1133  /*************************************************  /*************************************************
1134  *             Print character string             *  *           Print 16-bit character string        *
1135  *************************************************/  *************************************************/
1136    
1137  /* Character string printing function. Must handle UTF-8 strings in utf8  /* Must handle UTF-16 strings in utf mode. Yields number of characters printed.
1138  mode. Yields number of characters printed. If handed a NULL file, just counts  If handed a NULL file, just counts chars without printing. */
 chars without printing. */  
1139    
1140  static int pchars(unsigned char *p, int length, FILE *f)  static int pchars16(PCRE_SPTR16 p, int length, FILE *f)
1141  {  {
 int c = 0;  
1142  int yield = 0;  int yield = 0;
1143    
1144  while (length-- > 0)  while (length-- > 0)
1145    {    {
1146      int c = *p++ & 0xffff;
1147  #if !defined NOUTF8  #if !defined NOUTF8
1148    if (use_utf8)    if (use_utf && c >= 0xD800 && c < 0xDC00 && length > 0)
1149      {      {
1150      int rc = utf82ord(p, &c);      int d = *p & 0xffff;
1151        if (d >= 0xDC00 && d < 0xDFFF)
     if (rc > 0 && rc <= length + 1)   /* Mustn't run over the end */  
1152        {        {
1153        length -= rc - 1;        c = ((c & 0x3ff) << 10) + (d & 0x3ff) + 0x10000;
1154        p += rc;        length--;
1155        if (PRINTHEX(c))        p++;
         {  
         if (f != NULL) fprintf(f, "%c", c);  
         yield++;  
         }  
       else  
         {  
         int n = 4;  
         if (f != NULL) fprintf(f, "\\x{%02x}", c);  
         yield += (n <= 0x000000ff)? 2 :  
                  (n <= 0x00000fff)? 3 :  
                  (n <= 0x0000ffff)? 4 :  
                  (n <= 0x000fffff)? 5 : 6;  
         }  
       continue;  
1156        }        }
1157      }      }
1158  #endif  #endif
1159      yield += pchar(c, f);
    /* Not UTF-8, or malformed UTF-8  */  
   
   c = *p++;  
   if (PRINTHEX(c))  
     {  
     if (f != NULL) fprintf(f, "%c", c);  
     yield++;  
     }  
   else  
     {  
     if (f != NULL) fprintf(f, "\\x%02x", c);  
     yield += 4;  
     }  
1160    }    }
1161    
1162  return yield;  return yield;
1163  }  }
1164    #endif
1165    
1166    
1167    
# Line 978  if (callout_extra) Line 1190  if (callout_extra)
1190      else      else
1191        {        {
1192        fprintf(f, "%2d: ", i/2);        fprintf(f, "%2d: ", i/2);
1193        (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],        PCHARSV(cb->subject + cb->offset_vector[i],
1194          cb->offset_vector[i+1] - cb->offset_vector[i], f);          cb->offset_vector[i+1] - cb->offset_vector[i], f);
1195        fprintf(f, "\n");        fprintf(f, "\n");
1196        }        }
# Line 991  printed lengths of the substrings. */ Line 1203  printed lengths of the substrings. */
1203    
1204  if (f != NULL) fprintf(f, "--->");  if (f != NULL) fprintf(f, "--->");
1205    
1206  pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);  PCHARS(pre_start, cb->subject, cb->start_match, f);
1207  post_start = pchars((unsigned char *)(cb->subject + cb->start_match),  PCHARS(post_start, cb->subject + cb->start_match,
1208    cb->current_position - cb->start_match, f);    cb->current_position - cb->start_match, f);
1209    
1210  subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);  PCHARS(subject_length, cb->subject, cb->subject_length, NULL);
1211    
1212  (void)pchars((unsigned char *)(cb->subject + cb->current_position),  PCHARSV(cb->subject + cb->current_position,
1213    cb->subject_length - cb->current_position, f);    cb->subject_length - cb->current_position, f);
1214    
1215  if (f != NULL) fprintf(f, "\n");  if (f != NULL) fprintf(f, "\n");
# Line 1103  free(block); Line 1315  free(block);
1315  *          Call pcre_fullinfo()                  *  *          Call pcre_fullinfo()                  *
1316  *************************************************/  *************************************************/
1317    
1318  /* Get one piece of information from the pcre_fullinfo() function */  /* Get one piece of information from the pcre_fullinfo() function. When only
1319    one of 8-bit or 16-bit is supported, use_pcre16 should always have the correct
1320    value, but the code is defensive. */
1321    
1322  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)  static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
1323  {  {
1324  int rc;  int rc;
1325  if ((rc = (fullinfo)(re, study, option, ptr)) < 0)  
1326    fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);  if (use_pcre16)
1327    #ifdef SUPPORT_PCRE16
1328      rc = pcre16_fullinfo(re, study, option, ptr);
1329    #else
1330      rc = PCRE_ERROR_BADMODE;
1331    #endif
1332    else
1333    #ifdef SUPPORT_PCRE8
1334      rc = pcre_fullinfo(re, study, option, ptr);
1335    #else
1336      rc = PCRE_ERROR_BADMODE;
1337    #endif
1338    
1339    if (rc < 0) fprintf(outfile, "Error %d from pcre%s_fullinfo(%d)\n", rc,
1340      use_pcre16? "16" : "", option);
1341  }  }
1342    
1343    
# Line 1151  for (;;) Line 1379  for (;;)
1379    {    {
1380    *limit = mid;    *limit = mid;
1381    
1382    count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,    PCRE_EXEC(count, re, extra, bptr, len, start_offset, options,
1383      use_offsets, use_size_offsets);      use_offsets, use_size_offsets);
1384    
1385    if (count == errnumber)    if (count == errnumber)
# Line 1313  int posix = 0; Line 1541  int posix = 0;
1541  int debug = 0;  int debug = 0;
1542  int done = 0;  int done = 0;
1543  int all_use_dfa = 0;  int all_use_dfa = 0;
 int use_pcre16 = 0;  
1544  int yield = 0;  int yield = 0;
1545  int stack_size;  int stack_size;
1546    
# Line 1329  pcre_uchar *copynamesptr; Line 1556  pcre_uchar *copynamesptr;
1556  pcre_uchar *getnamesptr;  pcre_uchar *getnamesptr;
1557    
1558  /* Get buffers from malloc() so that valgrind will check their misuse when  /* Get buffers from malloc() so that valgrind will check their misuse when
1559  debugging. They grow automatically when very long lines are read. The 16-bit  debugging. They grow automatically when very long lines are read. The 16-bit
1560  buffer (buffer16) is obtained only if needed. */  buffer (buffer16) is obtained only if needed. */
1561    
1562  buffer = (pcre_uint8 *)malloc(buffer_size);  buffer = (pcre_uint8 *)malloc(buffer_size);
# Line 1353  _setmode( _fileno( stdout ), _O_BINARY ) Line 1580  _setmode( _fileno( stdout ), _O_BINARY )
1580    
1581  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
1582    {    {
1583    unsigned char *endptr;    pcre_uint8 *endptr;
1584    
1585    if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;    if (strcmp(argv[op], "-m") == 0) showstore = 1;
   else if (strcmp(argv[op], "-m") == 0) showstore = 1;  
1586    else if (strcmp(argv[op], "-s") == 0) force_study = 0;    else if (strcmp(argv[op], "-s") == 0) force_study = 0;
1587    else if (strcmp(argv[op], "-s+") == 0)    else if (strcmp(argv[op], "-s+") == 0)
1588      {      {
1589      force_study = 1;      force_study = 1;
1590      force_study_options = PCRE_STUDY_JIT_COMPILE;      force_study_options = PCRE_STUDY_JIT_COMPILE;
1591      }      }
1592    #ifdef SUPPORT_PCRE16
1593      else if (strcmp(argv[op], "-16") == 0) use_pcre16 = 1;
1594    #endif
1595    
1596    else if (strcmp(argv[op], "-q") == 0) quiet = 1;    else if (strcmp(argv[op], "-q") == 0) quiet = 1;
1597    else if (strcmp(argv[op], "-b") == 0) debug = 1;    else if (strcmp(argv[op], "-b") == 0) debug = 1;
1598    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
# Line 1372  while (argc > 1 && argv[op][0] == '-') Line 1602  while (argc > 1 && argv[op][0] == '-')
1602    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;    else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
1603  #endif  #endif
1604    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
1605        ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),        ((size_offsets = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1606          *endptr == 0))          *endptr == 0))
1607      {      {
1608      op++;      op++;
# Line 1382  while (argc > 1 && argv[op][0] == '-') Line 1612  while (argc > 1 && argv[op][0] == '-')
1612      {      {
1613      int both = argv[op][2] == 0;      int both = argv[op][2] == 0;
1614      int temp;      int temp;
1615      if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),      if (argc > 2 && (temp = get_value((pcre_uint8 *)argv[op+1], &endptr),
1616                       *endptr == 0))                       *endptr == 0))
1617        {        {
1618        timeitm = temp;        timeitm = temp;
# Line 1393  while (argc > 1 && argv[op][0] == '-') Line 1623  while (argc > 1 && argv[op][0] == '-')
1623      if (both) timeit = timeitm;      if (both) timeit = timeitm;
1624      }      }
1625    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&    else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
1626        ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),        ((stack_size = get_value((pcre_uint8 *)argv[op+1], &endptr)),
1627          *endptr == 0))          *endptr == 0))
1628      {      {
1629  #if defined(_WIN32) || defined(WIN32) || defined(__minix)  #if defined(_WIN32) || defined(WIN32) || defined(__minix)
# Line 1423  while (argc > 1 && argv[op][0] == '-') Line 1653  while (argc > 1 && argv[op][0] == '-')
1653      unsigned long int lrc;      unsigned long int lrc;
1654      printf("PCRE version %s\n", pcre_version());      printf("PCRE version %s\n", pcre_version());
1655      printf("Compiled with\n");      printf("Compiled with\n");
1656    
1657  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. */  /* At least one of SUPPORT_PCRE8 and SUPPORT_PCRE16 will be set. If both
1658    are set, either both UTFs are supported or both are not supported. */
1659    
1660  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16  #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
1661      printf("  8-bit and 16-bit support\n");      printf("  8-bit and 16-bit support\n");
1662      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1663      printf("  %sUTF-8 support\n", rc? "" : "No ");      if (rc)
1664      (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);        printf("  UTF-8 and UTF-16 support\n");
1665      printf("  %sUTF-16 support\n", rc? "" : "No ");      else
1666          printf("  No UTF-8 or UTF-16 support\n");
1667  #elif defined SUPPORT_PCRE8  #elif defined SUPPORT_PCRE8
1668      printf("  8-bit support only\n");      printf("  8-bit support only\n");
1669      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);      (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
1670      printf("  %sUTF-8 support\n", rc? "" : "No ");      printf("  %sUTF-8 support\n", rc? "" : "No ");
1671  #else  #else
1672      printf("  16-bit support only\n");      printf("  16-bit support only\n");
1673      (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);      (void)pcre16_config(PCRE_CONFIG_UTF16, &rc);
1674      printf("  %sUTF-16 support\n", rc? "" : "No ");      printf("  %sUTF-16 support\n", rc? "" : "No ");
1675  #endif  #endif
1676    
1677      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);      (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
1678      printf("  %sUnicode properties support\n", rc? "" : "No ");      printf("  %sUnicode properties support\n", rc? "" : "No ");
1679      (void)pcre_config(PCRE_CONFIG_JIT, &rc);      (void)pcre_config(PCRE_CONFIG_JIT, &rc);
# Line 1488  while (argc > 1 && argv[op][0] == '-') Line 1720  while (argc > 1 && argv[op][0] == '-')
1720    argc--;    argc--;
1721    }    }
1722    
 /* Select which fullinfo function to use. */  
   
 fullinfo = use_pcre16? pcre16_fullinfo : pcre_fullinfo;  
   
1723  /* Get the store for the offsets vector, and remember what it was */  /* Get the store for the offsets vector, and remember what it was */
1724    
1725  size_offsets_max = size_offsets;  size_offsets_max = size_offsets;
# Line 1561  while (!done) Line 1789  while (!done)
1789  #endif  #endif
1790    
1791    const char *error;    const char *error;
1792    unsigned char *markptr;    pcre_uint8 *markptr;
1793    unsigned char *p, *pp, *ppp;    pcre_uint8 *p, *pp, *ppp;
1794    unsigned char *to_file = NULL;    pcre_uint8 *to_file = NULL;
1795    const unsigned char *tables = NULL;    const pcre_uint8 *tables = NULL;
1796    unsigned long int true_size, true_study_size = 0;    unsigned long int true_size, true_study_size = 0;
1797    size_t size, regex_gotten_store;    size_t size, regex_gotten_store;
1798    int do_allcaps = 0;    int do_allcaps = 0;
# Line 1580  while (!done) Line 1808  while (!done)
1808    int do_flip = 0;    int do_flip = 0;
1809    int erroroffset, len, delimiter, poffset;    int erroroffset, len, delimiter, poffset;
1810    
1811    use_utf8 = 0;    use_utf = 0;
1812    debug_lengths = 1;    debug_lengths = 1;
1813    
1814    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;    if (extend_inputline(infile, buffer, "  re> ") == NULL) break;
# Line 1641  while (!done) Line 1869  while (!done)
1869      fprintf(outfile, "Compiled pattern%s loaded from %s\n",      fprintf(outfile, "Compiled pattern%s loaded from %s\n",
1870        do_flip? " (byte-inverted)" : "", p);        do_flip? " (byte-inverted)" : "", p);
1871    
     /* Need to know if UTF-8 for printing data strings */  
   
     new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);  
     use_utf8 = (get_options & PCRE_UTF8) != 0;  
   
1872      /* Now see if there is any following study data. */      /* Now see if there is any following study data. */
1873    
1874      if (true_study_size != 0)      if (true_study_size != 0)
# Line 1662  while (!done) Line 1885  while (!done)
1885          {          {
1886          FAIL_READ:          FAIL_READ:
1887          fprintf(outfile, "Failed to read data from %s\n", p);          fprintf(outfile, "Failed to read data from %s\n", p);
1888          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
1889              {
1890              PCRE_FREE_STUDY(extra);
1891              }
1892          if (re != NULL) new_free(re);          if (re != NULL) new_free(re);
1893          fclose(f);          fclose(f);
1894          continue;          continue;
# Line 1672  while (!done) Line 1898  while (!done)
1898        }        }
1899      else fprintf(outfile, "No study data\n");      else fprintf(outfile, "No study data\n");
1900    
1901        /* Flip the necessary bytes. */
1902        if (do_flip != 0)
1903          {
1904          PCRE_PATTERN_TO_HOST_BYTE_ORDER(re, extra, NULL);
1905          }
1906    
1907        /* Need to know if UTF-8 for printing data strings */
1908    
1909        new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1910        use_utf = (get_options & PCRE_UTF8) != 0;
1911    
1912      fclose(f);      fclose(f);
1913      goto SHOW_INFO;      goto SHOW_INFO;
1914      }      }
1915    
1916    /* In-line pattern (the usual case). Get the delimiter and seek the end of    /* In-line pattern (the usual case). Get the delimiter and seek the end of
1917    the pattern; if is isn't complete, read more. */    the pattern; if it isn't complete, read more. */
1918    
1919    delimiter = *p++;    delimiter = *p++;
1920    
# Line 1786  while (!done) Line 2023  while (!done)
2023        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
2024        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;        case 'Y': options |= PCRE_NO_START_OPTIMISE; break;
2025        case 'Z': debug_lengths = 0; break;        case 'Z': debug_lengths = 0; break;
2026        case '8': options |= PCRE_UTF8; use_utf8 = 1; break;        case '8': options |= PCRE_UTF8; use_utf = 1; break;
2027        case '?': options |= PCRE_NO_UTF8_CHECK; break;        case '?': options |= PCRE_NO_UTF8_CHECK; break;
2028    
2029        case 'T':        case 'T':
# Line 1898  while (!done) Line 2135  while (!done)
2135    
2136      {      {
2137      unsigned long int get_options;      unsigned long int get_options;
2138    
2139      /* In 16-bit mode, convert the input. The space needed for a non-UTF string      /* In 16-bit mode, convert the input. */
2140      is exactly double the 8-bit size. For a UTF-8 string, the size needed for  
     UTF-16 is no more than double, because up to 0xffff uses no more than 3  
     bytes in UTF-8 but possibly 4 in UTF-16. Higher values use 4 bytes in UTF-8  
     and up to 4 bytes in UTF-16. */  
   
2141  #ifdef SUPPORT_PCRE16  #ifdef SUPPORT_PCRE16
2142      if (use_pcre16) (void)to16(p, options & PCRE_UTF8);      if (use_pcre16)
2143          {
2144          if (to16(p, options & PCRE_UTF8, (int)strlen((char *)p)) < 0)
2145            {
2146            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
2147              "converted to UTF-16\n");
2148            goto SKIP_DATA;
2149            }
2150          p = (pcre_uint8 *)buffer16;
2151          }
2152  #endif  #endif
2153    
2154      /* Compile many times when timing */      /* Compile many times when timing */
# Line 1918  while (!done) Line 2160  while (!done)
2160        clock_t start_time = clock();        clock_t start_time = clock();
2161        for (i = 0; i < timeit; i++)        for (i = 0; i < timeit; i++)
2162          {          {
2163  #ifdef SUPPORT_PCRE16          PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
         if (use_pcre16)  
           re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);  
         else  
 #endif  
           re = pcre_compile((char *)p, options, &error, &erroroffset, tables);  
2164          if (re != NULL) free(re);          if (re != NULL) free(re);
2165          }          }
2166        time_taken = clock() - start_time;        time_taken = clock() - start_time;
# Line 1933  while (!done) Line 2170  while (!done)
2170        }        }
2171    
2172      first_gotten_store = 0;      first_gotten_store = 0;
2173        PCRE_COMPILE(re, p, options, &error, &erroroffset, tables);
 #ifdef SUPPORT_PCRE16  
     if (use_pcre16)  
       re = pcre16_compile((PCRE_SPTR16)buffer16, options, &error, &erroroffset, tables);  
     else  
 #endif  
       re = pcre_compile((char *)p, options, &error, &erroroffset, tables);  
2174    
2175      /* Compilation failed; go back for another re, skipping to blank line      /* Compilation failed; go back for another re, skipping to blank line
2176      if non-interactive. */      if non-interactive. */
# Line 1971  while (!done) Line 2202  while (!done)
2202      lines. */      lines. */
2203    
2204      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);      new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
2205      if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;      if ((get_options & PCRE_UTF8) != 0) use_utf = 1;
2206    
2207      /* Extract the size for possible writing before possibly flipping it,      /* Extract the size for possible writing before possibly flipping it,
2208      and remember the store that was got. */      and remember the store that was got. */
# Line 2001  while (!done) Line 2232  while (!done)
2232          clock_t start_time = clock();          clock_t start_time = clock();
2233          for (i = 0; i < timeit; i++)          for (i = 0; i < timeit; i++)
2234            {            {
2235            if (use_pcre16)            PCRE_STUDY(extra, re, study_options | force_study_options, &error);
2236              extra = pcre16_study(re, study_options | force_study_options, &error);            }
           else  
             extra = pcre_study(re, study_options | force_study_options, &error);  
           }  
2237          time_taken = clock() - start_time;          time_taken = clock() - start_time;
2238          if (extra != NULL) pcre_free_study(extra);          if (extra != NULL)
2239              {
2240              PCRE_FREE_STUDY(extra);
2241              }
2242          fprintf(outfile, "  Study time %.4f milliseconds\n",          fprintf(outfile, "  Study time %.4f milliseconds\n",
2243            (((double)time_taken * 1000.0) / (double)timeit) /            (((double)time_taken * 1000.0) / (double)timeit) /
2244              (double)CLOCKS_PER_SEC);              (double)CLOCKS_PER_SEC);
2245          }          }
2246        if (use_pcre16)        PCRE_STUDY(extra, re, study_options | force_study_options, &error);
         extra = pcre16_study(re, study_options | force_study_options, &error);  
       else  
         extra = pcre_study(re, study_options | force_study_options, &error);  
2247        if (error != NULL)        if (error != NULL)
2248          fprintf(outfile, "Failed to study: %s\n", error);          fprintf(outfile, "Failed to study: %s\n", error);
2249        else if (extra != NULL)        else if (extra != NULL)
# Line 2044  while (!done) Line 2272  while (!done)
2272        extra->flags |= PCRE_EXTRA_MARK;        extra->flags |= PCRE_EXTRA_MARK;
2273        }        }
2274    
     /* If the 'F' option was present, we flip the bytes of all the integer  
     fields in the regex data block and the study block. This is to make it  
     possible to test PCRE's handling of byte-flipped patterns, e.g. those  
     compiled on a different architecture. */  
   
     if (do_flip)  
       {  
       real_pcre *rre = (real_pcre *)re;  
       rre->magic_number =  
         byteflip(rre->magic_number, sizeof(rre->magic_number));  
       rre->size = byteflip(rre->size, sizeof(rre->size));  
       rre->options = byteflip(rre->options, sizeof(rre->options));  
       rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));  
       rre->top_bracket =  
         (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));  
       rre->top_backref =  
         (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));  
       rre->first_char =  
         (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));  
       rre->req_char =  
         (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));  
       rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,  
         sizeof(rre->name_table_offset));  
       rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,  
         sizeof(rre->name_entry_size));  
       rre->name_count = (pcre_uint16)byteflip(rre->name_count,  
         sizeof(rre->name_count));  
   
       if (extra != NULL)  
         {  
         pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);  
         rsd->size = byteflip(rsd->size, sizeof(rsd->size));  
         rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));  
         rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));  
         }  
       }  
   
2275      /* Extract and display information from the compiled data if required. */      /* Extract and display information from the compiled data if required. */
2276    
2277      SHOW_INFO:      SHOW_INFO:
# Line 2088  while (!done) Line 2279  while (!done)
2279      if (do_debug)      if (do_debug)
2280        {        {
2281        fprintf(outfile, "------------------------------------------------------------------\n");        fprintf(outfile, "------------------------------------------------------------------\n");
2282    #if defined SUPPORT_PCRE8 && defined SUPPORT_PCRE16
2283        if (use_pcre16)        if (use_pcre16)
2284          pcre16_printint(re, outfile, debug_lengths);          pcre16_printint(re, outfile, debug_lengths);
2285        else        else
2286          pcre_printint(re, outfile, debug_lengths);          pcre_printint(re, outfile, debug_lengths);
2287    #elif defined SUPPORT_PCRE8
2288          pcre_printint(re, outfile, debug_lengths);
2289    #else
2290          pcre16_printint(re, outfile, debug_lengths);
2291    #endif
2292        }        }
2293    
2294      /* We already have the options in get_options (see above) */      /* We already have the options in get_options (see above) */
# Line 2121  while (!done) Line 2318  while (!done)
2318    
2319        /* The old, obsolete function pcre_info() works only in 8-bit mode. Check        /* The old, obsolete function pcre_info() works only in 8-bit mode. Check
2320        that it gives the same results as the new function. */        that it gives the same results as the new function. */
2321    
2322  #if !defined NOINFOCHECK  #if !defined NOINFOCHECK
2323        if (!use_pcre16)        if (!use_pcre16)
2324          {          {
2325          old_count = pcre_info(re, &old_options, &old_first_char);          old_count = pcre_info(re, &old_options, &old_first_char);
2326          if (count < 0) fprintf(outfile,          if (count < 0) fprintf(outfile,
2327            "Error %d from pcre_info()\n", count);            "Error %d from pcre_info()\n", count);
# Line 2133  while (!done) Line 2330  while (!done)
2330            if (old_count != count) fprintf(outfile,            if (old_count != count) fprintf(outfile,
2331              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,              "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
2332                old_count);                old_count);
2333    
2334            if (old_first_char != first_char) fprintf(outfile,            if (old_first_char != first_char) fprintf(outfile,
2335              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",              "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
2336                first_char, old_first_char);                first_char, old_first_char);
2337    
2338            if (old_options != (int)get_options) fprintf(outfile,            if (old_options != (int)get_options) fprintf(outfile,
2339              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",              "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
2340                get_options, old_options);                get_options, old_options);
2341            }            }
2342          }          }
2343  #endif  #endif
2344    
2345        if (size != regex_gotten_store) fprintf(outfile,        if (size != regex_gotten_store) fprintf(outfile,
# Line 2185  while (!done) Line 2382  while (!done)
2382            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",            ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
2383            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",            ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
2384            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",            ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
2385            ((get_options & PCRE_UTF8) != 0)? " utf8" : "",            ((get_options & PCRE_UTF8) != 0)? " utf" : "",
2386            ((get_options & PCRE_UCP) != 0)? " ucp" : "",            ((get_options & PCRE_UCP) != 0)? " ucp" : "",
2387            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",            ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf_check" : "",
2388            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",            ((get_options & PCRE_NO_START_OPTIMIZE) != 0)? " no_start_optimize" : "",
2389            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");            ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
2390    
# Line 2232  while (!done) Line 2429  while (!done)
2429          const char *caseless =          const char *caseless =
2430            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_FCH_CASELESS) == 0)?
2431            "" : " (caseless)";            "" : " (caseless)";
2432    
2433          if (PRINTHEX(first_char))          if (PRINTOK(first_char))
2434            fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);            fprintf(outfile, "First char = \'%c\'%s\n", first_char, caseless);
2435          else          else
2436            fprintf(outfile, "First char = %d%s\n", first_char, caseless);            {
2437              fprintf(outfile, "First char = ");
2438              pchar(first_char, outfile);
2439              fprintf(outfile, "%s\n", caseless);
2440              }
2441          }          }
2442    
2443        if (need_char < 0)        if (need_char < 0)
# Line 2249  while (!done) Line 2450  while (!done)
2450            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?            ((((real_pcre *)re)->flags & PCRE_RCH_CASELESS) == 0)?
2451            "" : " (caseless)";            "" : " (caseless)";
2452    
2453          if (PRINTHEX(need_char))          if (PRINTOK(need_char))
2454            fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);            fprintf(outfile, "Need char = \'%c\'%s\n", need_char, caseless);
2455          else          else
2456            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);            fprintf(outfile, "Need char = %d%s\n", need_char, caseless);
# Line 2292  while (!done) Line 2493  while (!done)
2493                    fprintf(outfile, "\n  ");                    fprintf(outfile, "\n  ");
2494                    c = 2;                    c = 2;
2495                    }                    }
2496                  if (PRINTHEX(i) && i != ' ')                  if (PRINTOK(i) && i != ' ')
2497                    {                    {
2498                    fprintf(outfile, "%c ", i);                    fprintf(outfile, "%c ", i);
2499                    c += 2;                    c += 2;
# Line 2332  while (!done) Line 2533  while (!done)
2533    
2534      if (to_file != NULL)      if (to_file != NULL)
2535        {        {
2536          /* If the 'F' option was present, we flip the bytes of all the integer
2537          fields in the regex data block and the study block. This is to make it
2538          possible to test PCRE's handling of byte-flipped patterns, e.g. those
2539          compiled on a different architecture. */
2540    
2541          if (do_flip)
2542            {
2543            real_pcre *rre = (real_pcre *)re;
2544            rre->magic_number =
2545              byteflip(rre->magic_number, sizeof(rre->magic_number));
2546            rre->size = byteflip(rre->size, sizeof(rre->size));
2547            rre->options = byteflip(rre->options, sizeof(rre->options));
2548            rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
2549            rre->top_bracket =
2550              (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
2551            rre->top_backref =
2552              (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
2553            rre->first_char =
2554              (pcre_uint16)byteflip(rre->first_char, sizeof(rre->first_char));
2555            rre->req_char =
2556              (pcre_uint16)byteflip(rre->req_char, sizeof(rre->req_char));
2557            rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
2558              sizeof(rre->name_table_offset));
2559            rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
2560              sizeof(rre->name_entry_size));
2561            rre->name_count = (pcre_uint16)byteflip(rre->name_count,
2562              sizeof(rre->name_count));
2563    
2564            if (extra != NULL)
2565              {
2566              pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
2567              rsd->size = byteflip(rsd->size, sizeof(rsd->size));
2568              rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
2569              rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
2570              }
2571            }
2572    
2573        FILE *f = fopen((char *)to_file, "wb");        FILE *f = fopen((char *)to_file, "wb");
2574        if (f == NULL)        if (f == NULL)
2575          {          {
# Line 2376  while (!done) Line 2614  while (!done)
2614          }          }
2615    
2616        new_free(re);        new_free(re);
2617        if (extra != NULL) pcre_free_study(extra);        if (extra != NULL)
2618            {
2619            PCRE_FREE_STUDY(extra);
2620            }
2621        if (locale_set)        if (locale_set)
2622          {          {
2623          new_free((void *)tables);          new_free((void *)tables);
# Line 2477  while (!done) Line 2718  while (!done)
2718            c = c * 8 + *p++ - '0';            c = c * 8 + *p++ - '0';
2719    
2720  #if !defined NOUTF8  #if !defined NOUTF8
2721          if (use_utf8 && c > 255)          if (use_utf && c > 255)
2722            {            {
2723            unsigned char buff8[8];            pcre_uint8 buff8[8];
2724            int ii, utn;            int ii, utn;
2725            utn = ord2utf8(c, buff8);            utn = ord2utf8(c, buff8);
2726            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];            for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
# Line 2495  while (!done) Line 2736  while (!done)
2736  #if !defined NOUTF8  #if !defined NOUTF8
2737          if (*p == '{')          if (*p == '{')
2738            {            {
2739            unsigned char *pt = p;            pcre_uint8 *pt = p;
2740            c = 0;            c = 0;
2741    
2742            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails            /* We used to have "while (isxdigit(*(++pt)))" here, but it fails
# Line 2507  while (!done) Line 2748  while (!done)
2748              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);              c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'a' - 10);
2749            if (*pt == '}')            if (*pt == '}')
2750              {              {
2751              unsigned char buff8[8];              pcre_uint8 buff8[8];
2752              int ii, utn;              int ii, utn;
2753              if (use_utf8)              if (use_utf)
2754                {                {
2755                utn = ord2utf8(c, buff8);                utn = ord2utf8(c, buff8);
2756                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];                for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
# Line 2817  while (!done) Line 3058  while (!done)
3058            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
3059              {              {
3060              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
3061              (void)pchars(dbuffer + pmatch[i].rm_so,              PCHARSV(dbuffer + pmatch[i].rm_so,
3062                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);                pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
3063              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3064              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3065                {                {
3066                fprintf(outfile, "%2d+ ", (int)i);                fprintf(outfile, "%2d+ ", (int)i);
3067                (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,                PCHARSV(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
3068                  outfile);                  outfile);
3069                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3070                }                }
# Line 2831  while (!done) Line 3072  while (!done)
3072            }            }
3073          }          }
3074        free(pmatch);        free(pmatch);
3075          goto NEXT_DATA;
3076        }        }
3077    
3078    #endif  /* !defined NOPOSIX */
3079    
3080      /* Handle matching via the native interface - repeats for /g and /G */      /* Handle matching via the native interface - repeats for /g and /G */
3081    
3082      else  #ifdef SUPPORT_PCRE16
3083  #endif  /* !defined NOPOSIX */      if (use_pcre16)
3084          {
3085          len = to16(bptr, (((real_pcre *)re)->options) & PCRE_UTF8, len);
3086          if (len < 0)
3087            {
3088            fprintf(outfile, "**Failed: invalid UTF-8 string cannot be "
3089              "converted to UTF-16\n");
3090            goto NEXT_DATA;
3091            }
3092          bptr = (pcre_uint8 *)buffer16;
3093          }
3094    #endif
3095    
3096      for (;; gmatched++)    /* Loop for /g or /G */      for (;; gmatched++)    /* Loop for /g or /G */
3097        {        {
# Line 2847  while (!done) Line 3102  while (!done)
3102          register int i;          register int i;
3103          clock_t time_taken;          clock_t time_taken;
3104          clock_t start_time = clock();          clock_t start_time = clock();
   
 #ifdef SUPPORT_PCRE16  
         if (use_pcre16) len = to16(bptr, options & PCRE_UTF8);  
 #endif  
   
3105    
3106  #if !defined NODFA  #if !defined NODFA
3107          if (all_use_dfa || use_dfa)          if (all_use_dfa || use_dfa)
# Line 2866  while (!done) Line 3116  while (!done)
3116  #endif  #endif
3117    
3118          for (i = 0; i < timeitm; i++)          for (i = 0; i < timeitm; i++)
3119            count = pcre_exec(re, extra, (char *)bptr, len,            {
3120              PCRE_EXEC(count, re, extra, bptr, len,
3121              start_offset, options | g_notempty, use_offsets, use_size_offsets);              start_offset, options | g_notempty, use_offsets, use_size_offsets);
3122              }
3123          time_taken = clock() - start_time;          time_taken = clock() - start_time;
3124          fprintf(outfile, "Execute time %.4f milliseconds\n",          fprintf(outfile, "Execute time %.4f milliseconds\n",
3125            (((double)time_taken * 1000.0) / (double)timeitm) /            (((double)time_taken * 1000.0) / (double)timeitm) /
# Line 2913  while (!done) Line 3164  while (!done)
3164            }            }
3165          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;          extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
3166          extra->callout_data = &callout_data;          extra->callout_data = &callout_data;
3167          count = pcre_exec(re, extra, (char *)bptr, len, start_offset,          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3168            options | g_notempty, use_offsets, use_size_offsets);            options | g_notempty, use_offsets, use_size_offsets);
3169          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;          extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
3170          }          }
# Line 2938  while (!done) Line 3189  while (!done)
3189    
3190        else        else
3191          {          {
3192          if (use_pcre16)          PCRE_EXEC(count, re, extra, bptr, len, start_offset,
3193            count = pcre16_exec(re, extra, (PCRE_SPTR16)buffer16, len,            options | g_notempty, use_offsets, use_size_offsets);
             start_offset, options | g_notempty, use_offsets, use_size_offsets);  
         else  
           count = pcre_exec(re, extra, (char *)bptr, len,  
             start_offset, options | g_notempty, use_offsets, use_size_offsets);  
3194          if (count == 0)          if (count == 0)
3195            {            {
3196            fprintf(outfile, "Matched, but too many substrings\n");            fprintf(outfile, "Matched, but too many substrings\n");
# Line 3004  while (!done) Line 3251  while (!done)
3251            else            else
3252              {              {
3253              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
3254              (void)pchars(bptr + use_offsets[i],              PCHARSV(bptr + use_offsets[i],
3255                use_offsets[i+1] - use_offsets[i], outfile);                use_offsets[i+1] - use_offsets[i], outfile);
3256              fprintf(outfile, "\n");              fprintf(outfile, "\n");
3257              if (do_showcaprest || (i == 0 && do_showrest))              if (do_showcaprest || (i == 0 && do_showrest))
3258                {                {
3259                fprintf(outfile, "%2d+ ", i/2);                fprintf(outfile, "%2d+ ", i/2);
3260                (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],                PCHARSV(bptr + use_offsets[i+1], len - use_offsets[i+1],
3261                  outfile);                  outfile);
3262                fprintf(outfile, "\n");                fprintf(outfile, "\n");
3263                }                }
# Line 3106  while (!done) Line 3353  while (!done)
3353          if (use_size_offsets > 1)          if (use_size_offsets > 1)
3354            {            {
3355            fprintf(outfile, ": ");            fprintf(outfile, ": ");
3356            pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],            PCHARSV(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
3357              outfile);              outfile);
3358            }            }
3359          fprintf(outfile, "\n");          fprintf(outfile, "\n");
# Line 3156  while (!done) Line 3403  while (!done)
3403                bptr[start_offset] == '\r' &&                bptr[start_offset] == '\r' &&
3404                bptr[start_offset+1] == '\n')                bptr[start_offset+1] == '\n')
3405              onechar++;              onechar++;
3406            else if (use_utf8)            else if (use_utf)
3407              {              {
3408              while (start_offset + onechar < len)              while (start_offset + onechar < len)
3409                {                {
# Line 3242  while (!done) Line 3489  while (!done)
3489  #endif  #endif
3490    
3491    if (re != NULL) new_free(re);    if (re != NULL) new_free(re);
3492    if (extra != NULL) pcre_free_study(extra);    if (extra != NULL)
3493        {
3494        PCRE_FREE_STUDY(extra);
3495        }
3496    if (locale_set)    if (locale_set)
3497      {      {
3498      new_free((void *)tables);      new_free((void *)tables);

Legend:
Removed from v.805  
changed lines
  Added in v.811

  ViewVC Help
Powered by ViewVC 1.1.5