/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 230 - (show annotations)
Mon Sep 10 13:23:56 2007 UTC (7 years, 10 months ago) by ph10
File MIME type: text/plain
File size: 69401 byte(s)
Error occurred while calculating annotation data.
(1) Move internal flags out of the options field, to make room.
(2) \r and \n must be explicit to trigger the special CRLF handling exception.
(3) (?J) at the start now sets JCHANGED as well as DUPNAMES.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include <config.h>
41 #endif
42
43 #include <ctype.h>
44 #include <stdio.h>
45 #include <string.h>
46 #include <stdlib.h>
47 #include <time.h>
48 #include <locale.h>
49 #include <errno.h>
50
51
52 /* A number of things vary for Windows builds. Originally, pcretest opened its
53 input and output without "b"; then I was told that "b" was needed in some
54 environments, so it was added for release 5.0 to both the input and output. (It
55 makes no difference on Unix-like systems.) Later I was told that it is wrong
56 for the input on Windows. I've now abstracted the modes into two macros that
57 are set here, to make it easier to fiddle with them, and removed "b" from the
58 input mode under Windows. */
59
60 #if defined(_WIN32) || defined(WIN32)
61 #include <io.h> /* For _setmode() */
62 #include <fcntl.h> /* For _O_BINARY */
63 #define INPUT_MODE "r"
64 #define OUTPUT_MODE "wb"
65
66 #else
67 #include <sys/time.h> /* These two includes are needed */
68 #include <sys/resource.h> /* for setrlimit(). */
69 #define INPUT_MODE "rb"
70 #define OUTPUT_MODE "wb"
71 #endif
72
73
74 /* We have to include pcre_internal.h because we need the internal info for
75 displaying the results of pcre_study() and we also need to know about the
76 internal macros, structures, and other internal data values; pcretest has
77 "inside information" compared to a program that strictly follows the PCRE API.
78
79 Although pcre_internal.h does itself include pcre.h, we explicitly include it
80 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
81 appropriately for an application, not for building PCRE. */
82
83 #include "pcre.h"
84 #include "pcre_internal.h"
85
86 /* We need access to the data tables that PCRE uses. So as not to have to keep
87 two copies, we include the source file here, changing the names of the external
88 symbols to prevent clashes. */
89
90 #define _pcre_utf8_table1 utf8_table1
91 #define _pcre_utf8_table1_size utf8_table1_size
92 #define _pcre_utf8_table2 utf8_table2
93 #define _pcre_utf8_table3 utf8_table3
94 #define _pcre_utf8_table4 utf8_table4
95 #define _pcre_utt utt
96 #define _pcre_utt_size utt_size
97 #define _pcre_OP_lengths OP_lengths
98
99 #include "pcre_tables.c"
100
101 /* We also need the pcre_printint() function for printing out compiled
102 patterns. This function is in a separate file so that it can be included in
103 pcre_compile.c when that module is compiled with debugging enabled.
104
105 The definition of the macro PRINTABLE, which determines whether to print an
106 output character as-is or as a hex value when showing compiled patterns, is
107 contained in this file. We use it here also, in cases when the locale has not
108 been explicitly changed, so as to get consistent output from systems that
109 differ in their output from isprint() even in the "C" locale. */
110
111 #include "pcre_printint.src"
112
113 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
114
115
116 /* It is possible to compile this test program without including support for
117 testing the POSIX interface, though this is not available via the standard
118 Makefile. */
119
120 #if !defined NOPOSIX
121 #include "pcreposix.h"
122 #endif
123
124 /* It is also possible, for the benefit of the version currently imported into
125 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
126 interface to the DFA matcher (NODFA), and without the doublecheck of the old
127 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
128 UTF8 support if PCRE is built without it. */
129
130 #ifndef SUPPORT_UTF8
131 #ifndef NOUTF8
132 #define NOUTF8
133 #endif
134 #endif
135
136
137 /* Other parameters */
138
139 #ifndef CLOCKS_PER_SEC
140 #ifdef CLK_TCK
141 #define CLOCKS_PER_SEC CLK_TCK
142 #else
143 #define CLOCKS_PER_SEC 100
144 #endif
145 #endif
146
147 /* This is the default loop count for timing. */
148
149 #define LOOPREPEAT 500000
150
151 /* Static variables */
152
153 static FILE *outfile;
154 static int log_store = 0;
155 static int callout_count;
156 static int callout_extra;
157 static int callout_fail_count;
158 static int callout_fail_id;
159 static int debug_lengths;
160 static int first_callout;
161 static int locale_set = 0;
162 static int show_malloc;
163 static int use_utf8;
164 static size_t gotten_store;
165
166 /* The buffers grow automatically if very long input lines are encountered. */
167
168 static int buffer_size = 50000;
169 static uschar *buffer = NULL;
170 static uschar *dbuffer = NULL;
171 static uschar *pbuffer = NULL;
172
173
174
175 /*************************************************
176 * Read or extend an input line *
177 *************************************************/
178
179 /* Input lines are read into buffer, but both patterns and data lines can be
180 continued over multiple input lines. In addition, if the buffer fills up, we
181 want to automatically expand it so as to be able to handle extremely large
182 lines that are needed for certain stress tests. When the input buffer is
183 expanded, the other two buffers must also be expanded likewise, and the
184 contents of pbuffer, which are a copy of the input for callouts, must be
185 preserved (for when expansion happens for a data line). This is not the most
186 optimal way of handling this, but hey, this is just a test program!
187
188 Arguments:
189 f the file to read
190 start where in buffer to start (this *must* be within buffer)
191
192 Returns: pointer to the start of new data
193 could be a copy of start, or could be moved
194 NULL if no data read and EOF reached
195 */
196
197 static uschar *
198 extend_inputline(FILE *f, uschar *start)
199 {
200 uschar *here = start;
201
202 for (;;)
203 {
204 int rlen = buffer_size - (here - buffer);
205
206 if (rlen > 1000)
207 {
208 int dlen;
209 if (fgets((char *)here, rlen, f) == NULL)
210 return (here == start)? NULL : start;
211 dlen = (int)strlen((char *)here);
212 if (dlen > 0 && here[dlen - 1] == '\n') return start;
213 here += dlen;
214 }
215
216 else
217 {
218 int new_buffer_size = 2*buffer_size;
219 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
220 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
221 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
222
223 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
224 {
225 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
226 exit(1);
227 }
228
229 memcpy(new_buffer, buffer, buffer_size);
230 memcpy(new_pbuffer, pbuffer, buffer_size);
231
232 buffer_size = new_buffer_size;
233
234 start = new_buffer + (start - buffer);
235 here = new_buffer + (here - buffer);
236
237 free(buffer);
238 free(dbuffer);
239 free(pbuffer);
240
241 buffer = new_buffer;
242 dbuffer = new_dbuffer;
243 pbuffer = new_pbuffer;
244 }
245 }
246
247 return NULL; /* Control never gets here */
248 }
249
250
251
252
253
254
255
256 /*************************************************
257 * Read number from string *
258 *************************************************/
259
260 /* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
261 around with conditional compilation, just do the job by hand. It is only used
262 for unpicking arguments, so just keep it simple.
263
264 Arguments:
265 str string to be converted
266 endptr where to put the end pointer
267
268 Returns: the converted value, as an int
269 */
270
/* Skip leading white space, then convert a run of decimal digits.

Arguments:
  str       string to be converted
  endptr    receives a pointer to the first character that was not consumed

Returns:    the converted value as an int; 0 if there are no digits
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
/* Accumulate in an unsigned type. Signed overflow is undefined behaviour, so
an over-long digit string must not be summed in an int. Unsigned arithmetic
wraps in a defined way, and values that fit in an int are unaffected. */

unsigned int result = 0;

while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str)) result = result * 10 + (unsigned int)(*str++ - '0');
*endptr = str;
return (int)result;
}
280
281
282
283
284 /*************************************************
285 * Convert UTF-8 string to value *
286 *************************************************/
287
288 /* This function takes one or more bytes that represents a UTF-8 character,
289 and returns the value of the character.
290
291 Argument:
292 utf8bytes a pointer to the byte vector
293 vptr a pointer to an int to receive the value
294
295 Returns: > 0 => the number of bytes consumed
296 -6 to 0 => malformed UTF-8 character at offset = (-return)
297 */
298
299 #if !defined NOUTF8
300
301 static int
302 utf82ord(unsigned char *utf8bytes, int *vptr)
303 {
304 int c = *utf8bytes++;
305 int d = c;
306 int i, j, s;
307
308 for (i = -1; i < 6; i++) /* i is number of additional bytes */
309 {
310 if ((d & 0x80) == 0) break;
311 d <<= 1;
312 }
313
314 if (i == -1) { *vptr = c; return 1; } /* ascii character */
315 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
316
317 /* i now has a value in the range 1-5 */
318
319 s = 6*i;
320 d = (c & utf8_table3[i]) << s;
321
322 for (j = 0; j < i; j++)
323 {
324 c = *utf8bytes++;
325 if ((c & 0xc0) != 0x80) return -(j+1);
326 s -= 6;
327 d |= (c & 0x3f) << s;
328 }
329
330 /* Check that encoding was the correct unique one */
331
332 for (j = 0; j < utf8_table1_size; j++)
333 if (d <= utf8_table1[j]) break;
334 if (j != i) return -(i+1);
335
336 /* Valid value */
337
338 *vptr = d;
339 return i+1;
340 }
341
342 #endif
343
344
345
346 /*************************************************
347 * Convert character value to UTF-8 *
348 *************************************************/
349
350 /* This function takes an integer value in the range 0 - 0x7fffffff
351 and encodes it as a UTF-8 character in 1 to 6 bytes.
352
353 Arguments:
354 cvalue the character value
355 utf8bytes pointer to buffer for result - at least 6 bytes long
356
357 Returns: number of characters placed in the buffer
358 */
359
360 #if !defined NOUTF8
361
362 static int
363 ord2utf8(int cvalue, uschar *utf8bytes)
364 {
365 register int i, j;
366 for (i = 0; i < utf8_table1_size; i++)
367 if (cvalue <= utf8_table1[i]) break;
368 utf8bytes += i;
369 for (j = i; j > 0; j--)
370 {
371 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
372 cvalue >>= 6;
373 }
374 *utf8bytes = utf8_table2[i] | cvalue;
375 return i + 1;
376 }
377
378 #endif
379
380
381
382 /*************************************************
383 * Print character string *
384 *************************************************/
385
386 /* Character string printing function. Must handle UTF-8 strings in utf8
387 mode. Yields number of characters printed. If handed a NULL file, just counts
388 chars without printing. */
389
390 static int pchars(unsigned char *p, int length, FILE *f)
391 {
392 int c = 0;
393 int yield = 0;
394
395 while (length-- > 0)
396 {
397 #if !defined NOUTF8
398 if (use_utf8)
399 {
400 int rc = utf82ord(p, &c);
401
402 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
403 {
404 length -= rc - 1;
405 p += rc;
406 if (PRINTHEX(c))
407 {
408 if (f != NULL) fprintf(f, "%c", c);
409 yield++;
410 }
411 else
412 {
413 int n = 4;
414 if (f != NULL) fprintf(f, "\\x{%02x}", c);
415 yield += (n <= 0x000000ff)? 2 :
416 (n <= 0x00000fff)? 3 :
417 (n <= 0x0000ffff)? 4 :
418 (n <= 0x000fffff)? 5 : 6;
419 }
420 continue;
421 }
422 }
423 #endif
424
425 /* Not UTF-8, or malformed UTF-8 */
426
427 c = *p++;
428 if (PRINTHEX(c))
429 {
430 if (f != NULL) fprintf(f, "%c", c);
431 yield++;
432 }
433 else
434 {
435 if (f != NULL) fprintf(f, "\\x%02x", c);
436 yield += 4;
437 }
438 }
439
440 return yield;
441 }
442
443
444
445 /*************************************************
446 * Callout function *
447 *************************************************/
448
449 /* Called from PCRE as a result of the (?C) item. We print out where we are in
450 the match. Yield zero unless more callouts than the fail count, or the callout
451 data is not zero. */
452
453 static int callout(pcre_callout_block *cb)
454 {
455 FILE *f = (first_callout | callout_extra)? outfile : NULL;
456 int i, pre_start, post_start, subject_length;
457
458 if (callout_extra)
459 {
460 fprintf(f, "Callout %d: last capture = %d\n",
461 cb->callout_number, cb->capture_last);
462
463 for (i = 0; i < cb->capture_top * 2; i += 2)
464 {
465 if (cb->offset_vector[i] < 0)
466 fprintf(f, "%2d: <unset>\n", i/2);
467 else
468 {
469 fprintf(f, "%2d: ", i/2);
470 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
471 cb->offset_vector[i+1] - cb->offset_vector[i], f);
472 fprintf(f, "\n");
473 }
474 }
475 }
476
477 /* Re-print the subject in canonical form, the first time or if giving full
478 datails. On subsequent calls in the same match, we use pchars just to find the
479 printed lengths of the substrings. */
480
481 if (f != NULL) fprintf(f, "--->");
482
483 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
484 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
485 cb->current_position - cb->start_match, f);
486
487 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
488
489 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
490 cb->subject_length - cb->current_position, f);
491
492 if (f != NULL) fprintf(f, "\n");
493
494 /* Always print appropriate indicators, with callout number if not already
495 shown. For automatic callouts, show the pattern offset. */
496
497 if (cb->callout_number == 255)
498 {
499 fprintf(outfile, "%+3d ", cb->pattern_position);
500 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
501 }
502 else
503 {
504 if (callout_extra) fprintf(outfile, " ");
505 else fprintf(outfile, "%3d ", cb->callout_number);
506 }
507
508 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
509 fprintf(outfile, "^");
510
511 if (post_start > 0)
512 {
513 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
514 fprintf(outfile, "^");
515 }
516
517 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
518 fprintf(outfile, " ");
519
520 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
521 pbuffer + cb->pattern_position);
522
523 fprintf(outfile, "\n");
524 first_callout = 0;
525
526 if (cb->callout_data != NULL)
527 {
528 int callout_data = *((int *)(cb->callout_data));
529 if (callout_data != 0)
530 {
531 fprintf(outfile, "Callout data = %d\n", callout_data);
532 return callout_data;
533 }
534 }
535
536 return (cb->callout_number != callout_fail_id)? 0 :
537 (++callout_count >= callout_fail_count)? 1 : 0;
538 }
539
540
541 /*************************************************
542 * Local malloc functions *
543 *************************************************/
544
545 /* Alternative malloc function, to test functionality and show the size of the
546 compiled re. */
547
548 static void *new_malloc(size_t size)
549 {
550 void *block = malloc(size);
551 gotten_store = size;
552 if (show_malloc)
553 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
554 return block;
555 }
556
557 static void new_free(void *block)
558 {
559 if (show_malloc)
560 fprintf(outfile, "free %p\n", block);
561 free(block);
562 }
563
564
565 /* For recursion malloc/free, to test stacking calls */
566
567 static void *stack_malloc(size_t size)
568 {
569 void *block = malloc(size);
570 if (show_malloc)
571 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
572 return block;
573 }
574
575 static void stack_free(void *block)
576 {
577 if (show_malloc)
578 fprintf(outfile, "stack_free %p\n", block);
579 free(block);
580 }
581
582
583 /*************************************************
584 * Call pcre_fullinfo() *
585 *************************************************/
586
587 /* Get one piece of information from the pcre_fullinfo() function */
588
589 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
590 {
591 int rc;
592 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
593 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
594 }
595
596
597
598 /*************************************************
599 * Byte flipping function *
600 *************************************************/
601
/* Reverse the byte order of a 2-byte or 4-byte quantity.

Arguments:
  value     the value to be flipped
  n         2 to swap the two low-order bytes; any other value reverses
              the four low-order bytes

Returns:    the flipped value; bits above those handled are not carried over
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;

return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
611
612
613
614
615 /*************************************************
616 * Check match or recursion limit *
617 *************************************************/
618
619 static int
620 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
621 int start_offset, int options, int *use_offsets, int use_size_offsets,
622 int flag, unsigned long int *limit, int errnumber, const char *msg)
623 {
624 int count;
625 int min = 0;
626 int mid = 64;
627 int max = -1;
628
629 extra->flags |= flag;
630
631 for (;;)
632 {
633 *limit = mid;
634
635 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
636 use_offsets, use_size_offsets);
637
638 if (count == errnumber)
639 {
640 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
641 min = mid;
642 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
643 }
644
645 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
646 count == PCRE_ERROR_PARTIAL)
647 {
648 if (mid == min + 1)
649 {
650 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
651 break;
652 }
653 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
654 max = mid;
655 mid = (min + mid)/2;
656 }
657 else break; /* Some other error */
658 }
659
660 extra->flags &= ~flag;
661 return count;
662 }
663
664
665
666 /*************************************************
667 * Case-independent strncmp() function *
668 *************************************************/
669
670 /*
671 Arguments:
672 s first string
673 t second string
674 n number of characters to compare
675
676 Returns: < 0, = 0, or > 0, according to the comparison
677 */
678
679 static int
680 strncmpic(uschar *s, uschar *t, int n)
681 {
682 while (n--)
683 {
684 int c = tolower(*s++) - tolower(*t++);
685 if (c) return c;
686 }
687 return 0;
688 }
689
690
691
692 /*************************************************
693 * Check newline indicator *
694 *************************************************/
695
696 /* This is used both at compile and run-time to check for <xxx> escapes, where
697 xxx is LF, CR, CRLF, ANYCRLF, or ANY. Print a message and return 0 if there is
698 no match.
699
700 Arguments:
701 p points after the leading '<'
702 f file for error message
703
704 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
705 */
706
707 static int
708 check_newline(uschar *p, FILE *f)
709 {
710 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
711 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
712 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
713 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
714 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
715 fprintf(f, "Unknown newline type at: <%s\n", p);
716 return 0;
717 }
718
719
720
721 /*************************************************
722 * Usage function *
723 *************************************************/
724
/* Write the command-line summary to stdout, one option per line. The -dfa
and -p lines appear only when those features are compiled in. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input> [<output>]]\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
750
751
752
753 /*************************************************
754 * Main Program *
755 *************************************************/
756
757 /* Read lines from named file or stdin and write to named file or stdout; lines
758 consist of a regular expression, in delimiters and optionally followed by
759 options, followed by a set of test data, terminated by an empty line. */
760
761 int main(int argc, char **argv)
762 {
763 FILE *infile = stdin;
764 int options = 0;
765 int study_options = 0;
766 int op = 1;
767 int timeit = 0;
768 int timeitm = 0;
769 int showinfo = 0;
770 int showstore = 0;
771 int quiet = 0;
772 int size_offsets = 45;
773 int size_offsets_max;
774 int *offsets = NULL;
775 #if !defined NOPOSIX
776 int posix = 0;
777 #endif
778 int debug = 0;
779 int done = 0;
780 int all_use_dfa = 0;
781 int yield = 0;
782 int stack_size;
783
784 /* These vectors store, end-to-end, a list of captured substring names. Assume
785 that 1024 is plenty long enough for the few names we'll be testing. */
786
787 uschar copynames[1024];
788 uschar getnames[1024];
789
790 uschar *copynamesptr;
791 uschar *getnamesptr;
792
793 /* Get buffers from malloc() so that Electric Fence will check their misuse
794 when I am debugging. They grow automatically when very long lines are read. */
795
796 buffer = (unsigned char *)malloc(buffer_size);
797 dbuffer = (unsigned char *)malloc(buffer_size);
798 pbuffer = (unsigned char *)malloc(buffer_size);
799
800 /* The outfile variable is static so that new_malloc can use it. */
801
802 outfile = stdout;
803
804 /* The following _setmode() stuff is some Windows magic that tells its runtime
805 library to translate CRLF into a single LF character. At least, that's what
806 I've been told: never having used Windows I take this all on trust. Originally
807 it set 0x8000, but then I was advised that _O_BINARY was better. */
808
809 #if defined(_WIN32) || defined(WIN32)
810 _setmode( _fileno( stdout ), _O_BINARY );
811 #endif
812
813 /* Scan options */
814
815 while (argc > 1 && argv[op][0] == '-')
816 {
817 unsigned char *endptr;
818
819 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
820 showstore = 1;
821 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
822 else if (strcmp(argv[op], "-b") == 0) debug = 1;
823 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
824 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
825 #if !defined NODFA
826 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
827 #endif
828 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
829 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
830 *endptr == 0))
831 {
832 op++;
833 argc--;
834 }
835 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
836 {
837 int both = argv[op][2] == 0;
838 int temp;
839 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
840 *endptr == 0))
841 {
842 timeitm = temp;
843 op++;
844 argc--;
845 }
846 else timeitm = LOOPREPEAT;
847 if (both) timeit = timeitm;
848 }
849 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
850 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
851 *endptr == 0))
852 {
853 #if defined(_WIN32) || defined(WIN32)
854 printf("PCRE: -S not supported on this OS\n");
855 exit(1);
856 #else
857 int rc;
858 struct rlimit rlim;
859 getrlimit(RLIMIT_STACK, &rlim);
860 rlim.rlim_cur = stack_size * 1024 * 1024;
861 rc = setrlimit(RLIMIT_STACK, &rlim);
862 if (rc != 0)
863 {
864 printf("PCRE: setrlimit() failed with error %d\n", rc);
865 exit(1);
866 }
867 op++;
868 argc--;
869 #endif
870 }
871 #if !defined NOPOSIX
872 else if (strcmp(argv[op], "-p") == 0) posix = 1;
873 #endif
874 else if (strcmp(argv[op], "-C") == 0)
875 {
876 int rc;
877 printf("PCRE version %s\n", pcre_version());
878 printf("Compiled with\n");
879 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
880 printf(" %sUTF-8 support\n", rc? "" : "No ");
881 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
882 printf(" %sUnicode properties support\n", rc? "" : "No ");
883 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
884 printf(" Newline sequence is %s\n", (rc == '\r')? "CR" :
885 (rc == '\n')? "LF" : (rc == ('\r'<<8 | '\n'))? "CRLF" :
886 (rc == -2)? "ANYCRLF" :
887 (rc == -1)? "ANY" : "???");
888 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
889 printf(" Internal link size = %d\n", rc);
890 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
891 printf(" POSIX malloc threshold = %d\n", rc);
892 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &rc);
893 printf(" Default match limit = %d\n", rc);
894 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &rc);
895 printf(" Default recursion depth limit = %d\n", rc);
896 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
897 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
898 goto EXIT;
899 }
900 else if (strcmp(argv[op], "-help") == 0 ||
901 strcmp(argv[op], "--help") == 0)
902 {
903 usage();
904 goto EXIT;
905 }
906 else
907 {
908 printf("** Unknown or malformed option %s\n", argv[op]);
909 usage();
910 yield = 1;
911 goto EXIT;
912 }
913 op++;
914 argc--;
915 }
916
917 /* Get the store for the offsets vector, and remember what it was */
918
919 size_offsets_max = size_offsets;
920 offsets = (int *)malloc(size_offsets_max * sizeof(int));
921 if (offsets == NULL)
922 {
923 printf("** Failed to get %d bytes of memory for offsets vector\n",
924 (int)(size_offsets_max * sizeof(int)));
925 yield = 1;
926 goto EXIT;
927 }
928
929 /* Sort out the input and output files */
930
931 if (argc > 1)
932 {
933 infile = fopen(argv[op], INPUT_MODE);
934 if (infile == NULL)
935 {
936 printf("** Failed to open %s\n", argv[op]);
937 yield = 1;
938 goto EXIT;
939 }
940 }
941
942 if (argc > 2)
943 {
944 outfile = fopen(argv[op+1], OUTPUT_MODE);
945 if (outfile == NULL)
946 {
947 printf("** Failed to open %s\n", argv[op+1]);
948 yield = 1;
949 goto EXIT;
950 }
951 }
952
953 /* Set alternative malloc function */
954
955 pcre_malloc = new_malloc;
956 pcre_free = new_free;
957 pcre_stack_malloc = stack_malloc;
958 pcre_stack_free = stack_free;
959
960 /* Heading line unless quiet, then prompt for first regex if stdin */
961
962 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
963
964 /* Main loop */
965
966 while (!done)
967 {
968 pcre *re = NULL;
969 pcre_extra *extra = NULL;
970
971 #if !defined NOPOSIX /* There are still compilers that require no indent */
972 regex_t preg;
973 int do_posix = 0;
974 #endif
975
976 const char *error;
977 unsigned char *p, *pp, *ppp;
978 unsigned char *to_file = NULL;
979 const unsigned char *tables = NULL;
980 unsigned long int true_size, true_study_size = 0;
981 size_t size, regex_gotten_store;
982 int do_study = 0;
983 int do_debug = debug;
984 int do_G = 0;
985 int do_g = 0;
986 int do_showinfo = showinfo;
987 int do_showrest = 0;
988 int do_flip = 0;
989 int erroroffset, len, delimiter, poffset;
990
991 use_utf8 = 0;
992 debug_lengths = 1;
993
994 if (infile == stdin) printf(" re> ");
995 if (extend_inputline(infile, buffer) == NULL) break;
996 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
997 fflush(outfile);
998
999 p = buffer;
1000 while (isspace(*p)) p++;
1001 if (*p == 0) continue;
1002
1003 /* See if the pattern is to be loaded pre-compiled from a file. */
1004
1005 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1006 {
1007 unsigned long int magic, get_options;
1008 uschar sbuf[8];
1009 FILE *f;
1010
1011 p++;
1012 pp = p + (int)strlen((char *)p);
1013 while (isspace(pp[-1])) pp--;
1014 *pp = 0;
1015
1016 f = fopen((char *)p, "rb");
1017 if (f == NULL)
1018 {
1019 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1020 continue;
1021 }
1022
1023 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1024
1025 true_size =
1026 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1027 true_study_size =
1028 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1029
1030 re = (real_pcre *)new_malloc(true_size);
1031 regex_gotten_store = gotten_store;
1032
1033 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1034
1035 magic = ((real_pcre *)re)->magic_number;
1036 if (magic != MAGIC_NUMBER)
1037 {
1038 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1039 {
1040 do_flip = 1;
1041 }
1042 else
1043 {
1044 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1045 fclose(f);
1046 continue;
1047 }
1048 }
1049
1050 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1051 do_flip? " (byte-inverted)" : "", p);
1052
1053 /* Need to know if UTF-8 for printing data strings */
1054
1055 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1056 use_utf8 = (get_options & PCRE_UTF8) != 0;
1057
1058 /* Now see if there is any following study data */
1059
1060 if (true_study_size != 0)
1061 {
1062 pcre_study_data *psd;
1063
1064 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1065 extra->flags = PCRE_EXTRA_STUDY_DATA;
1066
1067 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1068 extra->study_data = psd;
1069
1070 if (fread(psd, 1, true_study_size, f) != true_study_size)
1071 {
1072 FAIL_READ:
1073 fprintf(outfile, "Failed to read data from %s\n", p);
1074 if (extra != NULL) new_free(extra);
1075 if (re != NULL) new_free(re);
1076 fclose(f);
1077 continue;
1078 }
1079 fprintf(outfile, "Study data loaded from %s\n", p);
1080 do_study = 1; /* To get the data output if requested */
1081 }
1082 else fprintf(outfile, "No study data\n");
1083
1084 fclose(f);
1085 goto SHOW_INFO;
1086 }
1087
1088 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1089 the pattern; if it isn't complete, read more. */
1090
1091 delimiter = *p++;
1092
1093 if (isalnum(delimiter) || delimiter == '\\')
1094 {
1095 fprintf(outfile, "** Delimiter must not be alphameric or \\\n");
1096 goto SKIP_DATA;
1097 }
1098
1099 pp = p;
1100 poffset = p - buffer;
1101
1102 for(;;)
1103 {
1104 while (*pp != 0)
1105 {
1106 if (*pp == '\\' && pp[1] != 0) pp++;
1107 else if (*pp == delimiter) break;
1108 pp++;
1109 }
1110 if (*pp != 0) break;
1111 if (infile == stdin) printf(" > ");
1112 if ((pp = extend_inputline(infile, pp)) == NULL)
1113 {
1114 fprintf(outfile, "** Unexpected EOF\n");
1115 done = 1;
1116 goto CONTINUE;
1117 }
1118 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1119 }
1120
1121 /* The buffer may have moved while being extended; reset the start of data
1122 pointer to the correct relative point in the buffer. */
1123
1124 p = buffer + poffset;
1125
1126 /* If the first character after the delimiter is backslash, make
1127 the pattern end with backslash. This is purely to provide a way
1128 of testing for the error message when a pattern ends with backslash. */
1129
1130 if (pp[1] == '\\') *pp++ = '\\';
1131
1132 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1133 for callouts. */
1134
1135 *pp++ = 0;
1136 strcpy((char *)pbuffer, (char *)p);
1137
1138 /* Look for options after final delimiter */
1139
1140 options = 0;
1141 study_options = 0;
1142 log_store = showstore; /* default from command line */
1143
1144 while (*pp != 0)
1145 {
1146 switch (*pp++)
1147 {
1148 case 'f': options |= PCRE_FIRSTLINE; break;
1149 case 'g': do_g = 1; break;
1150 case 'i': options |= PCRE_CASELESS; break;
1151 case 'm': options |= PCRE_MULTILINE; break;
1152 case 's': options |= PCRE_DOTALL; break;
1153 case 'x': options |= PCRE_EXTENDED; break;
1154
1155 case '+': do_showrest = 1; break;
1156 case 'A': options |= PCRE_ANCHORED; break;
1157 case 'B': do_debug = 1; break;
1158 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1159 case 'D': do_debug = do_showinfo = 1; break;
1160 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1161 case 'F': do_flip = 1; break;
1162 case 'G': do_G = 1; break;
1163 case 'I': do_showinfo = 1; break;
1164 case 'J': options |= PCRE_DUPNAMES; break;
1165 case 'M': log_store = 1; break;
1166 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1167
1168 #if !defined NOPOSIX
1169 case 'P': do_posix = 1; break;
1170 #endif
1171
1172 case 'S': do_study = 1; break;
1173 case 'U': options |= PCRE_UNGREEDY; break;
1174 case 'X': options |= PCRE_EXTRA; break;
1175 case 'Z': debug_lengths = 0; break;
1176 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1177 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1178
1179 case 'L':
1180 ppp = pp;
1181 /* The '\r' test here is so that it works on Windows. */
1182 /* The '0' test is just in case this is an unterminated line. */
1183 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1184 *ppp = 0;
1185 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1186 {
1187 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1188 goto SKIP_DATA;
1189 }
1190 locale_set = 1;
1191 tables = pcre_maketables();
1192 pp = ppp;
1193 break;
1194
1195 case '>':
1196 to_file = pp;
1197 while (*pp != 0) pp++;
1198 while (isspace(pp[-1])) pp--;
1199 *pp = 0;
1200 break;
1201
1202 case '<':
1203 {
1204 int x = check_newline(pp, outfile);
1205 if (x == 0) goto SKIP_DATA;
1206 options |= x;
1207 while (*pp++ != '>');
1208 }
1209 break;
1210
1211 case '\r': /* So that it works in Windows */
1212 case '\n':
1213 case ' ':
1214 break;
1215
1216 default:
1217 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1218 goto SKIP_DATA;
1219 }
1220 }
1221
1222 /* Handle compiling via the POSIX interface, which doesn't support the
1223 timing, showing, or debugging options, nor the ability to pass over
1224 local character tables. */
1225
1226 #if !defined NOPOSIX
1227 if (posix || do_posix)
1228 {
1229 int rc;
1230 int cflags = 0;
1231
1232 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1233 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1234 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1235 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1236 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1237
1238 rc = regcomp(&preg, (char *)p, cflags);
1239
1240 /* Compilation failed; go back for another re, skipping to blank line
1241 if non-interactive. */
1242
1243 if (rc != 0)
1244 {
1245 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1246 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1247 goto SKIP_DATA;
1248 }
1249 }
1250
1251 /* Handle compiling via the native interface */
1252
1253 else
1254 #endif /* !defined NOPOSIX */
1255
1256 {
1257 if (timeit > 0)
1258 {
1259 register int i;
1260 clock_t time_taken;
1261 clock_t start_time = clock();
1262 for (i = 0; i < timeit; i++)
1263 {
1264 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1265 if (re != NULL) free(re);
1266 }
1267 time_taken = clock() - start_time;
1268 fprintf(outfile, "Compile time %.4f milliseconds\n",
1269 (((double)time_taken * 1000.0) / (double)timeit) /
1270 (double)CLOCKS_PER_SEC);
1271 }
1272
1273 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1274
1275 /* Compilation failed; go back for another re, skipping to blank line
1276 if non-interactive. */
1277
1278 if (re == NULL)
1279 {
1280 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1281 SKIP_DATA:
1282 if (infile != stdin)
1283 {
1284 for (;;)
1285 {
1286 if (extend_inputline(infile, buffer) == NULL)
1287 {
1288 done = 1;
1289 goto CONTINUE;
1290 }
1291 len = (int)strlen((char *)buffer);
1292 while (len > 0 && isspace(buffer[len-1])) len--;
1293 if (len == 0) break;
1294 }
1295 fprintf(outfile, "\n");
1296 }
1297 goto CONTINUE;
1298 }
1299
1300 /* Compilation succeeded; print data if required. There are now two
1301 info-returning functions. The old one has a limited interface and
1302 returns only limited data. Check that it agrees with the newer one. */
1303
1304 if (log_store)
1305 fprintf(outfile, "Memory allocation (code space): %d\n",
1306 (int)(gotten_store -
1307 sizeof(real_pcre) -
1308 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1309
1310 /* Extract the size for possible writing before possibly flipping it,
1311 and remember the store that was got. */
1312
1313 true_size = ((real_pcre *)re)->size;
1314 regex_gotten_store = gotten_store;
1315
1316 /* If /S was present, study the regexp to generate additional info to
1317 help with the matching. */
1318
1319 if (do_study)
1320 {
1321 if (timeit > 0)
1322 {
1323 register int i;
1324 clock_t time_taken;
1325 clock_t start_time = clock();
1326 for (i = 0; i < timeit; i++)
1327 extra = pcre_study(re, study_options, &error);
1328 time_taken = clock() - start_time;
1329 if (extra != NULL) free(extra);
1330 fprintf(outfile, " Study time %.4f milliseconds\n",
1331 (((double)time_taken * 1000.0) / (double)timeit) /
1332 (double)CLOCKS_PER_SEC);
1333 }
1334 extra = pcre_study(re, study_options, &error);
1335 if (error != NULL)
1336 fprintf(outfile, "Failed to study: %s\n", error);
1337 else if (extra != NULL)
1338 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1339 }
1340
1341 /* If the 'F' option was present, we flip the bytes of all the integer
1342 fields in the regex data block and the study block. This is to make it
1343 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1344 compiled on a different architecture. */
1345
1346 if (do_flip)
1347 {
1348 real_pcre *rre = (real_pcre *)re;
1349 rre->magic_number = byteflip(rre->magic_number, sizeof(rre->magic_number));
1350 rre->size = byteflip(rre->size, sizeof(rre->size));
1351 rre->options = byteflip(rre->options, sizeof(rre->options));
1352 rre->flags = byteflip(rre->flags, sizeof(rre->flags));
1353 rre->top_bracket = byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1354 rre->top_backref = byteflip(rre->top_backref, sizeof(rre->top_backref));
1355 rre->first_byte = byteflip(rre->first_byte, sizeof(rre->first_byte));
1356 rre->req_byte = byteflip(rre->req_byte, sizeof(rre->req_byte));
1357 rre->name_table_offset = byteflip(rre->name_table_offset,
1358 sizeof(rre->name_table_offset));
1359 rre->name_entry_size = byteflip(rre->name_entry_size,
1360 sizeof(rre->name_entry_size));
1361 rre->name_count = byteflip(rre->name_count, sizeof(rre->name_count));
1362
1363 if (extra != NULL)
1364 {
1365 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1366 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1367 rsd->options = byteflip(rsd->options, sizeof(rsd->options));
1368 }
1369 }
1370
1371 /* Extract information from the compiled data if required */
1372
1373 SHOW_INFO:
1374
1375 if (do_debug)
1376 {
1377 fprintf(outfile, "------------------------------------------------------------------\n");
1378 pcre_printint(re, outfile, debug_lengths);
1379 }
1380
1381 if (do_showinfo)
1382 {
1383 unsigned long int get_options, all_options;
1384 #if !defined NOINFOCHECK
1385 int old_first_char, old_options, old_count;
1386 #endif
1387 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1388 hascrorlf;
1389 int nameentrysize, namecount;
1390 const uschar *nametable;
1391
1392 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1393 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1394 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1395 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1396 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1397 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1398 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1399 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1400 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1401 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1402 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1403 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1404
1405 #if !defined NOINFOCHECK
1406 old_count = pcre_info(re, &old_options, &old_first_char);
1407 if (count < 0) fprintf(outfile,
1408 "Error %d from pcre_info()\n", count);
1409 else
1410 {
1411 if (old_count != count) fprintf(outfile,
1412 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1413 old_count);
1414
1415 if (old_first_char != first_char) fprintf(outfile,
1416 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1417 first_char, old_first_char);
1418
1419 if (old_options != (int)get_options) fprintf(outfile,
1420 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1421 get_options, old_options);
1422 }
1423 #endif
1424
1425 if (size != regex_gotten_store) fprintf(outfile,
1426 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1427 (int)size, (int)regex_gotten_store);
1428
1429 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1430 if (backrefmax > 0)
1431 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1432
1433 if (namecount > 0)
1434 {
1435 fprintf(outfile, "Named capturing subpatterns:\n");
1436 while (namecount-- > 0)
1437 {
1438 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1439 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1440 GET2(nametable, 0));
1441 nametable += nameentrysize;
1442 }
1443 }
1444
1445 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1446 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1447
1448 all_options = ((real_pcre *)re)->options;
1449 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1450
1451 if (get_options == 0) fprintf(outfile, "No options\n");
1452 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1453 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1454 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1455 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1456 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1457 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1458 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1459 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1460 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1461 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1462 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1463 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1464 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1465 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1466
1467 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1468
1469 switch (get_options & PCRE_NEWLINE_BITS)
1470 {
1471 case PCRE_NEWLINE_CR:
1472 fprintf(outfile, "Forced newline sequence: CR\n");
1473 break;
1474
1475 case PCRE_NEWLINE_LF:
1476 fprintf(outfile, "Forced newline sequence: LF\n");
1477 break;
1478
1479 case PCRE_NEWLINE_CRLF:
1480 fprintf(outfile, "Forced newline sequence: CRLF\n");
1481 break;
1482
1483 case PCRE_NEWLINE_ANYCRLF:
1484 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1485 break;
1486
1487 case PCRE_NEWLINE_ANY:
1488 fprintf(outfile, "Forced newline sequence: ANY\n");
1489 break;
1490
1491 default:
1492 break;
1493 }
1494
1495 if (first_char == -1)
1496 {
1497 fprintf(outfile, "First char at start or follows newline\n");
1498 }
1499 else if (first_char < 0)
1500 {
1501 fprintf(outfile, "No first char\n");
1502 }
1503 else
1504 {
1505 int ch = first_char & 255;
1506 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1507 "" : " (caseless)";
1508 if (PRINTHEX(ch))
1509 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1510 else
1511 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1512 }
1513
1514 if (need_char < 0)
1515 {
1516 fprintf(outfile, "No need char\n");
1517 }
1518 else
1519 {
1520 int ch = need_char & 255;
1521 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1522 "" : " (caseless)";
1523 if (PRINTHEX(ch))
1524 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1525 else
1526 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1527 }
1528
1529 /* Don't output study size; at present it is in any case a fixed
1530 value, but it varies, depending on the computer architecture, and
1531 so messes up the test suite. (And with the /F option, it might be
1532 flipped.) */
1533
1534 if (do_study)
1535 {
1536 if (extra == NULL)
1537 fprintf(outfile, "Study returned NULL\n");
1538 else
1539 {
1540 uschar *start_bits = NULL;
1541 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1542
1543 if (start_bits == NULL)
1544 fprintf(outfile, "No starting byte set\n");
1545 else
1546 {
1547 int i;
1548 int c = 24;
1549 fprintf(outfile, "Starting byte set: ");
1550 for (i = 0; i < 256; i++)
1551 {
1552 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1553 {
1554 if (c > 75)
1555 {
1556 fprintf(outfile, "\n ");
1557 c = 2;
1558 }
1559 if (PRINTHEX(i) && i != ' ')
1560 {
1561 fprintf(outfile, "%c ", i);
1562 c += 2;
1563 }
1564 else
1565 {
1566 fprintf(outfile, "\\x%02x ", i);
1567 c += 5;
1568 }
1569 }
1570 }
1571 fprintf(outfile, "\n");
1572 }
1573 }
1574 }
1575 }
1576
1577 /* If the '>' option was present, we write out the regex to a file, and
1578 that is all. The first 8 bytes of the file are the regex length and then
1579 the study length, in big-endian order. */
1580
1581 if (to_file != NULL)
1582 {
1583 FILE *f = fopen((char *)to_file, "wb");
1584 if (f == NULL)
1585 {
1586 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1587 }
1588 else
1589 {
1590 uschar sbuf[8];
1591 sbuf[0] = (true_size >> 24) & 255;
1592 sbuf[1] = (true_size >> 16) & 255;
1593 sbuf[2] = (true_size >> 8) & 255;
1594 sbuf[3] = (true_size) & 255;
1595
1596 sbuf[4] = (true_study_size >> 24) & 255;
1597 sbuf[5] = (true_study_size >> 16) & 255;
1598 sbuf[6] = (true_study_size >> 8) & 255;
1599 sbuf[7] = (true_study_size) & 255;
1600
1601 if (fwrite(sbuf, 1, 8, f) < 8 ||
1602 fwrite(re, 1, true_size, f) < true_size)
1603 {
1604 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1605 }
1606 else
1607 {
1608 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1609 if (extra != NULL)
1610 {
1611 if (fwrite(extra->study_data, 1, true_study_size, f) <
1612 true_study_size)
1613 {
1614 fprintf(outfile, "Write error on %s: %s\n", to_file,
1615 strerror(errno));
1616 }
1617 else fprintf(outfile, "Study data written to %s\n", to_file);
1618
1619 }
1620 }
1621 fclose(f);
1622 }
1623
1624 new_free(re);
1625 if (extra != NULL) new_free(extra);
1626 if (tables != NULL) new_free((void *)tables);
1627 continue; /* With next regex */
1628 }
1629 } /* End of non-POSIX compile */
1630
1631 /* Read data lines and test them */
1632
1633 for (;;)
1634 {
1635 uschar *q;
1636 uschar *bptr;
1637 int *use_offsets = offsets;
1638 int use_size_offsets = size_offsets;
1639 int callout_data = 0;
1640 int callout_data_set = 0;
1641 int count, c;
1642 int copystrings = 0;
1643 int find_match_limit = 0;
1644 int getstrings = 0;
1645 int getlist = 0;
1646 int gmatched = 0;
1647 int start_offset = 0;
1648 int g_notempty = 0;
1649 int use_dfa = 0;
1650
1651 options = 0;
1652
1653 *copynames = 0;
1654 *getnames = 0;
1655
1656 copynamesptr = copynames;
1657 getnamesptr = getnames;
1658
1659 pcre_callout = callout;
1660 first_callout = 1;
1661 callout_extra = 0;
1662 callout_count = 0;
1663 callout_fail_count = 999999;
1664 callout_fail_id = -1;
1665 show_malloc = 0;
1666
1667 if (extra != NULL) extra->flags &=
1668 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1669
1670 len = 0;
1671 for (;;)
1672 {
1673 if (infile == stdin) printf("data> ");
1674 if (extend_inputline(infile, buffer + len) == NULL)
1675 {
1676 if (len > 0) break;
1677 done = 1;
1678 goto CONTINUE;
1679 }
1680 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1681 len = (int)strlen((char *)buffer);
1682 if (buffer[len-1] == '\n') break;
1683 }
1684
1685 while (len > 0 && isspace(buffer[len-1])) len--;
1686 buffer[len] = 0;
1687 if (len == 0) break;
1688
1689 p = buffer;
1690 while (isspace(*p)) p++;
1691
1692 bptr = q = dbuffer;
1693 while ((c = *p++) != 0)
1694 {
1695 int i = 0;
1696 int n = 0;
1697
1698 if (c == '\\') switch ((c = *p++))
1699 {
1700 case 'a': c = 7; break;
1701 case 'b': c = '\b'; break;
1702 case 'e': c = 27; break;
1703 case 'f': c = '\f'; break;
1704 case 'n': c = '\n'; break;
1705 case 'r': c = '\r'; break;
1706 case 't': c = '\t'; break;
1707 case 'v': c = '\v'; break;
1708
1709 case '0': case '1': case '2': case '3':
1710 case '4': case '5': case '6': case '7':
1711 c -= '0';
1712 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1713 c = c * 8 + *p++ - '0';
1714
1715 #if !defined NOUTF8
1716 if (use_utf8 && c > 255)
1717 {
1718 unsigned char buff8[8];
1719 int ii, utn;
1720 utn = ord2utf8(c, buff8);
1721 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1722 c = buff8[ii]; /* Last byte */
1723 }
1724 #endif
1725 break;
1726
1727 case 'x':
1728
1729 /* Handle \x{..} specially - new Perl thing for utf8 */
1730
1731 #if !defined NOUTF8
1732 if (*p == '{')
1733 {
1734 unsigned char *pt = p;
1735 c = 0;
1736 while (isxdigit(*(++pt)))
1737 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1738 if (*pt == '}')
1739 {
1740 unsigned char buff8[8];
1741 int ii, utn;
1742 utn = ord2utf8(c, buff8);
1743 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1744 c = buff8[ii]; /* Last byte */
1745 p = pt + 1;
1746 break;
1747 }
1748 /* Not correct form; fall through */
1749 }
1750 #endif
1751
1752 /* Ordinary \x */
1753
1754 c = 0;
1755 while (i++ < 2 && isxdigit(*p))
1756 {
1757 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1758 p++;
1759 }
1760 break;
1761
1762 case 0: /* \ followed by EOF allows for an empty line */
1763 p--;
1764 continue;
1765
1766 case '>':
1767 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1768 continue;
1769
1770 case 'A': /* Option setting */
1771 options |= PCRE_ANCHORED;
1772 continue;
1773
1774 case 'B':
1775 options |= PCRE_NOTBOL;
1776 continue;
1777
1778 case 'C':
1779 if (isdigit(*p)) /* Set copy string */
1780 {
1781 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1782 copystrings |= 1 << n;
1783 }
1784 else if (isalnum(*p))
1785 {
1786 uschar *npp = copynamesptr;
1787 while (isalnum(*p)) *npp++ = *p++;
1788 *npp++ = 0;
1789 *npp = 0;
1790 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1791 if (n < 0)
1792 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1793 copynamesptr = npp;
1794 }
1795 else if (*p == '+')
1796 {
1797 callout_extra = 1;
1798 p++;
1799 }
1800 else if (*p == '-')
1801 {
1802 pcre_callout = NULL;
1803 p++;
1804 }
1805 else if (*p == '!')
1806 {
1807 callout_fail_id = 0;
1808 p++;
1809 while(isdigit(*p))
1810 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1811 callout_fail_count = 0;
1812 if (*p == '!')
1813 {
1814 p++;
1815 while(isdigit(*p))
1816 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1817 }
1818 }
1819 else if (*p == '*')
1820 {
1821 int sign = 1;
1822 callout_data = 0;
1823 if (*(++p) == '-') { sign = -1; p++; }
1824 while(isdigit(*p))
1825 callout_data = callout_data * 10 + *p++ - '0';
1826 callout_data *= sign;
1827 callout_data_set = 1;
1828 }
1829 continue;
1830
1831 #if !defined NODFA
1832 case 'D':
1833 #if !defined NOPOSIX
1834 if (posix || do_posix)
1835 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1836 else
1837 #endif
1838 use_dfa = 1;
1839 continue;
1840
1841 case 'F':
1842 options |= PCRE_DFA_SHORTEST;
1843 continue;
1844 #endif
1845
1846 case 'G':
1847 if (isdigit(*p))
1848 {
1849 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1850 getstrings |= 1 << n;
1851 }
1852 else if (isalnum(*p))
1853 {
1854 uschar *npp = getnamesptr;
1855 while (isalnum(*p)) *npp++ = *p++;
1856 *npp++ = 0;
1857 *npp = 0;
1858 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1859 if (n < 0)
1860 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1861 getnamesptr = npp;
1862 }
1863 continue;
1864
1865 case 'L':
1866 getlist = 1;
1867 continue;
1868
1869 case 'M':
1870 find_match_limit = 1;
1871 continue;
1872
1873 case 'N':
1874 options |= PCRE_NOTEMPTY;
1875 continue;
1876
1877 case 'O':
1878 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1879 if (n > size_offsets_max)
1880 {
1881 size_offsets_max = n;
1882 free(offsets);
1883 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
1884 if (offsets == NULL)
1885 {
1886 printf("** Failed to get %d bytes of memory for offsets vector\n",
1887 (int)(size_offsets_max * sizeof(int)));
1888 yield = 1;
1889 goto EXIT;
1890 }
1891 }
1892 use_size_offsets = n;
1893 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
1894 continue;
1895
1896 case 'P':
1897 options |= PCRE_PARTIAL;
1898 continue;
1899
1900 case 'Q':
1901 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1902 if (extra == NULL)
1903 {
1904 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1905 extra->flags = 0;
1906 }
1907 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
1908 extra->match_limit_recursion = n;
1909 continue;
1910
1911 case 'q':
1912 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1913 if (extra == NULL)
1914 {
1915 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1916 extra->flags = 0;
1917 }
1918 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
1919 extra->match_limit = n;
1920 continue;
1921
1922 #if !defined NODFA
1923 case 'R':
1924 options |= PCRE_DFA_RESTART;
1925 continue;
1926 #endif
1927
1928 case 'S':
1929 show_malloc = 1;
1930 continue;
1931
1932 case 'Z':
1933 options |= PCRE_NOTEOL;
1934 continue;
1935
1936 case '?':
1937 options |= PCRE_NO_UTF8_CHECK;
1938 continue;
1939
1940 case '<':
1941 {
1942 int x = check_newline(p, outfile);
1943 if (x == 0) goto NEXT_DATA;
1944 options |= x;
1945 while (*p++ != '>');
1946 }
1947 continue;
1948 }
1949 *q++ = c;
1950 }
1951 *q = 0;
1952 len = q - dbuffer;
1953
1954 if ((all_use_dfa || use_dfa) && find_match_limit)
1955 {
1956 printf("**Match limit not relevant for DFA matching: ignored\n");
1957 find_match_limit = 0;
1958 }
1959
1960 /* Handle matching via the POSIX interface, which does not
1961 support timing or playing with the match limit or callout data. */
1962
1963 #if !defined NOPOSIX
1964 if (posix || do_posix)
1965 {
1966 int rc;
1967 int eflags = 0;
1968 regmatch_t *pmatch = NULL;
1969 if (use_size_offsets > 0)
1970 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
1971 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1972 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1973
1974 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
1975
1976 if (rc != 0)
1977 {
1978 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1979 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
1980 }
1981 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
1982 != 0)
1983 {
1984 fprintf(outfile, "Matched with REG_NOSUB\n");
1985 }
1986 else
1987 {
1988 size_t i;
1989 for (i = 0; i < (size_t)use_size_offsets; i++)
1990 {
1991 if (pmatch[i].rm_so >= 0)
1992 {
1993 fprintf(outfile, "%2d: ", (int)i);
1994 (void)pchars(dbuffer + pmatch[i].rm_so,
1995 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
1996 fprintf(outfile, "\n");
1997 if (i == 0 && do_showrest)
1998 {
1999 fprintf(outfile, " 0+ ");
2000 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2001 outfile);
2002 fprintf(outfile, "\n");
2003 }
2004 }
2005 }
2006 }
2007 free(pmatch);
2008 }
2009
2010 /* Handle matching via the native interface - repeats for /g and /G */
2011
2012 else
2013 #endif /* !defined NOPOSIX */
2014
2015 for (;; gmatched++) /* Loop for /g or /G */
2016 {
2017 if (timeitm > 0)
2018 {
2019 register int i;
2020 clock_t time_taken;
2021 clock_t start_time = clock();
2022
2023 #if !defined NODFA
2024 if (all_use_dfa || use_dfa)
2025 {
2026 int workspace[1000];
2027 for (i = 0; i < timeitm; i++)
2028 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2029 options | g_notempty, use_offsets, use_size_offsets, workspace,
2030 sizeof(workspace)/sizeof(int));
2031 }
2032 else
2033 #endif
2034
2035 for (i = 0; i < timeitm; i++)
2036 count = pcre_exec(re, extra, (char *)bptr, len,
2037 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2038
2039 time_taken = clock() - start_time;
2040 fprintf(outfile, "Execute time %.4f milliseconds\n",
2041 (((double)time_taken * 1000.0) / (double)timeitm) /
2042 (double)CLOCKS_PER_SEC);
2043 }
2044
2045 /* If find_match_limit is set, we want to do repeated matches with
2046 varying limits in order to find the minimum value for the match limit and
2047 for the recursion limit. */
2048
2049 if (find_match_limit)
2050 {
2051 if (extra == NULL)
2052 {
2053 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2054 extra->flags = 0;
2055 }
2056
2057 (void)check_match_limit(re, extra, bptr, len, start_offset,
2058 options|g_notempty, use_offsets, use_size_offsets,
2059 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2060 PCRE_ERROR_MATCHLIMIT, "match()");
2061
2062 count = check_match_limit(re, extra, bptr, len, start_offset,
2063 options|g_notempty, use_offsets, use_size_offsets,
2064 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2065 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2066 }
2067
2068 /* If callout_data is set, use the interface with additional data */
2069
2070 else if (callout_data_set)
2071 {
2072 if (extra == NULL)
2073 {
2074 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2075 extra->flags = 0;
2076 }
2077 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2078 extra->callout_data = &callout_data;
2079 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2080 options | g_notempty, use_offsets, use_size_offsets);
2081 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2082 }
2083
2084 /* The normal case is just to do the match once, with the default
2085 value of match_limit. */
2086
2087 #if !defined NODFA
2088 else if (all_use_dfa || use_dfa)
2089 {
2090 int workspace[1000];
2091 count = pcre_dfa_exec(re, NULL, (char *)bptr, len, start_offset,
2092 options | g_notempty, use_offsets, use_size_offsets, workspace,
2093 sizeof(workspace)/sizeof(int));
2094 if (count == 0)
2095 {
2096 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2097 count = use_size_offsets/2;
2098 }
2099 }
2100 #endif
2101
2102 else
2103 {
2104 count = pcre_exec(re, extra, (char *)bptr, len,
2105 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2106 if (count == 0)
2107 {
2108 fprintf(outfile, "Matched, but too many substrings\n");
2109 count = use_size_offsets/3;
2110 }
2111 }
2112
2113 /* Matched */
2114
2115 if (count >= 0)
2116 {
2117 int i, maxcount;
2118
2119 #if !defined NODFA
2120 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2121 #endif
2122 maxcount = use_size_offsets/3;
2123
2124 /* This is a check against a lunatic return value. */
2125
2126 if (count > maxcount)
2127 {
2128 fprintf(outfile,
2129 "** PCRE error: returned count %d is too big for offset size %d\n",
2130 count, use_size_offsets);
2131 count = use_size_offsets/3;
2132 if (do_g || do_G)
2133 {
2134 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2135 do_g = do_G = FALSE; /* Break g/G loop */
2136 }
2137 }
2138
2139 for (i = 0; i < count * 2; i += 2)
2140 {
2141 if (use_offsets[i] < 0)
2142 fprintf(outfile, "%2d: <unset>\n", i/2);
2143 else
2144 {
2145 fprintf(outfile, "%2d: ", i/2);
2146 (void)pchars(bptr + use_offsets[i],
2147 use_offsets[i+1] - use_offsets[i], outfile);
2148 fprintf(outfile, "\n");
2149 if (i == 0)
2150 {
2151 if (do_showrest)
2152 {
2153 fprintf(outfile, " 0+ ");
2154 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2155 outfile);
2156 fprintf(outfile, "\n");
2157 }
2158 }
2159 }
2160 }
2161
2162 for (i = 0; i < 32; i++)
2163 {
2164 if ((copystrings & (1 << i)) != 0)
2165 {
2166 char copybuffer[256];
2167 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2168 i, copybuffer, sizeof(copybuffer));
2169 if (rc < 0)
2170 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2171 else
2172 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2173 }
2174 }
2175
2176 for (copynamesptr = copynames;
2177 *copynamesptr != 0;
2178 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2179 {
2180 char copybuffer[256];
2181 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2182 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2183 if (rc < 0)
2184 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2185 else
2186 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2187 }
2188
2189 for (i = 0; i < 32; i++)
2190 {
2191 if ((getstrings & (1 << i)) != 0)
2192 {
2193 const char *substring;
2194 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2195 i, &substring);
2196 if (rc < 0)
2197 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2198 else
2199 {
2200 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2201 pcre_free_substring(substring);
2202 }
2203 }
2204 }
2205
2206 for (getnamesptr = getnames;
2207 *getnamesptr != 0;
2208 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2209 {
2210 const char *substring;
2211 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2212 count, (char *)getnamesptr, &substring);
2213 if (rc < 0)
2214 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2215 else
2216 {
2217 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2218 pcre_free_substring(substring);
2219 }
2220 }
2221
2222 if (getlist)
2223 {
2224 const char **stringlist;
2225 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2226 &stringlist);
2227 if (rc < 0)
2228 fprintf(outfile, "get substring list failed %d\n", rc);
2229 else
2230 {
2231 for (i = 0; i < count; i++)
2232 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2233 if (stringlist[i] != NULL)
2234 fprintf(outfile, "string list not terminated by NULL\n");
2235 /* free((void *)stringlist); */
2236 pcre_free_substring_list(stringlist);
2237 }
2238 }
2239 }
2240
2241 /* There was a partial match */
2242
2243 else if (count == PCRE_ERROR_PARTIAL)
2244 {
2245 fprintf(outfile, "Partial match");
2246 #if !defined NODFA
2247 if ((all_use_dfa || use_dfa) && use_size_offsets > 2)
2248 fprintf(outfile, ": %.*s", use_offsets[1] - use_offsets[0],
2249 bptr + use_offsets[0]);
2250 #endif
2251 fprintf(outfile, "\n");
2252 break; /* Out of the /g loop */
2253 }
2254
2255 /* Failed to match. If this is a /g or /G loop and we previously set
2256 g_notempty after a null match, this is not necessarily the end. We want
2257 to advance the start offset, and continue. We won't be at the end of the
2258 string - that was checked before setting g_notempty.
2259
2260 Complication arises in the case when the newline option is "any" or
2261 "anycrlf". If the previous match was at the end of a line terminated by
2262 CRLF, an advance of one character just passes the \r, whereas we should
2263 prefer the longer newline sequence, as does the code in pcre_exec().
2264 Fudge the offset value to achieve this.
2265
2266 Otherwise, in the case of UTF-8 matching, the advance must be one
2267 character, not one byte. */
2268
2269 else
2270 {
2271 if (g_notempty != 0)
2272 {
2273 int onechar = 1;
2274 unsigned int obits = ((real_pcre *)re)->options;
2275 use_offsets[0] = start_offset;
2276 if ((obits & PCRE_NEWLINE_BITS) == 0)
2277 {
2278 int d;
2279 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2280 obits = (d == '\r')? PCRE_NEWLINE_CR :
2281 (d == '\n')? PCRE_NEWLINE_LF :
2282 (d == ('\r'<<8 | '\n'))? PCRE_NEWLINE_CRLF :
2283 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2284 (d == -1)? PCRE_NEWLINE_ANY : 0;
2285 }
2286 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2287 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2288 &&
2289 start_offset < len - 1 &&
2290 bptr[start_offset] == '\r' &&
2291 bptr[start_offset+1] == '\n')
2292 onechar++;
2293 else if (use_utf8)
2294 {
2295 while (start_offset + onechar < len)
2296 {
2297 int tb = bptr[start_offset+onechar];
2298 if (tb <= 127) break;
2299 tb &= 0xc0;
2300 if (tb != 0 && tb != 0xc0) onechar++;
2301 }
2302 }
2303 use_offsets[1] = start_offset + onechar;
2304 }
2305 else
2306 {
2307 if (count == PCRE_ERROR_NOMATCH)
2308 {
2309 if (gmatched == 0) fprintf(outfile, "No match\n");
2310 }
2311 else fprintf(outfile, "Error %d\n", count);
2312 break; /* Out of the /g loop */
2313 }
2314 }
2315
2316 /* If not /g or /G we are done */
2317
2318 if (!do_g && !do_G) break;
2319
2320 /* If we have matched an empty string, first check to see if we are at
2321 the end of the subject. If so, the /g loop is over. Otherwise, mimic
2322 what Perl's /g option does. This turns out to be rather cunning. First
2323 we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
2324 same point. If this fails (picked up above) we advance to the next
2325 character. */
2326
2327 g_notempty = 0;
2328
2329 if (use_offsets[0] == use_offsets[1])
2330 {
2331 if (use_offsets[0] == len) break;
2332 g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
2333 }
2334
2335 /* For /g, update the start offset, leaving the rest alone */
2336
2337 if (do_g) start_offset = use_offsets[1];
2338
2339 /* For /G, update the pointer and length */
2340
2341 else
2342 {
2343 bptr += use_offsets[1];
2344 len -= use_offsets[1];
2345 }
2346 } /* End of loop for /g and /G */
2347
2348 NEXT_DATA: continue;
2349 } /* End of loop for data lines */
2350
2351 CONTINUE:
2352
2353 #if !defined NOPOSIX
2354 if (posix || do_posix) regfree(&preg);
2355 #endif
2356
2357 if (re != NULL) new_free(re);
2358 if (extra != NULL) new_free(extra);
2359 if (tables != NULL)
2360 {
2361 new_free((void *)tables);
2362 setlocale(LC_CTYPE, "C");
2363 locale_set = 0;
2364 }
2365 }
2366
2367 if (infile == stdin) fprintf(outfile, "\n");
2368
2369 EXIT:
2370
2371 if (infile != NULL && infile != stdin) fclose(infile);
2372 if (outfile != NULL && outfile != stdout) fclose(outfile);
2373
2374 free(buffer);
2375 free(dbuffer);
2376 free(pbuffer);
2377 free(offsets);
2378
2379 return yield;
2380 }
2381
2382 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5