/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Contents of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log


Revision 537 - (show annotations)
Tue Jun 8 15:28:26 2010 UTC (9 years, 5 months ago) by ph10
File MIME type: text/plain
File size: 75141 byte(s)
Error occurred while calculating annotation data.
Add newline in pcretest output if the last data line doesn't have one.
1 /*************************************************
2 * PCRE testing program *
3 *************************************************/
4
5 /* This program was hacked up as a tester for PCRE. I really should have
6 written it more tidily in the first place. Will I ever learn? It has grown and
7 been extended and consequently is now rather, er, *very* untidy in places.
8
9 -----------------------------------------------------------------------------
10 Redistribution and use in source and binary forms, with or without
11 modification, are permitted provided that the following conditions are met:
12
13 * Redistributions of source code must retain the above copyright notice,
14 this list of conditions and the following disclaimer.
15
16 * Redistributions in binary form must reproduce the above copyright
17 notice, this list of conditions and the following disclaimer in the
18 documentation and/or other materials provided with the distribution.
19
20 * Neither the name of the University of Cambridge nor the names of its
21 contributors may be used to endorse or promote products derived from
22 this software without specific prior written permission.
23
24 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
25 AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
26 IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
27 ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
28 LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
29 CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
30 SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
31 INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
32 CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
33 ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
34 POSSIBILITY OF SUCH DAMAGE.
35 -----------------------------------------------------------------------------
36 */
37
38
39 #ifdef HAVE_CONFIG_H
40 #include "config.h"
41 #endif
42
#include <ctype.h>
#include <errno.h>
#include <limits.h>
#include <locale.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <time.h>
50
51 #ifdef SUPPORT_LIBREADLINE
52 #ifdef HAVE_UNISTD_H
53 #include <unistd.h>
54 #endif
55 #include <readline/readline.h>
56 #include <readline/history.h>
57 #endif
58
59
60 /* A number of things vary for Windows builds. Originally, pcretest opened its
61 input and output without "b"; then I was told that "b" was needed in some
62 environments, so it was added for release 5.0 to both the input and output. (It
63 makes no difference on Unix-like systems.) Later I was told that it is wrong
64 for the input on Windows. I've now abstracted the modes into two macros that
65 are set here, to make it easier to fiddle with them, and removed "b" from the
66 input mode under Windows. */
67
68 #if defined(_WIN32) || defined(WIN32)
69 #include <io.h> /* For _setmode() */
70 #include <fcntl.h> /* For _O_BINARY */
71 #define INPUT_MODE "r"
72 #define OUTPUT_MODE "wb"
73
74 #ifndef isatty
75 #define isatty _isatty /* This is what Windows calls them, I'm told, */
76 #endif /* though in some environments they seem to */
77 /* be already defined, hence the #ifndefs. */
78 #ifndef fileno
79 #define fileno _fileno
80 #endif
81
82 #else
83 #include <sys/time.h> /* These two includes are needed */
84 #include <sys/resource.h> /* for setrlimit(). */
85 #define INPUT_MODE "rb"
86 #define OUTPUT_MODE "wb"
87 #endif
88
89
90 /* We have to include pcre_internal.h because we need the internal info for
91 displaying the results of pcre_study() and we also need to know about the
92 internal macros, structures, and other internal data values; pcretest has
93 "inside information" compared to a program that strictly follows the PCRE API.
94
95 Although pcre_internal.h does itself include pcre.h, we explicitly include it
96 here before pcre_internal.h so that the PCRE_EXP_xxx macros get set
97 appropriately for an application, not for building PCRE. */
98
99 #include "pcre.h"
100 #include "pcre_internal.h"
101
102 /* We need access to some of the data tables that PCRE uses. So as not to have
103 to keep two copies, we include the source file here, changing the names of the
104 external symbols to prevent clashes. */
105
106 #define _pcre_ucp_gentype ucp_gentype
107 #define _pcre_utf8_table1 utf8_table1
108 #define _pcre_utf8_table1_size utf8_table1_size
109 #define _pcre_utf8_table2 utf8_table2
110 #define _pcre_utf8_table3 utf8_table3
111 #define _pcre_utf8_table4 utf8_table4
112 #define _pcre_utt utt
113 #define _pcre_utt_size utt_size
114 #define _pcre_utt_names utt_names
115 #define _pcre_OP_lengths OP_lengths
116
117 #include "pcre_tables.c"
118
119 /* We also need the pcre_printint() function for printing out compiled
120 patterns. This function is in a separate file so that it can be included in
121 pcre_compile.c when that module is compiled with debugging enabled. It needs to
122 know which case is being compiled. */
123
124 #define COMPILING_PCRETEST
125 #include "pcre_printint.src"
126
127 /* The definition of the macro PRINTABLE, which determines whether to print an
128 output character as-is or as a hex value when showing compiled patterns, is
129 contained in the printint.src file. We use it here also, in cases when the
130 locale has not been explicitly changed, so as to get consistent output from
131 systems that differ in their output from isprint() even in the "C" locale. */
132
133 #define PRINTHEX(c) (locale_set? isprint(c) : PRINTABLE(c))
134
135 /* It is possible to compile this test program without including support for
136 testing the POSIX interface, though this is not available via the standard
137 Makefile. */
138
139 #if !defined NOPOSIX
140 #include "pcreposix.h"
141 #endif
142
143 /* It is also possible, for the benefit of the version currently imported into
144 Exim, to build pcretest without support for UTF8 (define NOUTF8), without the
145 interface to the DFA matcher (NODFA), and without the doublecheck of the old
146 "info" function (define NOINFOCHECK). In fact, we automatically cut out the
147 UTF8 support if PCRE is built without it. */
148
149 #ifndef SUPPORT_UTF8
150 #ifndef NOUTF8
151 #define NOUTF8
152 #endif
153 #endif
154
155
156 /* Other parameters */
157
158 #ifndef CLOCKS_PER_SEC
159 #ifdef CLK_TCK
160 #define CLOCKS_PER_SEC CLK_TCK
161 #else
162 #define CLOCKS_PER_SEC 100
163 #endif
164 #endif
165
166 /* This is the default loop count for timing. */
167
168 #define LOOPREPEAT 500000
169
170 /* Static variables */
171
172 static FILE *outfile;
173 static int log_store = 0;
174 static int callout_count;
175 static int callout_extra;
176 static int callout_fail_count;
177 static int callout_fail_id;
178 static int debug_lengths;
179 static int first_callout;
180 static int locale_set = 0;
181 static int show_malloc;
182 static int use_utf8;
183 static size_t gotten_store;
184
185 /* The buffers grow automatically if very long input lines are encountered. */
186
187 static int buffer_size = 50000;
188 static uschar *buffer = NULL;
189 static uschar *dbuffer = NULL;
190 static uschar *pbuffer = NULL;
191
192
193
194 /*************************************************
195 * Read or extend an input line *
196 *************************************************/
197
198 /* Input lines are read into buffer, but both patterns and data lines can be
199 continued over multiple input lines. In addition, if the buffer fills up, we
200 want to automatically expand it so as to be able to handle extremely large
201 lines that are needed for certain stress tests. When the input buffer is
202 expanded, the other two buffers must also be expanded likewise, and the
203 contents of pbuffer, which are a copy of the input for callouts, must be
204 preserved (for when expansion happens for a data line). This is not the most
205 optimal way of handling this, but hey, this is just a test program!
206
207 Arguments:
208 f the file to read
209 start where in buffer to start (this *must* be within buffer)
210 prompt for stdin or readline()
211
212 Returns: pointer to the start of new data
213 could be a copy of start, or could be moved
214 NULL if no data read and EOF reached
215 */
216
217 static uschar *
218 extend_inputline(FILE *f, uschar *start, const char *prompt)
219 {
220 uschar *here = start;
221
222 for (;;)
223 {
224 int rlen = (int)(buffer_size - (here - buffer));
225
226 if (rlen > 1000)
227 {
228 int dlen;
229
230 /* If libreadline support is required, use readline() to read a line if the
231 input is a terminal. Note that readline() removes the trailing newline, so
232 we must put it back again, to be compatible with fgets(). */
233
234 #ifdef SUPPORT_LIBREADLINE
235 if (isatty(fileno(f)))
236 {
237 size_t len;
238 char *s = readline(prompt);
239 if (s == NULL) return (here == start)? NULL : start;
240 len = strlen(s);
241 if (len > 0) add_history(s);
242 if (len > rlen - 1) len = rlen - 1;
243 memcpy(here, s, len);
244 here[len] = '\n';
245 here[len+1] = 0;
246 free(s);
247 }
248 else
249 #endif
250
251 /* Read the next line by normal means, prompting if the file is stdin. */
252
253 {
254 if (f == stdin) printf("%s", prompt);
255 if (fgets((char *)here, rlen, f) == NULL)
256 return (here == start)? NULL : start;
257 }
258
259 dlen = (int)strlen((char *)here);
260 if (dlen > 0 && here[dlen - 1] == '\n') return start;
261 here += dlen;
262 }
263
264 else
265 {
266 int new_buffer_size = 2*buffer_size;
267 uschar *new_buffer = (unsigned char *)malloc(new_buffer_size);
268 uschar *new_dbuffer = (unsigned char *)malloc(new_buffer_size);
269 uschar *new_pbuffer = (unsigned char *)malloc(new_buffer_size);
270
271 if (new_buffer == NULL || new_dbuffer == NULL || new_pbuffer == NULL)
272 {
273 fprintf(stderr, "pcretest: malloc(%d) failed\n", new_buffer_size);
274 exit(1);
275 }
276
277 memcpy(new_buffer, buffer, buffer_size);
278 memcpy(new_pbuffer, pbuffer, buffer_size);
279
280 buffer_size = new_buffer_size;
281
282 start = new_buffer + (start - buffer);
283 here = new_buffer + (here - buffer);
284
285 free(buffer);
286 free(dbuffer);
287 free(pbuffer);
288
289 buffer = new_buffer;
290 dbuffer = new_dbuffer;
291 pbuffer = new_pbuffer;
292 }
293 }
294
295 return NULL; /* Control never gets here */
296 }
297
298
299
300
301
302
303
304 /*************************************************
305 * Read number from string *
306 *************************************************/
307
/* We don't use strtoul() because SunOS4 doesn't have it. Rather than mess
around with conditional compilation, just do the job by hand. It is only used
for unpicking arguments, so just keep it simple. Leading white space is
skipped, then a run of decimal digits is converted. If there are no digits the
result is zero. A value that is too big for an int is clamped to INT_MAX
instead of overflowing.

Arguments:
  str           string to be converted
  endptr        where to put the end pointer (first character not consumed)

Returns:        the value, as a non-negative int
*/

static int
get_value(unsigned char *str, unsigned char **endptr)
{
int result = 0;
while (*str != 0 && isspace(*str)) str++;
while (isdigit(*str))
  {
  int digit = (int)(*str++ - '0');

  /* result*10 + digit fits in an int only if result <= (INT_MAX - digit)/10.
  Once clamped, the value stays at INT_MAX for any further digits. */

  if (result > (INT_MAX - digit)/10) result = INT_MAX;
    else result = result * 10 + digit;
  }
*endptr = str;
return(result);
}
328
329
330
331
332 /*************************************************
333 * Convert UTF-8 string to value *
334 *************************************************/
335
336 /* This function takes one or more bytes that represents a UTF-8 character,
337 and returns the value of the character.
338
339 Argument:
340 utf8bytes a pointer to the byte vector
341 vptr a pointer to an int to receive the value
342
343 Returns: > 0 => the number of bytes consumed
344 -6 to 0 => malformed UTF-8 character at offset = (-return)
345 */
346
347 #if !defined NOUTF8
348
349 static int
350 utf82ord(unsigned char *utf8bytes, int *vptr)
351 {
352 int c = *utf8bytes++;
353 int d = c;
354 int i, j, s;
355
356 for (i = -1; i < 6; i++) /* i is number of additional bytes */
357 {
358 if ((d & 0x80) == 0) break;
359 d <<= 1;
360 }
361
362 if (i == -1) { *vptr = c; return 1; } /* ascii character */
363 if (i == 0 || i == 6) return 0; /* invalid UTF-8 */
364
365 /* i now has a value in the range 1-5 */
366
367 s = 6*i;
368 d = (c & utf8_table3[i]) << s;
369
370 for (j = 0; j < i; j++)
371 {
372 c = *utf8bytes++;
373 if ((c & 0xc0) != 0x80) return -(j+1);
374 s -= 6;
375 d |= (c & 0x3f) << s;
376 }
377
378 /* Check that encoding was the correct unique one */
379
380 for (j = 0; j < utf8_table1_size; j++)
381 if (d <= utf8_table1[j]) break;
382 if (j != i) return -(i+1);
383
384 /* Valid value */
385
386 *vptr = d;
387 return i+1;
388 }
389
390 #endif
391
392
393
394 /*************************************************
395 * Convert character value to UTF-8 *
396 *************************************************/
397
398 /* This function takes an integer value in the range 0 - 0x7fffffff
399 and encodes it as a UTF-8 character in 1 to 6 bytes.
400
401 Arguments:
402 cvalue the character value
403 utf8bytes pointer to buffer for result - at least 6 bytes long
404
405 Returns: number of characters placed in the buffer
406 */
407
408 #if !defined NOUTF8
409
410 static int
411 ord2utf8(int cvalue, uschar *utf8bytes)
412 {
413 register int i, j;
414 for (i = 0; i < utf8_table1_size; i++)
415 if (cvalue <= utf8_table1[i]) break;
416 utf8bytes += i;
417 for (j = i; j > 0; j--)
418 {
419 *utf8bytes-- = 0x80 | (cvalue & 0x3f);
420 cvalue >>= 6;
421 }
422 *utf8bytes = utf8_table2[i] | cvalue;
423 return i + 1;
424 }
425
426 #endif
427
428
429
430 /*************************************************
431 * Print character string *
432 *************************************************/
433
434 /* Character string printing function. Must handle UTF-8 strings in utf8
435 mode. Yields number of characters printed. If handed a NULL file, just counts
436 chars without printing. */
437
438 static int pchars(unsigned char *p, int length, FILE *f)
439 {
440 int c = 0;
441 int yield = 0;
442
443 while (length-- > 0)
444 {
445 #if !defined NOUTF8
446 if (use_utf8)
447 {
448 int rc = utf82ord(p, &c);
449
450 if (rc > 0 && rc <= length + 1) /* Mustn't run over the end */
451 {
452 length -= rc - 1;
453 p += rc;
454 if (PRINTHEX(c))
455 {
456 if (f != NULL) fprintf(f, "%c", c);
457 yield++;
458 }
459 else
460 {
461 int n = 4;
462 if (f != NULL) fprintf(f, "\\x{%02x}", c);
463 yield += (n <= 0x000000ff)? 2 :
464 (n <= 0x00000fff)? 3 :
465 (n <= 0x0000ffff)? 4 :
466 (n <= 0x000fffff)? 5 : 6;
467 }
468 continue;
469 }
470 }
471 #endif
472
473 /* Not UTF-8, or malformed UTF-8 */
474
475 c = *p++;
476 if (PRINTHEX(c))
477 {
478 if (f != NULL) fprintf(f, "%c", c);
479 yield++;
480 }
481 else
482 {
483 if (f != NULL) fprintf(f, "\\x%02x", c);
484 yield += 4;
485 }
486 }
487
488 return yield;
489 }
490
491
492
493 /*************************************************
494 * Callout function *
495 *************************************************/
496
497 /* Called from PCRE as a result of the (?C) item. We print out where we are in
498 the match. Yield zero unless more callouts than the fail count, or the callout
499 data is not zero. */
500
501 static int callout(pcre_callout_block *cb)
502 {
503 FILE *f = (first_callout | callout_extra)? outfile : NULL;
504 int i, pre_start, post_start, subject_length;
505
506 if (callout_extra)
507 {
508 fprintf(f, "Callout %d: last capture = %d\n",
509 cb->callout_number, cb->capture_last);
510
511 for (i = 0; i < cb->capture_top * 2; i += 2)
512 {
513 if (cb->offset_vector[i] < 0)
514 fprintf(f, "%2d: <unset>\n", i/2);
515 else
516 {
517 fprintf(f, "%2d: ", i/2);
518 (void)pchars((unsigned char *)cb->subject + cb->offset_vector[i],
519 cb->offset_vector[i+1] - cb->offset_vector[i], f);
520 fprintf(f, "\n");
521 }
522 }
523 }
524
525 /* Re-print the subject in canonical form, the first time or if giving full
526 datails. On subsequent calls in the same match, we use pchars just to find the
527 printed lengths of the substrings. */
528
529 if (f != NULL) fprintf(f, "--->");
530
531 pre_start = pchars((unsigned char *)cb->subject, cb->start_match, f);
532 post_start = pchars((unsigned char *)(cb->subject + cb->start_match),
533 cb->current_position - cb->start_match, f);
534
535 subject_length = pchars((unsigned char *)cb->subject, cb->subject_length, NULL);
536
537 (void)pchars((unsigned char *)(cb->subject + cb->current_position),
538 cb->subject_length - cb->current_position, f);
539
540 if (f != NULL) fprintf(f, "\n");
541
542 /* Always print appropriate indicators, with callout number if not already
543 shown. For automatic callouts, show the pattern offset. */
544
545 if (cb->callout_number == 255)
546 {
547 fprintf(outfile, "%+3d ", cb->pattern_position);
548 if (cb->pattern_position > 99) fprintf(outfile, "\n ");
549 }
550 else
551 {
552 if (callout_extra) fprintf(outfile, " ");
553 else fprintf(outfile, "%3d ", cb->callout_number);
554 }
555
556 for (i = 0; i < pre_start; i++) fprintf(outfile, " ");
557 fprintf(outfile, "^");
558
559 if (post_start > 0)
560 {
561 for (i = 0; i < post_start - 1; i++) fprintf(outfile, " ");
562 fprintf(outfile, "^");
563 }
564
565 for (i = 0; i < subject_length - pre_start - post_start + 4; i++)
566 fprintf(outfile, " ");
567
568 fprintf(outfile, "%.*s", (cb->next_item_length == 0)? 1 : cb->next_item_length,
569 pbuffer + cb->pattern_position);
570
571 fprintf(outfile, "\n");
572 first_callout = 0;
573
574 if (cb->callout_data != NULL)
575 {
576 int callout_data = *((int *)(cb->callout_data));
577 if (callout_data != 0)
578 {
579 fprintf(outfile, "Callout data = %d\n", callout_data);
580 return callout_data;
581 }
582 }
583
584 return (cb->callout_number != callout_fail_id)? 0 :
585 (++callout_count >= callout_fail_count)? 1 : 0;
586 }
587
588
589 /*************************************************
590 * Local malloc functions *
591 *************************************************/
592
593 /* Alternative malloc function, to test functionality and show the size of the
594 compiled re. */
595
596 static void *new_malloc(size_t size)
597 {
598 void *block = malloc(size);
599 gotten_store = size;
600 if (show_malloc)
601 fprintf(outfile, "malloc %3d %p\n", (int)size, block);
602 return block;
603 }
604
605 static void new_free(void *block)
606 {
607 if (show_malloc)
608 fprintf(outfile, "free %p\n", block);
609 free(block);
610 }
611
612
613 /* For recursion malloc/free, to test stacking calls */
614
615 static void *stack_malloc(size_t size)
616 {
617 void *block = malloc(size);
618 if (show_malloc)
619 fprintf(outfile, "stack_malloc %3d %p\n", (int)size, block);
620 return block;
621 }
622
623 static void stack_free(void *block)
624 {
625 if (show_malloc)
626 fprintf(outfile, "stack_free %p\n", block);
627 free(block);
628 }
629
630
631 /*************************************************
632 * Call pcre_fullinfo() *
633 *************************************************/
634
635 /* Get one piece of information from the pcre_fullinfo() function */
636
637 static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
638 {
639 int rc;
640 if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
641 fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
642 }
643
644
645
646 /*************************************************
647 * Byte flipping function *
648 *************************************************/
649
/* Reverse the byte order of a value.

Arguments:
  value     the value to flip
  n         2 to swap the two low-order bytes; any other value reverses the
              four low-order bytes

Returns:    the flipped value; higher-order bytes of the input are discarded
*/

static unsigned long int
byteflip(unsigned long int value, int n)
{
unsigned long int b0 = value & 0xff;
unsigned long int b1 = (value >> 8) & 0xff;
unsigned long int b2, b3;

if (n == 2) return (b0 << 8) | b1;

b2 = (value >> 16) & 0xff;
b3 = (value >> 24) & 0xff;
return (b0 << 24) | (b1 << 16) | (b2 << 8) | b3;
}
659
660
661
662
663 /*************************************************
664 * Check match or recursion limit *
665 *************************************************/
666
667 static int
668 check_match_limit(pcre *re, pcre_extra *extra, uschar *bptr, int len,
669 int start_offset, int options, int *use_offsets, int use_size_offsets,
670 int flag, unsigned long int *limit, int errnumber, const char *msg)
671 {
672 int count;
673 int min = 0;
674 int mid = 64;
675 int max = -1;
676
677 extra->flags |= flag;
678
679 for (;;)
680 {
681 *limit = mid;
682
683 count = pcre_exec(re, extra, (char *)bptr, len, start_offset, options,
684 use_offsets, use_size_offsets);
685
686 if (count == errnumber)
687 {
688 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
689 min = mid;
690 mid = (mid == max - 1)? max : (max > 0)? (min + max)/2 : mid*2;
691 }
692
693 else if (count >= 0 || count == PCRE_ERROR_NOMATCH ||
694 count == PCRE_ERROR_PARTIAL)
695 {
696 if (mid == min + 1)
697 {
698 fprintf(outfile, "Minimum %s limit = %d\n", msg, mid);
699 break;
700 }
701 /* fprintf(outfile, "Testing %s limit = %d\n", msg, mid); */
702 max = mid;
703 mid = (min + mid)/2;
704 }
705 else break; /* Some other error */
706 }
707
708 extra->flags &= ~flag;
709 return count;
710 }
711
712
713
714 /*************************************************
715 * Case-independent strncmp() function *
716 *************************************************/
717
718 /*
719 Arguments:
720 s first string
721 t second string
722 n number of characters to compare
723
724 Returns: < 0, = 0, or > 0, according to the comparison
725 */
726
727 static int
728 strncmpic(uschar *s, uschar *t, int n)
729 {
730 while (n--)
731 {
732 int c = tolower(*s++) - tolower(*t++);
733 if (c) return c;
734 }
735 return 0;
736 }
737
738
739
740 /*************************************************
741 * Check newline indicator *
742 *************************************************/
743
744 /* This is used both at compile and run-time to check for <xxx> escapes. Print
745 a message and return 0 if there is no match.
746
747 Arguments:
748 p points after the leading '<'
749 f file for error message
750
751 Returns: appropriate PCRE_NEWLINE_xxx flags, or 0
752 */
753
754 static int
755 check_newline(uschar *p, FILE *f)
756 {
757 if (strncmpic(p, (uschar *)"cr>", 3) == 0) return PCRE_NEWLINE_CR;
758 if (strncmpic(p, (uschar *)"lf>", 3) == 0) return PCRE_NEWLINE_LF;
759 if (strncmpic(p, (uschar *)"crlf>", 5) == 0) return PCRE_NEWLINE_CRLF;
760 if (strncmpic(p, (uschar *)"anycrlf>", 8) == 0) return PCRE_NEWLINE_ANYCRLF;
761 if (strncmpic(p, (uschar *)"any>", 4) == 0) return PCRE_NEWLINE_ANY;
762 if (strncmpic(p, (uschar *)"bsr_anycrlf>", 12) == 0) return PCRE_BSR_ANYCRLF;
763 if (strncmpic(p, (uschar *)"bsr_unicode>", 12) == 0) return PCRE_BSR_UNICODE;
764 fprintf(f, "Unknown newline type at: <%s\n", p);
765 return 0;
766 }
767
768
769
770 /*************************************************
771 * Usage function *
772 *************************************************/
773
/* Write the command line summary to stdout. The -dfa and -p lines appear only
when the corresponding support is compiled in, and the note about readline()
depends on SUPPORT_LIBREADLINE. */

static void
usage(void)
{
fputs("Usage: pcretest [options] [<input file> [<output file>]]\n\n", stdout);
fputs("Input and output default to stdin and stdout.\n", stdout);
#ifdef SUPPORT_LIBREADLINE
fputs("If input is a terminal, readline() is used to read from it.\n", stdout);
#else
fputs("This version of pcretest is not linked with readline().\n", stdout);
#endif
fputs("\nOptions:\n", stdout);
fputs(" -b show compiled code (bytecode)\n", stdout);
fputs(" -C show PCRE compile-time options and exit\n", stdout);
fputs(" -d debug: show compiled code and information (-b and -i)\n", stdout);
#if !defined NODFA
fputs(" -dfa force DFA matching for all subjects\n", stdout);
#endif
fputs(" -help show usage information\n", stdout);
fputs(" -i show information about compiled patterns\n", stdout);
fputs(" -M find MATCH_LIMIT minimum for each subject\n", stdout);
fputs(" -m output memory used information\n", stdout);
fputs(" -o <n> set size of offsets vector to <n>\n", stdout);
#if !defined NOPOSIX
fputs(" -p use POSIX interface\n", stdout);
#endif
fputs(" -q quiet: do not output PCRE version number at start\n", stdout);
fputs(" -S <n> set stack size to <n> megabytes\n", stdout);
fputs(" -s output store (memory) used information\n", stdout);
fputs(" -t time compilation and execution\n", stdout);
fputs(" -t <n> time compilation and execution, repeating <n> times\n", stdout);
fputs(" -tm time execution (matching) only\n", stdout);
fputs(" -tm <n> time execution (matching) only, repeating <n> times\n", stdout);
}
807
808
809
810 /*************************************************
811 * Main Program *
812 *************************************************/
813
814 /* Read lines from named file or stdin and write to named file or stdout; lines
815 consist of a regular expression, in delimiters and optionally followed by
816 options, followed by a set of test data, terminated by an empty line. */
817
818 int main(int argc, char **argv)
819 {
820 FILE *infile = stdin;
821 int options = 0;
822 int study_options = 0;
823 int default_find_match_limit = FALSE;
824 int op = 1;
825 int timeit = 0;
826 int timeitm = 0;
827 int showinfo = 0;
828 int showstore = 0;
829 int quiet = 0;
830 int size_offsets = 45;
831 int size_offsets_max;
832 int *offsets = NULL;
833 #if !defined NOPOSIX
834 int posix = 0;
835 #endif
836 int debug = 0;
837 int done = 0;
838 int all_use_dfa = 0;
839 int yield = 0;
840 int stack_size;
841
842 /* These vectors store, end-to-end, a list of captured substring names. Assume
843 that 1024 is plenty long enough for the few names we'll be testing. */
844
845 uschar copynames[1024];
846 uschar getnames[1024];
847
848 uschar *copynamesptr;
849 uschar *getnamesptr;
850
851 /* Get buffers from malloc() so that Electric Fence will check their misuse
852 when I am debugging. They grow automatically when very long lines are read. */
853
854 buffer = (unsigned char *)malloc(buffer_size);
855 dbuffer = (unsigned char *)malloc(buffer_size);
856 pbuffer = (unsigned char *)malloc(buffer_size);
857
858 /* The outfile variable is static so that new_malloc can use it. */
859
860 outfile = stdout;
861
862 /* The following _setmode() stuff is some Windows magic that tells its runtime
863 library to translate CRLF into a single LF character. At least, that's what
864 I've been told: never having used Windows I take this all on trust. Originally
865 it set 0x8000, but then I was advised that _O_BINARY was better. */
866
867 #if defined(_WIN32) || defined(WIN32)
868 _setmode( _fileno( stdout ), _O_BINARY );
869 #endif
870
871 /* Scan options */
872
873 while (argc > 1 && argv[op][0] == '-')
874 {
875 unsigned char *endptr;
876
877 if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
878 showstore = 1;
879 else if (strcmp(argv[op], "-q") == 0) quiet = 1;
880 else if (strcmp(argv[op], "-b") == 0) debug = 1;
881 else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
882 else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
883 else if (strcmp(argv[op], "-M") == 0) default_find_match_limit = TRUE;
884 #if !defined NODFA
885 else if (strcmp(argv[op], "-dfa") == 0) all_use_dfa = 1;
886 #endif
887 else if (strcmp(argv[op], "-o") == 0 && argc > 2 &&
888 ((size_offsets = get_value((unsigned char *)argv[op+1], &endptr)),
889 *endptr == 0))
890 {
891 op++;
892 argc--;
893 }
894 else if (strcmp(argv[op], "-t") == 0 || strcmp(argv[op], "-tm") == 0)
895 {
896 int both = argv[op][2] == 0;
897 int temp;
898 if (argc > 2 && (temp = get_value((unsigned char *)argv[op+1], &endptr),
899 *endptr == 0))
900 {
901 timeitm = temp;
902 op++;
903 argc--;
904 }
905 else timeitm = LOOPREPEAT;
906 if (both) timeit = timeitm;
907 }
908 else if (strcmp(argv[op], "-S") == 0 && argc > 2 &&
909 ((stack_size = get_value((unsigned char *)argv[op+1], &endptr)),
910 *endptr == 0))
911 {
912 #if defined(_WIN32) || defined(WIN32)
913 printf("PCRE: -S not supported on this OS\n");
914 exit(1);
915 #else
916 int rc;
917 struct rlimit rlim;
918 getrlimit(RLIMIT_STACK, &rlim);
919 rlim.rlim_cur = stack_size * 1024 * 1024;
920 rc = setrlimit(RLIMIT_STACK, &rlim);
921 if (rc != 0)
922 {
923 printf("PCRE: setrlimit() failed with error %d\n", rc);
924 exit(1);
925 }
926 op++;
927 argc--;
928 #endif
929 }
930 #if !defined NOPOSIX
931 else if (strcmp(argv[op], "-p") == 0) posix = 1;
932 #endif
933 else if (strcmp(argv[op], "-C") == 0)
934 {
935 int rc;
936 unsigned long int lrc;
937 printf("PCRE version %s\n", pcre_version());
938 printf("Compiled with\n");
939 (void)pcre_config(PCRE_CONFIG_UTF8, &rc);
940 printf(" %sUTF-8 support\n", rc? "" : "No ");
941 (void)pcre_config(PCRE_CONFIG_UNICODE_PROPERTIES, &rc);
942 printf(" %sUnicode properties support\n", rc? "" : "No ");
943 (void)pcre_config(PCRE_CONFIG_NEWLINE, &rc);
944 /* Note that these values are always the ASCII values, even
945 in EBCDIC environments. CR is 13 and NL is 10. */
946 printf(" Newline sequence is %s\n", (rc == 13)? "CR" :
947 (rc == 10)? "LF" : (rc == (13<<8 | 10))? "CRLF" :
948 (rc == -2)? "ANYCRLF" :
949 (rc == -1)? "ANY" : "???");
950 (void)pcre_config(PCRE_CONFIG_BSR, &rc);
951 printf(" \\R matches %s\n", rc? "CR, LF, or CRLF only" :
952 "all Unicode newlines");
953 (void)pcre_config(PCRE_CONFIG_LINK_SIZE, &rc);
954 printf(" Internal link size = %d\n", rc);
955 (void)pcre_config(PCRE_CONFIG_POSIX_MALLOC_THRESHOLD, &rc);
956 printf(" POSIX malloc threshold = %d\n", rc);
957 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT, &lrc);
958 printf(" Default match limit = %ld\n", lrc);
959 (void)pcre_config(PCRE_CONFIG_MATCH_LIMIT_RECURSION, &lrc);
960 printf(" Default recursion depth limit = %ld\n", lrc);
961 (void)pcre_config(PCRE_CONFIG_STACKRECURSE, &rc);
962 printf(" Match recursion uses %s\n", rc? "stack" : "heap");
963 goto EXIT;
964 }
965 else if (strcmp(argv[op], "-help") == 0 ||
966 strcmp(argv[op], "--help") == 0)
967 {
968 usage();
969 goto EXIT;
970 }
971 else
972 {
973 printf("** Unknown or malformed option %s\n", argv[op]);
974 usage();
975 yield = 1;
976 goto EXIT;
977 }
978 op++;
979 argc--;
980 }
981
982 /* Get the store for the offsets vector, and remember what it was */
983
984 size_offsets_max = size_offsets;
985 offsets = (int *)malloc(size_offsets_max * sizeof(int));
986 if (offsets == NULL)
987 {
988 printf("** Failed to get %d bytes of memory for offsets vector\n",
989 (int)(size_offsets_max * sizeof(int)));
990 yield = 1;
991 goto EXIT;
992 }
993
994 /* Sort out the input and output files */
995
996 if (argc > 1)
997 {
998 infile = fopen(argv[op], INPUT_MODE);
999 if (infile == NULL)
1000 {
1001 printf("** Failed to open %s\n", argv[op]);
1002 yield = 1;
1003 goto EXIT;
1004 }
1005 }
1006
1007 if (argc > 2)
1008 {
1009 outfile = fopen(argv[op+1], OUTPUT_MODE);
1010 if (outfile == NULL)
1011 {
1012 printf("** Failed to open %s\n", argv[op+1]);
1013 yield = 1;
1014 goto EXIT;
1015 }
1016 }
1017
1018 /* Set alternative malloc function */
1019
1020 pcre_malloc = new_malloc;
1021 pcre_free = new_free;
1022 pcre_stack_malloc = stack_malloc;
1023 pcre_stack_free = stack_free;
1024
1025 /* Heading line unless quiet, then prompt for first regex if stdin */
1026
1027 if (!quiet) fprintf(outfile, "PCRE version %s\n\n", pcre_version());
1028
1029 /* Main loop */
1030
1031 while (!done)
1032 {
1033 pcre *re = NULL;
1034 pcre_extra *extra = NULL;
1035
1036 #if !defined NOPOSIX /* There are still compilers that require no indent */
1037 regex_t preg;
1038 int do_posix = 0;
1039 #endif
1040
1041 const char *error;
1042 unsigned char *markptr;
1043 unsigned char *p, *pp, *ppp;
1044 unsigned char *to_file = NULL;
1045 const unsigned char *tables = NULL;
1046 unsigned long int true_size, true_study_size = 0;
1047 size_t size, regex_gotten_store;
1048 int do_mark = 0;
1049 int do_study = 0;
1050 int do_debug = debug;
1051 int do_G = 0;
1052 int do_g = 0;
1053 int do_showinfo = showinfo;
1054 int do_showrest = 0;
1055 int do_flip = 0;
1056 int erroroffset, len, delimiter, poffset;
1057
1058 use_utf8 = 0;
1059 debug_lengths = 1;
1060
1061 if (extend_inputline(infile, buffer, " re> ") == NULL) break;
1062 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1063 fflush(outfile);
1064
1065 p = buffer;
1066 while (isspace(*p)) p++;
1067 if (*p == 0) continue;
1068
1069 /* See if the pattern is to be loaded pre-compiled from a file. */
1070
1071 if (*p == '<' && strchr((char *)(p+1), '<') == NULL)
1072 {
1073 unsigned long int magic, get_options;
1074 uschar sbuf[8];
1075 FILE *f;
1076
1077 p++;
1078 pp = p + (int)strlen((char *)p);
1079 while (isspace(pp[-1])) pp--;
1080 *pp = 0;
1081
1082 f = fopen((char *)p, "rb");
1083 if (f == NULL)
1084 {
1085 fprintf(outfile, "Failed to open %s: %s\n", p, strerror(errno));
1086 continue;
1087 }
1088
1089 if (fread(sbuf, 1, 8, f) != 8) goto FAIL_READ;
1090
1091 true_size =
1092 (sbuf[0] << 24) | (sbuf[1] << 16) | (sbuf[2] << 8) | sbuf[3];
1093 true_study_size =
1094 (sbuf[4] << 24) | (sbuf[5] << 16) | (sbuf[6] << 8) | sbuf[7];
1095
1096 re = (real_pcre *)new_malloc(true_size);
1097 regex_gotten_store = gotten_store;
1098
1099 if (fread(re, 1, true_size, f) != true_size) goto FAIL_READ;
1100
1101 magic = ((real_pcre *)re)->magic_number;
1102 if (magic != MAGIC_NUMBER)
1103 {
1104 if (byteflip(magic, sizeof(magic)) == MAGIC_NUMBER)
1105 {
1106 do_flip = 1;
1107 }
1108 else
1109 {
1110 fprintf(outfile, "Data in %s is not a compiled PCRE regex\n", p);
1111 fclose(f);
1112 continue;
1113 }
1114 }
1115
1116 fprintf(outfile, "Compiled regex%s loaded from %s\n",
1117 do_flip? " (byte-inverted)" : "", p);
1118
1119 /* Need to know if UTF-8 for printing data strings */
1120
1121 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1122 use_utf8 = (get_options & PCRE_UTF8) != 0;
1123
1124 /* Now see if there is any following study data */
1125
1126 if (true_study_size != 0)
1127 {
1128 pcre_study_data *psd;
1129
1130 extra = (pcre_extra *)new_malloc(sizeof(pcre_extra) + true_study_size);
1131 extra->flags = PCRE_EXTRA_STUDY_DATA;
1132
1133 psd = (pcre_study_data *)(((char *)extra) + sizeof(pcre_extra));
1134 extra->study_data = psd;
1135
1136 if (fread(psd, 1, true_study_size, f) != true_study_size)
1137 {
1138 FAIL_READ:
1139 fprintf(outfile, "Failed to read data from %s\n", p);
1140 if (extra != NULL) new_free(extra);
1141 if (re != NULL) new_free(re);
1142 fclose(f);
1143 continue;
1144 }
1145 fprintf(outfile, "Study data loaded from %s\n", p);
1146 do_study = 1; /* To get the data output if requested */
1147 }
1148 else fprintf(outfile, "No study data\n");
1149
1150 fclose(f);
1151 goto SHOW_INFO;
1152 }
1153
1154 /* In-line pattern (the usual case). Get the delimiter and seek the end of
1155 the pattern; if it isn't complete, read more. */
1156
1157 delimiter = *p++;
1158
1159 if (isalnum(delimiter) || delimiter == '\\')
1160 {
1161 fprintf(outfile, "** Delimiter must not be alphanumeric or \\\n");
1162 goto SKIP_DATA;
1163 }
1164
1165 pp = p;
1166 poffset = (int)(p - buffer);
1167
1168 for(;;)
1169 {
1170 while (*pp != 0)
1171 {
1172 if (*pp == '\\' && pp[1] != 0) pp++;
1173 else if (*pp == delimiter) break;
1174 pp++;
1175 }
1176 if (*pp != 0) break;
1177 if ((pp = extend_inputline(infile, pp, " > ")) == NULL)
1178 {
1179 fprintf(outfile, "** Unexpected EOF\n");
1180 done = 1;
1181 goto CONTINUE;
1182 }
1183 if (infile != stdin) fprintf(outfile, "%s", (char *)pp);
1184 }
1185
1186 /* The buffer may have moved while being extended; reset the start of data
1187 pointer to the correct relative point in the buffer. */
1188
1189 p = buffer + poffset;
1190
1191 /* If the first character after the delimiter is backslash, make
1192 the pattern end with backslash. This is purely to provide a way
1193 of testing for the error message when a pattern ends with backslash. */
1194
1195 if (pp[1] == '\\') *pp++ = '\\';
1196
1197 /* Terminate the pattern at the delimiter, and save a copy of the pattern
1198 for callouts. */
1199
1200 *pp++ = 0;
1201 strcpy((char *)pbuffer, (char *)p);
1202
1203 /* Look for options after final delimiter */
1204
1205 options = 0;
1206 study_options = 0;
1207 log_store = showstore; /* default from command line */
1208
1209 while (*pp != 0)
1210 {
1211 switch (*pp++)
1212 {
1213 case 'f': options |= PCRE_FIRSTLINE; break;
1214 case 'g': do_g = 1; break;
1215 case 'i': options |= PCRE_CASELESS; break;
1216 case 'm': options |= PCRE_MULTILINE; break;
1217 case 's': options |= PCRE_DOTALL; break;
1218 case 'x': options |= PCRE_EXTENDED; break;
1219
1220 case '+': do_showrest = 1; break;
1221 case 'A': options |= PCRE_ANCHORED; break;
1222 case 'B': do_debug = 1; break;
1223 case 'C': options |= PCRE_AUTO_CALLOUT; break;
1224 case 'D': do_debug = do_showinfo = 1; break;
1225 case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
1226 case 'F': do_flip = 1; break;
1227 case 'G': do_G = 1; break;
1228 case 'I': do_showinfo = 1; break;
1229 case 'J': options |= PCRE_DUPNAMES; break;
1230 case 'K': do_mark = 1; break;
1231 case 'M': log_store = 1; break;
1232 case 'N': options |= PCRE_NO_AUTO_CAPTURE; break;
1233
1234 #if !defined NOPOSIX
1235 case 'P': do_posix = 1; break;
1236 #endif
1237
1238 case 'S': do_study = 1; break;
1239 case 'U': options |= PCRE_UNGREEDY; break;
1240 case 'W': options |= PCRE_UCP; break;
1241 case 'X': options |= PCRE_EXTRA; break;
1242 case 'Z': debug_lengths = 0; break;
1243 case '8': options |= PCRE_UTF8; use_utf8 = 1; break;
1244 case '?': options |= PCRE_NO_UTF8_CHECK; break;
1245
1246 case 'L':
1247 ppp = pp;
1248 /* The '\r' test here is so that it works on Windows. */
1249 /* The '0' test is just in case this is an unterminated line. */
1250 while (*ppp != 0 && *ppp != '\n' && *ppp != '\r' && *ppp != ' ') ppp++;
1251 *ppp = 0;
1252 if (setlocale(LC_CTYPE, (const char *)pp) == NULL)
1253 {
1254 fprintf(outfile, "** Failed to set locale \"%s\"\n", pp);
1255 goto SKIP_DATA;
1256 }
1257 locale_set = 1;
1258 tables = pcre_maketables();
1259 pp = ppp;
1260 break;
1261
1262 case '>':
1263 to_file = pp;
1264 while (*pp != 0) pp++;
1265 while (isspace(pp[-1])) pp--;
1266 *pp = 0;
1267 break;
1268
1269 case '<':
1270 {
1271 if (strncmpic(pp, (uschar *)"JS>", 3) == 0)
1272 {
1273 options |= PCRE_JAVASCRIPT_COMPAT;
1274 pp += 3;
1275 }
1276 else
1277 {
1278 int x = check_newline(pp, outfile);
1279 if (x == 0) goto SKIP_DATA;
1280 options |= x;
1281 while (*pp++ != '>');
1282 }
1283 }
1284 break;
1285
1286 case '\r': /* So that it works in Windows */
1287 case '\n':
1288 case ' ':
1289 break;
1290
1291 default:
1292 fprintf(outfile, "** Unknown option '%c'\n", pp[-1]);
1293 goto SKIP_DATA;
1294 }
1295 }
1296
1297 /* Handle compiling via the POSIX interface, which doesn't support the
1298 timing, showing, or debugging options, nor the ability to pass over
1299 local character tables. */
1300
1301 #if !defined NOPOSIX
1302 if (posix || do_posix)
1303 {
1304 int rc;
1305 int cflags = 0;
1306
1307 if ((options & PCRE_CASELESS) != 0) cflags |= REG_ICASE;
1308 if ((options & PCRE_MULTILINE) != 0) cflags |= REG_NEWLINE;
1309 if ((options & PCRE_DOTALL) != 0) cflags |= REG_DOTALL;
1310 if ((options & PCRE_NO_AUTO_CAPTURE) != 0) cflags |= REG_NOSUB;
1311 if ((options & PCRE_UTF8) != 0) cflags |= REG_UTF8;
1312 if ((options & PCRE_UCP) != 0) cflags |= REG_UCP;
1313 if ((options & PCRE_UNGREEDY) != 0) cflags |= REG_UNGREEDY;
1314
1315 rc = regcomp(&preg, (char *)p, cflags);
1316
1317 /* Compilation failed; go back for another re, skipping to blank line
1318 if non-interactive. */
1319
1320 if (rc != 0)
1321 {
1322 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
1323 fprintf(outfile, "Failed: POSIX code %d: %s\n", rc, buffer);
1324 goto SKIP_DATA;
1325 }
1326 }
1327
1328 /* Handle compiling via the native interface */
1329
1330 else
1331 #endif /* !defined NOPOSIX */
1332
1333 {
1334 unsigned long int get_options;
1335
1336 if (timeit > 0)
1337 {
1338 register int i;
1339 clock_t time_taken;
1340 clock_t start_time = clock();
1341 for (i = 0; i < timeit; i++)
1342 {
1343 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1344 if (re != NULL) free(re);
1345 }
1346 time_taken = clock() - start_time;
1347 fprintf(outfile, "Compile time %.4f milliseconds\n",
1348 (((double)time_taken * 1000.0) / (double)timeit) /
1349 (double)CLOCKS_PER_SEC);
1350 }
1351
1352 re = pcre_compile((char *)p, options, &error, &erroroffset, tables);
1353
1354 /* Compilation failed; go back for another re, skipping to blank line
1355 if non-interactive. */
1356
1357 if (re == NULL)
1358 {
1359 fprintf(outfile, "Failed: %s at offset %d\n", error, erroroffset);
1360 SKIP_DATA:
1361 if (infile != stdin)
1362 {
1363 for (;;)
1364 {
1365 if (extend_inputline(infile, buffer, NULL) == NULL)
1366 {
1367 done = 1;
1368 goto CONTINUE;
1369 }
1370 len = (int)strlen((char *)buffer);
1371 while (len > 0 && isspace(buffer[len-1])) len--;
1372 if (len == 0) break;
1373 }
1374 fprintf(outfile, "\n");
1375 }
1376 goto CONTINUE;
1377 }
1378
1379 /* Compilation succeeded. It is now possible to set the UTF-8 option from
1380 within the regex; check for this so that we know how to process the data
1381 lines. */
1382
1383 new_info(re, NULL, PCRE_INFO_OPTIONS, &get_options);
1384 if ((get_options & PCRE_UTF8) != 0) use_utf8 = 1;
1385
1386 /* Print information if required. There are now two info-returning
1387 functions. The old one has a limited interface and returns only limited
1388 data. Check that it agrees with the newer one. */
1389
1390 if (log_store)
1391 fprintf(outfile, "Memory allocation (code space): %d\n",
1392 (int)(gotten_store -
1393 sizeof(real_pcre) -
1394 ((real_pcre *)re)->name_count * ((real_pcre *)re)->name_entry_size));
1395
1396 /* Extract the size for possible writing before possibly flipping it,
1397 and remember the store that was got. */
1398
1399 true_size = ((real_pcre *)re)->size;
1400 regex_gotten_store = gotten_store;
1401
1402 /* If /S was present, study the regexp to generate additional info to
1403 help with the matching. */
1404
1405 if (do_study)
1406 {
1407 if (timeit > 0)
1408 {
1409 register int i;
1410 clock_t time_taken;
1411 clock_t start_time = clock();
1412 for (i = 0; i < timeit; i++)
1413 extra = pcre_study(re, study_options, &error);
1414 time_taken = clock() - start_time;
1415 if (extra != NULL) free(extra);
1416 fprintf(outfile, " Study time %.4f milliseconds\n",
1417 (((double)time_taken * 1000.0) / (double)timeit) /
1418 (double)CLOCKS_PER_SEC);
1419 }
1420 extra = pcre_study(re, study_options, &error);
1421 if (error != NULL)
1422 fprintf(outfile, "Failed to study: %s\n", error);
1423 else if (extra != NULL)
1424 true_study_size = ((pcre_study_data *)(extra->study_data))->size;
1425 }
1426
1427 /* If /K was present, we set up for handling MARK data. */
1428
1429 if (do_mark)
1430 {
1431 if (extra == NULL)
1432 {
1433 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
1434 extra->flags = 0;
1435 }
1436 extra->mark = &markptr;
1437 extra->flags |= PCRE_EXTRA_MARK;
1438 }
1439
1440 /* If the 'F' option was present, we flip the bytes of all the integer
1441 fields in the regex data block and the study block. This is to make it
1442 possible to test PCRE's handling of byte-flipped patterns, e.g. those
1443 compiled on a different architecture. */
1444
1445 if (do_flip)
1446 {
1447 real_pcre *rre = (real_pcre *)re;
1448 rre->magic_number =
1449 byteflip(rre->magic_number, sizeof(rre->magic_number));
1450 rre->size = byteflip(rre->size, sizeof(rre->size));
1451 rre->options = byteflip(rre->options, sizeof(rre->options));
1452 rre->flags = (pcre_uint16)byteflip(rre->flags, sizeof(rre->flags));
1453 rre->top_bracket =
1454 (pcre_uint16)byteflip(rre->top_bracket, sizeof(rre->top_bracket));
1455 rre->top_backref =
1456 (pcre_uint16)byteflip(rre->top_backref, sizeof(rre->top_backref));
1457 rre->first_byte =
1458 (pcre_uint16)byteflip(rre->first_byte, sizeof(rre->first_byte));
1459 rre->req_byte =
1460 (pcre_uint16)byteflip(rre->req_byte, sizeof(rre->req_byte));
1461 rre->name_table_offset = (pcre_uint16)byteflip(rre->name_table_offset,
1462 sizeof(rre->name_table_offset));
1463 rre->name_entry_size = (pcre_uint16)byteflip(rre->name_entry_size,
1464 sizeof(rre->name_entry_size));
1465 rre->name_count = (pcre_uint16)byteflip(rre->name_count,
1466 sizeof(rre->name_count));
1467
1468 if (extra != NULL)
1469 {
1470 pcre_study_data *rsd = (pcre_study_data *)(extra->study_data);
1471 rsd->size = byteflip(rsd->size, sizeof(rsd->size));
1472 rsd->flags = byteflip(rsd->flags, sizeof(rsd->flags));
1473 rsd->minlength = byteflip(rsd->minlength, sizeof(rsd->minlength));
1474 }
1475 }
1476
1477 /* Extract information from the compiled data if required */
1478
1479 SHOW_INFO:
1480
1481 if (do_debug)
1482 {
1483 fprintf(outfile, "------------------------------------------------------------------\n");
1484 pcre_printint(re, outfile, debug_lengths);
1485 }
1486
1487 /* We already have the options in get_options (see above) */
1488
1489 if (do_showinfo)
1490 {
1491 unsigned long int all_options;
1492 #if !defined NOINFOCHECK
1493 int old_first_char, old_options, old_count;
1494 #endif
1495 int count, backrefmax, first_char, need_char, okpartial, jchanged,
1496 hascrorlf;
1497 int nameentrysize, namecount;
1498 const uschar *nametable;
1499
1500 new_info(re, NULL, PCRE_INFO_SIZE, &size);
1501 new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
1502 new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
1503 new_info(re, NULL, PCRE_INFO_FIRSTBYTE, &first_char);
1504 new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
1505 new_info(re, NULL, PCRE_INFO_NAMEENTRYSIZE, &nameentrysize);
1506 new_info(re, NULL, PCRE_INFO_NAMECOUNT, &namecount);
1507 new_info(re, NULL, PCRE_INFO_NAMETABLE, (void *)&nametable);
1508 new_info(re, NULL, PCRE_INFO_OKPARTIAL, &okpartial);
1509 new_info(re, NULL, PCRE_INFO_JCHANGED, &jchanged);
1510 new_info(re, NULL, PCRE_INFO_HASCRORLF, &hascrorlf);
1511
1512 #if !defined NOINFOCHECK
1513 old_count = pcre_info(re, &old_options, &old_first_char);
1514 if (count < 0) fprintf(outfile,
1515 "Error %d from pcre_info()\n", count);
1516 else
1517 {
1518 if (old_count != count) fprintf(outfile,
1519 "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
1520 old_count);
1521
1522 if (old_first_char != first_char) fprintf(outfile,
1523 "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
1524 first_char, old_first_char);
1525
1526 if (old_options != (int)get_options) fprintf(outfile,
1527 "Options disagreement: pcre_fullinfo=%ld pcre_info=%d\n",
1528 get_options, old_options);
1529 }
1530 #endif
1531
1532 if (size != regex_gotten_store) fprintf(outfile,
1533 "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
1534 (int)size, (int)regex_gotten_store);
1535
1536 fprintf(outfile, "Capturing subpattern count = %d\n", count);
1537 if (backrefmax > 0)
1538 fprintf(outfile, "Max back reference = %d\n", backrefmax);
1539
1540 if (namecount > 0)
1541 {
1542 fprintf(outfile, "Named capturing subpatterns:\n");
1543 while (namecount-- > 0)
1544 {
1545 fprintf(outfile, " %s %*s%3d\n", nametable + 2,
1546 nameentrysize - 3 - (int)strlen((char *)nametable + 2), "",
1547 GET2(nametable, 0));
1548 nametable += nameentrysize;
1549 }
1550 }
1551
1552 if (!okpartial) fprintf(outfile, "Partial matching not supported\n");
1553 if (hascrorlf) fprintf(outfile, "Contains explicit CR or LF match\n");
1554
1555 all_options = ((real_pcre *)re)->options;
1556 if (do_flip) all_options = byteflip(all_options, sizeof(all_options));
1557
1558 if (get_options == 0) fprintf(outfile, "No options\n");
1559 else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s%s\n",
1560 ((get_options & PCRE_ANCHORED) != 0)? " anchored" : "",
1561 ((get_options & PCRE_CASELESS) != 0)? " caseless" : "",
1562 ((get_options & PCRE_EXTENDED) != 0)? " extended" : "",
1563 ((get_options & PCRE_MULTILINE) != 0)? " multiline" : "",
1564 ((get_options & PCRE_FIRSTLINE) != 0)? " firstline" : "",
1565 ((get_options & PCRE_DOTALL) != 0)? " dotall" : "",
1566 ((get_options & PCRE_BSR_ANYCRLF) != 0)? " bsr_anycrlf" : "",
1567 ((get_options & PCRE_BSR_UNICODE) != 0)? " bsr_unicode" : "",
1568 ((get_options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
1569 ((get_options & PCRE_EXTRA) != 0)? " extra" : "",
1570 ((get_options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
1571 ((get_options & PCRE_NO_AUTO_CAPTURE) != 0)? " no_auto_capture" : "",
1572 ((get_options & PCRE_UTF8) != 0)? " utf8" : "",
1573 ((get_options & PCRE_UCP) != 0)? " ucp" : "",
1574 ((get_options & PCRE_NO_UTF8_CHECK) != 0)? " no_utf8_check" : "",
1575 ((get_options & PCRE_DUPNAMES) != 0)? " dupnames" : "");
1576
1577 if (jchanged) fprintf(outfile, "Duplicate name status changes\n");
1578
1579 switch (get_options & PCRE_NEWLINE_BITS)
1580 {
1581 case PCRE_NEWLINE_CR:
1582 fprintf(outfile, "Forced newline sequence: CR\n");
1583 break;
1584
1585 case PCRE_NEWLINE_LF:
1586 fprintf(outfile, "Forced newline sequence: LF\n");
1587 break;
1588
1589 case PCRE_NEWLINE_CRLF:
1590 fprintf(outfile, "Forced newline sequence: CRLF\n");
1591 break;
1592
1593 case PCRE_NEWLINE_ANYCRLF:
1594 fprintf(outfile, "Forced newline sequence: ANYCRLF\n");
1595 break;
1596
1597 case PCRE_NEWLINE_ANY:
1598 fprintf(outfile, "Forced newline sequence: ANY\n");
1599 break;
1600
1601 default:
1602 break;
1603 }
1604
1605 if (first_char == -1)
1606 {
1607 fprintf(outfile, "First char at start or follows newline\n");
1608 }
1609 else if (first_char < 0)
1610 {
1611 fprintf(outfile, "No first char\n");
1612 }
1613 else
1614 {
1615 int ch = first_char & 255;
1616 const char *caseless = ((first_char & REQ_CASELESS) == 0)?
1617 "" : " (caseless)";
1618 if (PRINTHEX(ch))
1619 fprintf(outfile, "First char = \'%c\'%s\n", ch, caseless);
1620 else
1621 fprintf(outfile, "First char = %d%s\n", ch, caseless);
1622 }
1623
1624 if (need_char < 0)
1625 {
1626 fprintf(outfile, "No need char\n");
1627 }
1628 else
1629 {
1630 int ch = need_char & 255;
1631 const char *caseless = ((need_char & REQ_CASELESS) == 0)?
1632 "" : " (caseless)";
1633 if (PRINTHEX(ch))
1634 fprintf(outfile, "Need char = \'%c\'%s\n", ch, caseless);
1635 else
1636 fprintf(outfile, "Need char = %d%s\n", ch, caseless);
1637 }
1638
1639 /* Don't output study size; at present it is in any case a fixed
1640 value, but it varies, depending on the computer architecture, and
1641 so messes up the test suite. (And with the /F option, it might be
1642 flipped.) */
1643
1644 if (do_study)
1645 {
1646 if (extra == NULL)
1647 fprintf(outfile, "Study returned NULL\n");
1648 else
1649 {
1650 uschar *start_bits = NULL;
1651 int minlength;
1652
1653 new_info(re, extra, PCRE_INFO_MINLENGTH, &minlength);
1654 fprintf(outfile, "Subject length lower bound = %d\n", minlength);
1655
1656 new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
1657 if (start_bits == NULL)
1658 fprintf(outfile, "No set of starting bytes\n");
1659 else
1660 {
1661 int i;
1662 int c = 24;
1663 fprintf(outfile, "Starting byte set: ");
1664 for (i = 0; i < 256; i++)
1665 {
1666 if ((start_bits[i/8] & (1<<(i&7))) != 0)
1667 {
1668 if (c > 75)
1669 {
1670 fprintf(outfile, "\n ");
1671 c = 2;
1672 }
1673 if (PRINTHEX(i) && i != ' ')
1674 {
1675 fprintf(outfile, "%c ", i);
1676 c += 2;
1677 }
1678 else
1679 {
1680 fprintf(outfile, "\\x%02x ", i);
1681 c += 5;
1682 }
1683 }
1684 }
1685 fprintf(outfile, "\n");
1686 }
1687 }
1688 }
1689 }
1690
1691 /* If the '>' option was present, we write out the regex to a file, and
1692 that is all. The first 8 bytes of the file are the regex length and then
1693 the study length, in big-endian order. */
1694
1695 if (to_file != NULL)
1696 {
1697 FILE *f = fopen((char *)to_file, "wb");
1698 if (f == NULL)
1699 {
1700 fprintf(outfile, "Unable to open %s: %s\n", to_file, strerror(errno));
1701 }
1702 else
1703 {
1704 uschar sbuf[8];
1705 sbuf[0] = (uschar)((true_size >> 24) & 255);
1706 sbuf[1] = (uschar)((true_size >> 16) & 255);
1707 sbuf[2] = (uschar)((true_size >> 8) & 255);
1708 sbuf[3] = (uschar)((true_size) & 255);
1709
1710 sbuf[4] = (uschar)((true_study_size >> 24) & 255);
1711 sbuf[5] = (uschar)((true_study_size >> 16) & 255);
1712 sbuf[6] = (uschar)((true_study_size >> 8) & 255);
1713 sbuf[7] = (uschar)((true_study_size) & 255);
1714
1715 if (fwrite(sbuf, 1, 8, f) < 8 ||
1716 fwrite(re, 1, true_size, f) < true_size)
1717 {
1718 fprintf(outfile, "Write error on %s: %s\n", to_file, strerror(errno));
1719 }
1720 else
1721 {
1722 fprintf(outfile, "Compiled regex written to %s\n", to_file);
1723 if (extra != NULL)
1724 {
1725 if (fwrite(extra->study_data, 1, true_study_size, f) <
1726 true_study_size)
1727 {
1728 fprintf(outfile, "Write error on %s: %s\n", to_file,
1729 strerror(errno));
1730 }
1731 else fprintf(outfile, "Study data written to %s\n", to_file);
1732
1733 }
1734 }
1735 fclose(f);
1736 }
1737
1738 new_free(re);
1739 if (extra != NULL) new_free(extra);
1740 if (tables != NULL) new_free((void *)tables);
1741 continue; /* With next regex */
1742 }
1743 } /* End of non-POSIX compile */
1744
1745 /* Read data lines and test them */
1746
1747 for (;;)
1748 {
1749 uschar *q;
1750 uschar *bptr;
1751 int *use_offsets = offsets;
1752 int use_size_offsets = size_offsets;
1753 int callout_data = 0;
1754 int callout_data_set = 0;
1755 int count, c;
1756 int copystrings = 0;
1757 int find_match_limit = default_find_match_limit;
1758 int getstrings = 0;
1759 int getlist = 0;
1760 int gmatched = 0;
1761 int start_offset = 0;
1762 int g_notempty = 0;
1763 int use_dfa = 0;
1764
1765 options = 0;
1766
1767 *copynames = 0;
1768 *getnames = 0;
1769
1770 copynamesptr = copynames;
1771 getnamesptr = getnames;
1772
1773 pcre_callout = callout;
1774 first_callout = 1;
1775 callout_extra = 0;
1776 callout_count = 0;
1777 callout_fail_count = 999999;
1778 callout_fail_id = -1;
1779 show_malloc = 0;
1780
1781 if (extra != NULL) extra->flags &=
1782 ~(PCRE_EXTRA_MATCH_LIMIT|PCRE_EXTRA_MATCH_LIMIT_RECURSION);
1783
1784 len = 0;
1785 for (;;)
1786 {
1787 if (extend_inputline(infile, buffer + len, "data> ") == NULL)
1788 {
1789 if (len > 0) /* Reached EOF without hitting a newline */
1790 {
1791 fprintf(outfile, "\n");
1792 break;
1793 }
1794 done = 1;
1795 goto CONTINUE;
1796 }
1797 if (infile != stdin) fprintf(outfile, "%s", (char *)buffer);
1798 len = (int)strlen((char *)buffer);
1799 if (buffer[len-1] == '\n') break;
1800 }
1801
1802 while (len > 0 && isspace(buffer[len-1])) len--;
1803 buffer[len] = 0;
1804 if (len == 0) break;
1805
1806 p = buffer;
1807 while (isspace(*p)) p++;
1808
1809 bptr = q = dbuffer;
1810 while ((c = *p++) != 0)
1811 {
1812 int i = 0;
1813 int n = 0;
1814
1815 if (c == '\\') switch ((c = *p++))
1816 {
1817 case 'a': c = 7; break;
1818 case 'b': c = '\b'; break;
1819 case 'e': c = 27; break;
1820 case 'f': c = '\f'; break;
1821 case 'n': c = '\n'; break;
1822 case 'r': c = '\r'; break;
1823 case 't': c = '\t'; break;
1824 case 'v': c = '\v'; break;
1825
1826 case '0': case '1': case '2': case '3':
1827 case '4': case '5': case '6': case '7':
1828 c -= '0';
1829 while (i++ < 2 && isdigit(*p) && *p != '8' && *p != '9')
1830 c = c * 8 + *p++ - '0';
1831
1832 #if !defined NOUTF8
1833 if (use_utf8 && c > 255)
1834 {
1835 unsigned char buff8[8];
1836 int ii, utn;
1837 utn = ord2utf8(c, buff8);
1838 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1839 c = buff8[ii]; /* Last byte */
1840 }
1841 #endif
1842 break;
1843
1844 case 'x':
1845
1846 /* Handle \x{..} specially - new Perl thing for utf8 */
1847
1848 #if !defined NOUTF8
1849 if (*p == '{')
1850 {
1851 unsigned char *pt = p;
1852 c = 0;
1853 while (isxdigit(*(++pt)))
1854 c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
1855 if (*pt == '}')
1856 {
1857 unsigned char buff8[8];
1858 int ii, utn;
1859 if (use_utf8)
1860 {
1861 utn = ord2utf8(c, buff8);
1862 for (ii = 0; ii < utn - 1; ii++) *q++ = buff8[ii];
1863 c = buff8[ii]; /* Last byte */
1864 }
1865 else
1866 {
1867 if (c > 255)
1868 fprintf(outfile, "** Character \\x{%x} is greater than 255 and "
1869 "UTF-8 mode is not enabled.\n"
1870 "** Truncation will probably give the wrong result.\n", c);
1871 }
1872 p = pt + 1;
1873 break;
1874 }
1875 /* Not correct form; fall through */
1876 }
1877 #endif
1878
1879 /* Ordinary \x */
1880
1881 c = 0;
1882 while (i++ < 2 && isxdigit(*p))
1883 {
1884 c = c * 16 + tolower(*p) - ((isdigit(*p))? '0' : 'W');
1885 p++;
1886 }
1887 break;
1888
1889 case 0: /* \ followed by EOF allows for an empty line */
1890 p--;
1891 continue;
1892
1893 case '>':
1894 while(isdigit(*p)) start_offset = start_offset * 10 + *p++ - '0';
1895 continue;
1896
1897 case 'A': /* Option setting */
1898 options |= PCRE_ANCHORED;
1899 continue;
1900
1901 case 'B':
1902 options |= PCRE_NOTBOL;
1903 continue;
1904
1905 case 'C':
1906 if (isdigit(*p)) /* Set copy string */
1907 {
1908 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1909 copystrings |= 1 << n;
1910 }
1911 else if (isalnum(*p))
1912 {
1913 uschar *npp = copynamesptr;
1914 while (isalnum(*p)) *npp++ = *p++;
1915 *npp++ = 0;
1916 *npp = 0;
1917 n = pcre_get_stringnumber(re, (char *)copynamesptr);
1918 if (n < 0)
1919 fprintf(outfile, "no parentheses with name \"%s\"\n", copynamesptr);
1920 copynamesptr = npp;
1921 }
1922 else if (*p == '+')
1923 {
1924 callout_extra = 1;
1925 p++;
1926 }
1927 else if (*p == '-')
1928 {
1929 pcre_callout = NULL;
1930 p++;
1931 }
1932 else if (*p == '!')
1933 {
1934 callout_fail_id = 0;
1935 p++;
1936 while(isdigit(*p))
1937 callout_fail_id = callout_fail_id * 10 + *p++ - '0';
1938 callout_fail_count = 0;
1939 if (*p == '!')
1940 {
1941 p++;
1942 while(isdigit(*p))
1943 callout_fail_count = callout_fail_count * 10 + *p++ - '0';
1944 }
1945 }
1946 else if (*p == '*')
1947 {
1948 int sign = 1;
1949 callout_data = 0;
1950 if (*(++p) == '-') { sign = -1; p++; }
1951 while(isdigit(*p))
1952 callout_data = callout_data * 10 + *p++ - '0';
1953 callout_data *= sign;
1954 callout_data_set = 1;
1955 }
1956 continue;
1957
1958 #if !defined NODFA
1959 case 'D':
1960 #if !defined NOPOSIX
1961 if (posix || do_posix)
1962 printf("** Can't use dfa matching in POSIX mode: \\D ignored\n");
1963 else
1964 #endif
1965 use_dfa = 1;
1966 continue;
1967
1968 case 'F':
1969 options |= PCRE_DFA_SHORTEST;
1970 continue;
1971 #endif
1972
1973 case 'G':
1974 if (isdigit(*p))
1975 {
1976 while(isdigit(*p)) n = n * 10 + *p++ - '0';
1977 getstrings |= 1 << n;
1978 }
1979 else if (isalnum(*p))
1980 {
1981 uschar *npp = getnamesptr;
1982 while (isalnum(*p)) *npp++ = *p++;
1983 *npp++ = 0;
1984 *npp = 0;
1985 n = pcre_get_stringnumber(re, (char *)getnamesptr);
1986 if (n < 0)
1987 fprintf(outfile, "no parentheses with name \"%s\"\n", getnamesptr);
1988 getnamesptr = npp;
1989 }
1990 continue;
1991
1992 case 'L':
1993 getlist = 1;
1994 continue;
1995
1996 case 'M':
1997 find_match_limit = 1;
1998 continue;
1999
2000 case 'N':
2001 if ((options & PCRE_NOTEMPTY) != 0)
2002 options = (options & ~PCRE_NOTEMPTY) | PCRE_NOTEMPTY_ATSTART;
2003 else
2004 options |= PCRE_NOTEMPTY;
2005 continue;
2006
2007 case 'O':
2008 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2009 if (n > size_offsets_max)
2010 {
2011 size_offsets_max = n;
2012 free(offsets);
2013 use_offsets = offsets = (int *)malloc(size_offsets_max * sizeof(int));
2014 if (offsets == NULL)
2015 {
2016 printf("** Failed to get %d bytes of memory for offsets vector\n",
2017 (int)(size_offsets_max * sizeof(int)));
2018 yield = 1;
2019 goto EXIT;
2020 }
2021 }
2022 use_size_offsets = n;
2023 if (n == 0) use_offsets = NULL; /* Ensures it can't write to it */
2024 continue;
2025
2026 case 'P':
2027 options |= ((options & PCRE_PARTIAL_SOFT) == 0)?
2028 PCRE_PARTIAL_SOFT : PCRE_PARTIAL_HARD;
2029 continue;
2030
2031 case 'Q':
2032 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2033 if (extra == NULL)
2034 {
2035 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2036 extra->flags = 0;
2037 }
2038 extra->flags |= PCRE_EXTRA_MATCH_LIMIT_RECURSION;
2039 extra->match_limit_recursion = n;
2040 continue;
2041
2042 case 'q':
2043 while(isdigit(*p)) n = n * 10 + *p++ - '0';
2044 if (extra == NULL)
2045 {
2046 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2047 extra->flags = 0;
2048 }
2049 extra->flags |= PCRE_EXTRA_MATCH_LIMIT;
2050 extra->match_limit = n;
2051 continue;
2052
2053 #if !defined NODFA
2054 case 'R':
2055 options |= PCRE_DFA_RESTART;
2056 continue;
2057 #endif
2058
2059 case 'S':
2060 show_malloc = 1;
2061 continue;
2062
2063 case 'Y':
2064 options |= PCRE_NO_START_OPTIMIZE;
2065 continue;
2066
2067 case 'Z':
2068 options |= PCRE_NOTEOL;
2069 continue;
2070
2071 case '?':
2072 options |= PCRE_NO_UTF8_CHECK;
2073 continue;
2074
2075 case '<':
2076 {
2077 int x = check_newline(p, outfile);
2078 if (x == 0) goto NEXT_DATA;
2079 options |= x;
2080 while (*p++ != '>');
2081 }
2082 continue;
2083 }
2084 *q++ = c;
2085 }
2086 *q = 0;
2087 len = (int)(q - dbuffer);
2088
2089 /* Move the data to the end of the buffer so that a read over the end of
2090 the buffer will be seen by valgrind, even if it doesn't cause a crash. If
2091 we are using the POSIX interface, we must include the terminating zero. */
2092
2093 #if !defined NOPOSIX
2094 if (posix || do_posix)
2095 {
2096 memmove(bptr + buffer_size - len - 1, bptr, len + 1);
2097 bptr += buffer_size - len - 1;
2098 }
2099 else
2100 #endif
2101 {
2102 memmove(bptr + buffer_size - len, bptr, len);
2103 bptr += buffer_size - len;
2104 }
2105
2106 if ((all_use_dfa || use_dfa) && find_match_limit)
2107 {
2108 printf("**Match limit not relevant for DFA matching: ignored\n");
2109 find_match_limit = 0;
2110 }
2111
2112 /* Handle matching via the POSIX interface, which does not
2113 support timing or playing with the match limit or callout data. */
2114
2115 #if !defined NOPOSIX
2116 if (posix || do_posix)
2117 {
2118 int rc;
2119 int eflags = 0;
2120 regmatch_t *pmatch = NULL;
2121 if (use_size_offsets > 0)
2122 pmatch = (regmatch_t *)malloc(sizeof(regmatch_t) * use_size_offsets);
2123 if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
2124 if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
2125 if ((options & PCRE_NOTEMPTY) != 0) eflags |= REG_NOTEMPTY;
2126
2127 rc = regexec(&preg, (const char *)bptr, use_size_offsets, pmatch, eflags);
2128
2129 if (rc != 0)
2130 {
2131 (void)regerror(rc, &preg, (char *)buffer, buffer_size);
2132 fprintf(outfile, "No match: POSIX code %d: %s\n", rc, buffer);
2133 }
2134 else if ((((const pcre *)preg.re_pcre)->options & PCRE_NO_AUTO_CAPTURE)
2135 != 0)
2136 {
2137 fprintf(outfile, "Matched with REG_NOSUB\n");
2138 }
2139 else
2140 {
2141 size_t i;
2142 for (i = 0; i < (size_t)use_size_offsets; i++)
2143 {
2144 if (pmatch[i].rm_so >= 0)
2145 {
2146 fprintf(outfile, "%2d: ", (int)i);
2147 (void)pchars(dbuffer + pmatch[i].rm_so,
2148 pmatch[i].rm_eo - pmatch[i].rm_so, outfile);
2149 fprintf(outfile, "\n");
2150 if (i == 0 && do_showrest)
2151 {
2152 fprintf(outfile, " 0+ ");
2153 (void)pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo,
2154 outfile);
2155 fprintf(outfile, "\n");
2156 }
2157 }
2158 }
2159 }
2160 free(pmatch);
2161 }
2162
2163 /* Handle matching via the native interface - repeats for /g and /G */
2164
2165 else
2166 #endif /* !defined NOPOSIX */
2167
2168 for (;; gmatched++) /* Loop for /g or /G */
2169 {
2170 markptr = NULL;
2171
2172 if (timeitm > 0)
2173 {
2174 register int i;
2175 clock_t time_taken;
2176 clock_t start_time = clock();
2177
2178 #if !defined NODFA
2179 if (all_use_dfa || use_dfa)
2180 {
2181 int workspace[1000];
2182 for (i = 0; i < timeitm; i++)
2183 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2184 options | g_notempty, use_offsets, use_size_offsets, workspace,
2185 sizeof(workspace)/sizeof(int));
2186 }
2187 else
2188 #endif
2189
2190 for (i = 0; i < timeitm; i++)
2191 count = pcre_exec(re, extra, (char *)bptr, len,
2192 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2193
2194 time_taken = clock() - start_time;
2195 fprintf(outfile, "Execute time %.4f milliseconds\n",
2196 (((double)time_taken * 1000.0) / (double)timeitm) /
2197 (double)CLOCKS_PER_SEC);
2198 }
2199
2200 /* If find_match_limit is set, we want to do repeated matches with
2201 varying limits in order to find the minimum value for the match limit and
2202 for the recursion limit. */
2203
2204 if (find_match_limit)
2205 {
2206 if (extra == NULL)
2207 {
2208 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2209 extra->flags = 0;
2210 }
2211
2212 (void)check_match_limit(re, extra, bptr, len, start_offset,
2213 options|g_notempty, use_offsets, use_size_offsets,
2214 PCRE_EXTRA_MATCH_LIMIT, &(extra->match_limit),
2215 PCRE_ERROR_MATCHLIMIT, "match()");
2216
2217 count = check_match_limit(re, extra, bptr, len, start_offset,
2218 options|g_notempty, use_offsets, use_size_offsets,
2219 PCRE_EXTRA_MATCH_LIMIT_RECURSION, &(extra->match_limit_recursion),
2220 PCRE_ERROR_RECURSIONLIMIT, "match() recursion");
2221 }
2222
2223 /* If callout_data is set, use the interface with additional data */
2224
2225 else if (callout_data_set)
2226 {
2227 if (extra == NULL)
2228 {
2229 extra = (pcre_extra *)malloc(sizeof(pcre_extra));
2230 extra->flags = 0;
2231 }
2232 extra->flags |= PCRE_EXTRA_CALLOUT_DATA;
2233 extra->callout_data = &callout_data;
2234 count = pcre_exec(re, extra, (char *)bptr, len, start_offset,
2235 options | g_notempty, use_offsets, use_size_offsets);
2236 extra->flags &= ~PCRE_EXTRA_CALLOUT_DATA;
2237 }
2238
2239 /* The normal case is just to do the match once, with the default
2240 value of match_limit. */
2241
2242 #if !defined NODFA
2243 else if (all_use_dfa || use_dfa)
2244 {
2245 int workspace[1000];
2246 count = pcre_dfa_exec(re, extra, (char *)bptr, len, start_offset,
2247 options | g_notempty, use_offsets, use_size_offsets, workspace,
2248 sizeof(workspace)/sizeof(int));
2249 if (count == 0)
2250 {
2251 fprintf(outfile, "Matched, but too many subsidiary matches\n");
2252 count = use_size_offsets/2;
2253 }
2254 }
2255 #endif
2256
2257 else
2258 {
2259 count = pcre_exec(re, extra, (char *)bptr, len,
2260 start_offset, options | g_notempty, use_offsets, use_size_offsets);
2261 if (count == 0)
2262 {
2263 fprintf(outfile, "Matched, but too many substrings\n");
2264 count = use_size_offsets/3;
2265 }
2266 }
2267
2268 /* Matched */
2269
2270 if (count >= 0)
2271 {
2272 int i, maxcount;
2273
2274 #if !defined NODFA
2275 if (all_use_dfa || use_dfa) maxcount = use_size_offsets/2; else
2276 #endif
2277 maxcount = use_size_offsets/3;
2278
2279 /* This is a check against a lunatic return value. */
2280
2281 if (count > maxcount)
2282 {
2283 fprintf(outfile,
2284 "** PCRE error: returned count %d is too big for offset size %d\n",
2285 count, use_size_offsets);
2286 count = use_size_offsets/3;
2287 if (do_g || do_G)
2288 {
2289 fprintf(outfile, "** /%c loop abandoned\n", do_g? 'g' : 'G');
2290 do_g = do_G = FALSE; /* Break g/G loop */
2291 }
2292 }
2293
2294 for (i = 0; i < count * 2; i += 2)
2295 {
2296 if (use_offsets[i] < 0)
2297 fprintf(outfile, "%2d: <unset>\n", i/2);
2298 else
2299 {
2300 fprintf(outfile, "%2d: ", i/2);
2301 (void)pchars(bptr + use_offsets[i],
2302 use_offsets[i+1] - use_offsets[i], outfile);
2303 fprintf(outfile, "\n");
2304 if (i == 0)
2305 {
2306 if (do_showrest)
2307 {
2308 fprintf(outfile, " 0+ ");
2309 (void)pchars(bptr + use_offsets[i+1], len - use_offsets[i+1],
2310 outfile);
2311 fprintf(outfile, "\n");
2312 }
2313 }
2314 }
2315 }
2316
2317 if (markptr != NULL) fprintf(outfile, "MK: %s\n", markptr);
2318
2319 for (i = 0; i < 32; i++)
2320 {
2321 if ((copystrings & (1 << i)) != 0)
2322 {
2323 char copybuffer[256];
2324 int rc = pcre_copy_substring((char *)bptr, use_offsets, count,
2325 i, copybuffer, sizeof(copybuffer));
2326 if (rc < 0)
2327 fprintf(outfile, "copy substring %d failed %d\n", i, rc);
2328 else
2329 fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
2330 }
2331 }
2332
2333 for (copynamesptr = copynames;
2334 *copynamesptr != 0;
2335 copynamesptr += (int)strlen((char*)copynamesptr) + 1)
2336 {
2337 char copybuffer[256];
2338 int rc = pcre_copy_named_substring(re, (char *)bptr, use_offsets,
2339 count, (char *)copynamesptr, copybuffer, sizeof(copybuffer));
2340 if (rc < 0)
2341 fprintf(outfile, "copy substring %s failed %d\n", copynamesptr, rc);
2342 else
2343 fprintf(outfile, " C %s (%d) %s\n", copybuffer, rc, copynamesptr);
2344 }
2345
2346 for (i = 0; i < 32; i++)
2347 {
2348 if ((getstrings & (1 << i)) != 0)
2349 {
2350 const char *substring;
2351 int rc = pcre_get_substring((char *)bptr, use_offsets, count,
2352 i, &substring);
2353 if (rc < 0)
2354 fprintf(outfile, "get substring %d failed %d\n", i, rc);
2355 else
2356 {
2357 fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
2358 pcre_free_substring(substring);
2359 }
2360 }
2361 }
2362
2363 for (getnamesptr = getnames;
2364 *getnamesptr != 0;
2365 getnamesptr += (int)strlen((char*)getnamesptr) + 1)
2366 {
2367 const char *substring;
2368 int rc = pcre_get_named_substring(re, (char *)bptr, use_offsets,
2369 count, (char *)getnamesptr, &substring);
2370 if (rc < 0)
2371 fprintf(outfile, "copy substring %s failed %d\n", getnamesptr, rc);
2372 else
2373 {
2374 fprintf(outfile, " G %s (%d) %s\n", substring, rc, getnamesptr);
2375 pcre_free_substring(substring);
2376 }
2377 }
2378
2379 if (getlist)
2380 {
2381 const char **stringlist;
2382 int rc = pcre_get_substring_list((char *)bptr, use_offsets, count,
2383 &stringlist);
2384 if (rc < 0)
2385 fprintf(outfile, "get substring list failed %d\n", rc);
2386 else
2387 {
2388 for (i = 0; i < count; i++)
2389 fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
2390 if (stringlist[i] != NULL)
2391 fprintf(outfile, "string list not terminated by NULL\n");
2392 /* free((void *)stringlist); */
2393 pcre_free_substring_list(stringlist);
2394 }
2395 }
2396 }
2397
2398 /* There was a partial match */
2399
2400 else if (count == PCRE_ERROR_PARTIAL)
2401 {
2402 if (markptr == NULL) fprintf(outfile, "Partial match");
2403 else fprintf(outfile, "Partial match, mark=%s", markptr);
2404 if (use_size_offsets > 1)
2405 {
2406 fprintf(outfile, ": ");
2407 pchars(bptr + use_offsets[0], use_offsets[1] - use_offsets[0],
2408 outfile);
2409 }
2410 fprintf(outfile, "\n");
2411 break; /* Out of the /g loop */
2412 }
2413
2414 /* Failed to match. If this is a /g or /G loop and we previously set
2415 g_notempty after a null match, this is not necessarily the end. We want
2416 to advance the start offset, and continue. We won't be at the end of the
2417 string - that was checked before setting g_notempty.
2418
2419 Complication arises in the case when the newline option is "any" or
2420 "anycrlf". If the previous match was at the end of a line terminated by
2421 CRLF, an advance of one character just passes the \r, whereas we should
2422 prefer the longer newline sequence, as does the code in pcre_exec().
2423 Fudge the offset value to achieve this.
2424
2425 Otherwise, in the case of UTF-8 matching, the advance must be one
2426 character, not one byte. */
2427
2428 else
2429 {
2430 if (g_notempty != 0)
2431 {
2432 int onechar = 1;
2433 unsigned int obits = ((real_pcre *)re)->options;
2434 use_offsets[0] = start_offset;
2435 if ((obits & PCRE_NEWLINE_BITS) == 0)
2436 {
2437 int d;
2438 (void)pcre_config(PCRE_CONFIG_NEWLINE, &d);
2439 /* Note that these values are always the ASCII ones, even in
2440 EBCDIC environments. CR = 13, NL = 10. */
2441 obits = (d == 13)? PCRE_NEWLINE_CR :
2442 (d == 10)? PCRE_NEWLINE_LF :
2443 (d == (13<<8 | 10))? PCRE_NEWLINE_CRLF :
2444 (d == -2)? PCRE_NEWLINE_ANYCRLF :
2445 (d == -1)? PCRE_NEWLINE_ANY : 0;
2446 }
2447 if (((obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANY ||
2448 (obits & PCRE_NEWLINE_BITS) == PCRE_NEWLINE_ANYCRLF)
2449 &&
2450 start_offset < len - 1 &&
2451 bptr[start_offset] == '\r' &&
2452 bptr[start_offset+1] == '\n')
2453 onechar++;
2454 else if (use_utf8)
2455 {
2456 while (start_offset + onechar < len)
2457 {
2458 int tb = bptr[start_offset+onechar];
2459 if (tb <= 127) break;
2460 tb &= 0xc0;
2461 if (tb != 0 && tb != 0xc0) onechar++;
2462 }
2463 }
2464 use_offsets[1] = start_offset + onechar;
2465 }
2466 else
2467 {
2468 if (count == PCRE_ERROR_NOMATCH)
2469 {
2470 if (gmatched == 0)
2471 {
2472 if (markptr == NULL) fprintf(outfile, "No match\n");
2473 else fprintf(outfile, "No match, mark = %s\n", markptr);
2474 }
2475 }
2476 else fprintf(outfile, "Error %d\n", count);
2477 break; /* Out of the /g loop */
2478 }
2479 }
2480
2481 /* If not /g or /G we are done */
2482
2483 if (!do_g && !do_G) break;
2484
2485 /* If we have matched an empty string, first check to see if we are at
2486 the end of the subject. If so, the /g loop is over. Otherwise, mimic what
2487 Perl's /g option does. This turns out to be rather cunning. First we set
2488 PCRE_NOTEMPTY_ATSTART and PCRE_ANCHORED and try the match again at the
2489 same point. If this fails (picked up above) we advance to the next
2490 character. */
2491
2492 g_notempty = 0;
2493
2494 if (use_offsets[0] == use_offsets[1])
2495 {
2496 if (use_offsets[0] == len) break;
2497 g_notempty = PCRE_NOTEMPTY_ATSTART | PCRE_ANCHORED;
2498 }
2499
2500 /* For /g, update the start offset, leaving the rest alone */
2501
2502 if (do_g) start_offset = use_offsets[1];
2503
2504 /* For /G, update the pointer and length */
2505
2506 else
2507 {
2508 bptr += use_offsets[1];
2509 len -= use_offsets[1];
2510 }
2511 } /* End of loop for /g and /G */
2512
2513 NEXT_DATA: continue;
2514 } /* End of loop for data lines */
2515
2516 CONTINUE:
2517
2518 #if !defined NOPOSIX
2519 if (posix || do_posix) regfree(&preg);
2520 #endif
2521
2522 if (re != NULL) new_free(re);
2523 if (extra != NULL) new_free(extra);
2524 if (tables != NULL)
2525 {
2526 new_free((void *)tables);
2527 setlocale(LC_CTYPE, "C");
2528 locale_set = 0;
2529 }
2530 }
2531
2532 if (infile == stdin) fprintf(outfile, "\n");
2533
2534 EXIT:
2535
2536 if (infile != NULL && infile != stdin) fclose(infile);
2537 if (outfile != NULL && outfile != stdout) fclose(outfile);
2538
2539 free(buffer);
2540 free(dbuffer);
2541 free(pbuffer);
2542 free(offsets);
2543
2544 return yield;
2545 }
2546
2547 /* End of pcretest.c */

Properties

Name Value
svn:eol-style native
svn:keywords "Author Date Id Revision Url"

  ViewVC Help
Powered by ViewVC 1.1.5