/[pcre]/code/trunk/pcretest.c
ViewVC logotype

Diff of /code/trunk/pcretest.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 29 by nigel, Sat Feb 24 21:38:53 2007 UTC revision 49 by nigel, Sat Feb 24 21:39:33 2007 UTC
# Line 12  Line 12 
12  /* Use the internal info for displaying the results of pcre_study(). */  /* Use the internal info for displaying the results of pcre_study(). */
13    
14  #include "internal.h"  #include "internal.h"
15    
16    /* It is possible to compile this test program without including support for
17    testing the POSIX interface, though this is not available via the standard
18    Makefile. */
19    
20    #if !defined NOPOSIX
21  #include "pcreposix.h"  #include "pcreposix.h"
22    #endif
23    
24  #ifndef CLOCKS_PER_SEC  #ifndef CLOCKS_PER_SEC
25  #ifdef CLK_TCK  #ifdef CLK_TCK
# Line 27  Line 34 
34    
35  static FILE *outfile;  static FILE *outfile;
36  static int log_store = 0;  static int log_store = 0;
37    static size_t gotten_store;
38    
39    
40    
/* Tables shared by the two UTF-8 conversion functions below. Index i means
"a character that occupies i additional (continuation) bytes":

  utf8_table1[i]  largest character value that fits in 1+i bytes
  utf8_table2[i]  marker bits that are ORed into the lead byte
  utf8_table3[i]  mask that extracts the data bits from the lead byte
*/

static int utf8_table1[] = {
  0x0000007f, 0x000007ff, 0x0000ffff, 0x001fffff, 0x03ffffff, 0x7fffffff};

static int utf8_table2[] = {
  0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};

static int utf8_table3[] = {
  0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};


/*************************************************
*       Convert character value to UTF-8         *
*************************************************/

/* This function takes an integer value in the range 0 - 0x7fffffff
and encodes it as a UTF-8 character in 1 to 6 bytes. The lead byte carries
the most significant bits of the value and each following continuation byte
carries the next six bits, so the least significant six bits end up in the
last byte.

Arguments:
  cvalue     the character value
  buffer     pointer to buffer for result - at least 6 bytes long

Returns:     number of characters placed in the buffer
             -1 if input character is negative
             0 if input character is positive but too big (only when
             int is longer than 32 bits)
*/

static int
ord2utf8(int cvalue, unsigned char *buffer)
{
int i, j;
int ntables = (int)(sizeof(utf8_table1)/sizeof(int));

if (cvalue < 0) return -1;

/* Find how many additional bytes are needed */

for (i = 0; i < ntables; i++)
  if (cvalue <= utf8_table1[i]) break;
if (i >= ntables) return 0;

/* Fill in the continuation bytes from the last one backwards, peeling six
bits off the bottom of the value each time. What is left over afterwards is
exactly the data that belongs in the lead byte. */

buffer += i;
for (j = i; j > 0; j--)
  {
  *buffer-- = (unsigned char)(0x80 | (cvalue & 0x3f));
  cvalue >>= 6;
  }
*buffer = (unsigned char)(utf8_table2[i] | cvalue);
return i + 1;
}


/*************************************************
*            Convert UTF-8 string to value       *
*************************************************/

/* This function takes one or more bytes that represents a UTF-8 character,
and returns the value of the character. The data bits of the lead byte are
the most significant bits of the result; each continuation byte supplies the
next six bits below them.

Argument:
  buffer   a pointer to the byte vector
  vptr     a pointer to an int to receive the value

Returns:   >  0 => the number of bytes consumed
           -6 to 0 => malformed UTF-8 character at offset = (-return)
*/

int
utf82ord(unsigned char *buffer, int *vptr)
{
int c = *buffer++;
int d = c;
int i, j, s;
int ntables = (int)(sizeof(utf8_table1)/sizeof(int));

/* Count the leading 1-bits of the first byte */

for (i = -1; i < 6; i++)               /* i is number of additional bytes */
  {
  if ((d & 0x80) == 0) break;
  d <<= 1;
  }

if (i == -1) { *vptr = c; return 1; }  /* ascii character */
if (i == 0 || i == 6) return 0;        /* invalid UTF-8 */

/* i now has a value in the range 1-5 */

s = 6*i;                               /* shift for the lead byte's data */
d = (c & utf8_table3[i]) << s;

for (j = 0; j < i; j++)
  {
  c = *buffer++;
  if ((c & 0xc0) != 0x80) return -(j+1);
  s -= 6;
  d |= (c & 0x3f) << s;
  }

/* Check that encoding was the correct unique one: the value must need
exactly i additional bytes, no fewer. */

for (j = 0; j < ntables; j++)
  if (d <= utf8_table1[j]) break;
if (j != i) return -(i+1);

/* Valid value */

*vptr = d;
return i+1;
}
142    
143    
144    
145    
146    
147    
# Line 41  static const char *OP_names[] = { Line 156  static const char *OP_names[] = {
156    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
157    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{", "{",
158    "*", "*?", "+", "+?", "?", "??", "{", "{",    "*", "*?", "+", "+?", "?", "??", "{", "{",
159    "class", "Ref",    "class", "Ref", "Recurse",
160    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",    "Alt", "Ket", "KetRmax", "KetRmin", "Assert", "Assert not",
161    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",    "AssertB", "AssertB not", "Reverse", "Once", "Cond", "Cref",
162    "Brazero", "Braminzero", "Bra"    "Brazero", "Braminzero", "Bra"
163  };  };
164    
165    
166  static void print_internals(pcre *re, FILE *outfile)  static void print_internals(pcre *re)
167  {  {
168  unsigned char *code = ((real_pcre *)re)->code;  unsigned char *code = ((real_pcre *)re)->code;
169    
# Line 257  for(;;) Line 372  for(;;)
372    
373    
374    
375  /* Character string printing function. */  /* Character string printing function. A "normal" and a UTF-8 version. */
376    
377  static void pchars(unsigned char *p, int length)  static void pchars(unsigned char *p, int length, int utf8)
378  {  {
379  int c;  int c;
380  while (length-- > 0)  while (length-- > 0)
381      {
382      if (utf8)
383        {
384        int rc = utf82ord(p, &c);
385        if (rc > 0)
386          {
387          length -= rc - 1;
388          p += rc;
389          if (c < 256 && isprint(c)) fprintf(outfile, "%c", c);
390            else fprintf(outfile, "\\x{%02x}", c);
391          continue;
392          }
393        }
394    
395       /* Not UTF-8, or malformed UTF-8  */
396    
397    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);    if (isprint(c = *(p++))) fprintf(outfile, "%c", c);
398      else fprintf(outfile, "\\x%02x", c);      else fprintf(outfile, "\\x%02x", c);
399      }
400  }  }
401    
402    
# Line 274  compiled re. */ Line 406  compiled re. */
406    
407  static void *new_malloc(size_t size)  static void *new_malloc(size_t size)
408  {  {
409  if (log_store) fprintf(outfile, "Store size request: %d\n", (int)size);  gotten_store = size;
410    if (log_store)
411      fprintf(outfile, "Memory allocation (code space): %d\n",
412        (int)((int)size - offsetof(real_pcre, code[0])));
413  return malloc(size);  return malloc(size);
414  }  }
415    
416    
417    
418    
419    /* Get one piece of information from the pcre_fullinfo() function */
420    
421    static void new_info(pcre *re, pcre_extra *study, int option, void *ptr)
422    {
423    int rc;
424    if ((rc = pcre_fullinfo(re, study, option, ptr)) < 0)
425      fprintf(outfile, "Error %d from pcre_fullinfo(%d)\n", rc, option);
426    }
427    
428    
429    
430    
431  /* Read lines from named file or stdin and write to named file or stdout; lines  /* Read lines from named file or stdin and write to named file or stdout; lines
432  consist of a regular expression, in delimiters and optionally followed by  consist of a regular expression, in delimiters and optionally followed by
433  options, followed by a set of test data, terminated by an empty line. */  options, followed by a set of test data, terminated by an empty line. */
# Line 292  int study_options = 0; Line 440  int study_options = 0;
440  int op = 1;  int op = 1;
441  int timeit = 0;  int timeit = 0;
442  int showinfo = 0;  int showinfo = 0;
443    int showstore = 0;
444  int posix = 0;  int posix = 0;
445  int debug = 0;  int debug = 0;
446  int done = 0;  int done = 0;
# Line 306  outfile = stdout; Line 455  outfile = stdout;
455    
456  while (argc > 1 && argv[op][0] == '-')  while (argc > 1 && argv[op][0] == '-')
457    {    {
458    if (strcmp(argv[op], "-s") == 0) log_store = 1;    if (strcmp(argv[op], "-s") == 0 || strcmp(argv[op], "-m") == 0)
459        showstore = 1;
460    else if (strcmp(argv[op], "-t") == 0) timeit = 1;    else if (strcmp(argv[op], "-t") == 0) timeit = 1;
461    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;    else if (strcmp(argv[op], "-i") == 0) showinfo = 1;
462    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;    else if (strcmp(argv[op], "-d") == 0) showinfo = debug = 1;
# Line 362  while (!done) Line 512  while (!done)
512    {    {
513    pcre *re = NULL;    pcre *re = NULL;
514    pcre_extra *extra = NULL;    pcre_extra *extra = NULL;
515    
516    #if !defined NOPOSIX  /* There are still compilers that require no indent */
517    regex_t preg;    regex_t preg;
518      int do_posix = 0;
519    #endif
520    
521    const char *error;    const char *error;
522    unsigned char *p, *pp, *ppp;    unsigned char *p, *pp, *ppp;
523    unsigned const char *tables = NULL;    unsigned const char *tables = NULL;
524    int do_study = 0;    int do_study = 0;
525    int do_debug = debug;    int do_debug = debug;
526      int do_G = 0;
527      int do_g = 0;
528    int do_showinfo = showinfo;    int do_showinfo = showinfo;
529    int do_posix = 0;    int do_showrest = 0;
530      int utf8 = 0;
531    int erroroffset, len, delimiter;    int erroroffset, len, delimiter;
532    
533    if (infile == stdin) printf("  re> ");    if (infile == stdin) printf("  re> ");
# Line 434  while (!done) Line 592  while (!done)
592    
593    options = 0;    options = 0;
594    study_options = 0;    study_options = 0;
595      log_store = showstore;  /* default from command line */
596    
597    while (*pp != 0)    while (*pp != 0)
598      {      {
599      switch (*pp++)      switch (*pp++)
600        {        {
601          case 'g': do_g = 1; break;
602        case 'i': options |= PCRE_CASELESS; break;        case 'i': options |= PCRE_CASELESS; break;
603        case 'm': options |= PCRE_MULTILINE; break;        case 'm': options |= PCRE_MULTILINE; break;
604        case 's': options |= PCRE_DOTALL; break;        case 's': options |= PCRE_DOTALL; break;
605        case 'x': options |= PCRE_EXTENDED; break;        case 'x': options |= PCRE_EXTENDED; break;
606    
607          case '+': do_showrest = 1; break;
608        case 'A': options |= PCRE_ANCHORED; break;        case 'A': options |= PCRE_ANCHORED; break;
609        case 'D': do_debug = do_showinfo = 1; break;        case 'D': do_debug = do_showinfo = 1; break;
610        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;        case 'E': options |= PCRE_DOLLAR_ENDONLY; break;
611          case 'G': do_G = 1; break;
612        case 'I': do_showinfo = 1; break;        case 'I': do_showinfo = 1; break;
613          case 'M': log_store = 1; break;
614    
615    #if !defined NOPOSIX
616        case 'P': do_posix = 1; break;        case 'P': do_posix = 1; break;
617    #endif
618    
619        case 'S': do_study = 1; break;        case 'S': do_study = 1; break;
620        case 'U': options |= PCRE_UNGREEDY; break;        case 'U': options |= PCRE_UNGREEDY; break;
621        case 'X': options |= PCRE_EXTRA; break;        case 'X': options |= PCRE_EXTRA; break;
622          case '8': options |= PCRE_UTF8; utf8 = 1; break;
623    
624        case 'L':        case 'L':
625        ppp = pp;        ppp = pp;
# Line 476  while (!done) Line 645  while (!done)
645    timing, showing, or debugging options, nor the ability to pass over    timing, showing, or debugging options, nor the ability to pass over
646    local character tables. */    local character tables. */
647    
648    #if !defined NOPOSIX
649    if (posix || do_posix)    if (posix || do_posix)
650      {      {
651      int rc;      int rc;
# Line 498  while (!done) Line 668  while (!done)
668    /* Handle compiling via the native interface */    /* Handle compiling via the native interface */
669    
670    else    else
671    #endif  /* !defined NOPOSIX */
672    
673      {      {
674      if (timeit)      if (timeit)
675        {        {
# Line 542  while (!done) Line 714  while (!done)
714        goto CONTINUE;        goto CONTINUE;
715        }        }
716    
717      /* Compilation succeeded; print data if required */      /* Compilation succeeded; print data if required. There are now two
718        info-returning functions. The old one has a limited interface and
719        returns only limited data. Check that it agrees with the newer one. */
720    
721      if (do_showinfo)      if (do_showinfo)
722        {        {
723        int first_char, count;        int old_first_char, old_options, old_count;
724          int count, backrefmax, first_char, need_char;
725        if (do_debug) print_internals(re, outfile);        size_t size;
726    
727          if (do_debug) print_internals(re);
728    
729          new_info(re, NULL, PCRE_INFO_OPTIONS, &options);
730          new_info(re, NULL, PCRE_INFO_SIZE, &size);
731          new_info(re, NULL, PCRE_INFO_CAPTURECOUNT, &count);
732          new_info(re, NULL, PCRE_INFO_BACKREFMAX, &backrefmax);
733          new_info(re, NULL, PCRE_INFO_FIRSTCHAR, &first_char);
734          new_info(re, NULL, PCRE_INFO_LASTLITERAL, &need_char);
735    
736        count = pcre_info(re, &options, &first_char);        old_count = pcre_info(re, &old_options, &old_first_char);
737        if (count < 0) fprintf(outfile,        if (count < 0) fprintf(outfile,
738          "Error %d while reading info\n", count);          "Error %d from pcre_info()\n", count);
739        else        else
740          {          {
741          fprintf(outfile, "Identifying subpattern count = %d\n", count);          if (old_count != count) fprintf(outfile,
742          if (options == 0) fprintf(outfile, "No options\n");            "Count disagreement: pcre_fullinfo=%d pcre_info=%d\n", count,
743            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s\n",              old_count);
744              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",  
745              ((options & PCRE_CASELESS) != 0)? " caseless" : "",          if (old_first_char != first_char) fprintf(outfile,
746              ((options & PCRE_EXTENDED) != 0)? " extended" : "",            "First char disagreement: pcre_fullinfo=%d pcre_info=%d\n",
747              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",              first_char, old_first_char);
748              ((options & PCRE_DOTALL) != 0)? " dotall" : "",  
749              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",          if (old_options != options) fprintf(outfile,
750              ((options & PCRE_EXTRA) != 0)? " extra" : "",            "Options disagreement: pcre_fullinfo=%d pcre_info=%d\n", options,
751              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "");              old_options);
752          if (first_char == -1)          }
753            {  
754            fprintf(outfile, "First char at start or follows \\n\n");        if (size != gotten_store) fprintf(outfile,
755            }          "Size disagreement: pcre_fullinfo=%d call to malloc for %d\n",
756          else if (first_char < 0)          size, gotten_store);
757            {  
758            fprintf(outfile, "No first char\n");        fprintf(outfile, "Capturing subpattern count = %d\n", count);
759            }        if (backrefmax > 0)
760            fprintf(outfile, "Max back reference = %d\n", backrefmax);
761          if (options == 0) fprintf(outfile, "No options\n");
762            else fprintf(outfile, "Options:%s%s%s%s%s%s%s%s%s\n",
763              ((options & PCRE_ANCHORED) != 0)? " anchored" : "",
764              ((options & PCRE_CASELESS) != 0)? " caseless" : "",
765              ((options & PCRE_EXTENDED) != 0)? " extended" : "",
766              ((options & PCRE_MULTILINE) != 0)? " multiline" : "",
767              ((options & PCRE_DOTALL) != 0)? " dotall" : "",
768              ((options & PCRE_DOLLAR_ENDONLY) != 0)? " dollar_endonly" : "",
769              ((options & PCRE_EXTRA) != 0)? " extra" : "",
770              ((options & PCRE_UNGREEDY) != 0)? " ungreedy" : "",
771              ((options & PCRE_UTF8) != 0)? " utf8" : "");
772    
773          if (((((real_pcre *)re)->options) & PCRE_ICHANGED) != 0)
774            fprintf(outfile, "Case state changes\n");
775    
776          if (first_char == -1)
777            {
778            fprintf(outfile, "First char at start or follows \\n\n");
779            }
780          else if (first_char < 0)
781            {
782            fprintf(outfile, "No first char\n");
783            }
784          else
785            {
786            if (isprint(first_char))
787              fprintf(outfile, "First char = \'%c\'\n", first_char);
788          else          else
789            {            fprintf(outfile, "First char = %d\n", first_char);
790            if (isprint(first_char))          }
791              fprintf(outfile, "First char = \'%c\'\n", first_char);  
792            else        if (need_char < 0)
793              fprintf(outfile, "First char = %d\n", first_char);          {
794            }          fprintf(outfile, "No need char\n");
795            }
796          else
797            {
798            if (isprint(need_char))
799              fprintf(outfile, "Need char = \'%c\'\n", need_char);
800            else
801              fprintf(outfile, "Need char = %d\n", need_char);
802          }          }
803        }        }
804    
# Line 609  while (!done) Line 827  while (!done)
827        else if (extra == NULL)        else if (extra == NULL)
828          fprintf(outfile, "Study returned NULL\n");          fprintf(outfile, "Study returned NULL\n");
829    
       /* This looks at internal information. A bit kludgy to do it this  
       way, but it is useful for testing. */  
   
830        else if (do_showinfo)        else if (do_showinfo)
831          {          {
832          real_pcre_extra *xx = (real_pcre_extra *)extra;          uschar *start_bits = NULL;
833          if ((xx->options & PCRE_STUDY_MAPPED) == 0)          new_info(re, extra, PCRE_INFO_FIRSTTABLE, &start_bits);
834            if (start_bits == NULL)
835            fprintf(outfile, "No starting character set\n");            fprintf(outfile, "No starting character set\n");
836          else          else
837            {            {
# Line 624  while (!done) Line 840  while (!done)
840            fprintf(outfile, "Starting character set: ");            fprintf(outfile, "Starting character set: ");
841            for (i = 0; i < 256; i++)            for (i = 0; i < 256; i++)
842              {              {
843              if ((xx->start_bits[i/8] & (1<<(i%8))) != 0)              if ((start_bits[i/8] & (1<<(i%8))) != 0)
844                {                {
845                if (c > 75)                if (c > 75)
846                  {                  {
# Line 654  while (!done) Line 870  while (!done)
870    for (;;)    for (;;)
871      {      {
872      unsigned char *q;      unsigned char *q;
873        unsigned char *bptr = dbuffer;
874      int count, c;      int count, c;
875      int copystrings = 0;      int copystrings = 0;
876      int getstrings = 0;      int getstrings = 0;
877      int getlist = 0;      int getlist = 0;
878        int gmatched = 0;
879        int start_offset = 0;
880        int g_notempty = 0;
881      int offsets[45];      int offsets[45];
882      int size_offsets = sizeof(offsets)/sizeof(int);      int size_offsets = sizeof(offsets)/sizeof(int);
883    
884      options = 0;      options = 0;
885    
886      if (infile == stdin) printf("  data> ");      if (infile == stdin) printf("data> ");
887      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)      if (fgets((char *)buffer, sizeof(buffer), infile) == NULL)
888        {        {
889        done = 1;        done = 1;
# Line 703  while (!done) Line 923  while (!done)
923          break;          break;
924    
925          case 'x':          case 'x':
926    
927            /* Handle \x{..} specially - new Perl thing for utf8 */
928    
929            if (*p == '{')
930              {
931              unsigned char *pt = p;
932              c = 0;
933              while (isxdigit(*(++pt)))
934                c = c * 16 + tolower(*pt) - ((isdigit(*pt))? '0' : 'W');
935              if (*pt == '}')
936                {
937                unsigned char buffer[8];
938                int ii, utn;
939                utn = ord2utf8(c, buffer);
940                for (ii = 0; ii < utn - 1; ii++) *q++ = buffer[ii];
941                c = buffer[ii];   /* Last byte */
942                p = pt + 1;
943                break;
944                }
945              /* Not correct form; fall through */
946              }
947    
948            /* Ordinary \x */
949    
950          c = 0;          c = 0;
951          while (i++ < 2 && isxdigit(*p))          while (i++ < 2 && isxdigit(*p))
952            {            {
# Line 737  while (!done) Line 981  while (!done)
981          getlist = 1;          getlist = 1;
982          continue;          continue;
983    
984            case 'N':
985            options |= PCRE_NOTEMPTY;
986            continue;
987    
988          case 'O':          case 'O':
989          while(isdigit(*p)) n = n * 10 + *p++ - '0';          while(isdigit(*p)) n = n * 10 + *p++ - '0';
990          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;          if (n <= (int)(sizeof(offsets)/sizeof(int))) size_offsets = n;
# Line 754  while (!done) Line 1002  while (!done)
1002      /* Handle matching via the POSIX interface, which does not      /* Handle matching via the POSIX interface, which does not
1003      support timing. */      support timing. */
1004    
1005    #if !defined NOPOSIX
1006      if (posix || do_posix)      if (posix || do_posix)
1007        {        {
1008        int rc;        int rc;
1009        int eflags = 0;        int eflags = 0;
1010        regmatch_t pmatch[30];        regmatch_t pmatch[sizeof(offsets)/sizeof(int)];
1011        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;        if ((options & PCRE_NOTBOL) != 0) eflags |= REG_NOTBOL;
1012        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;        if ((options & PCRE_NOTEOL) != 0) eflags |= REG_NOTEOL;
1013    
1014        rc = regexec(&preg, (char *)dbuffer, sizeof(pmatch)/sizeof(regmatch_t),        rc = regexec(&preg, (const char *)bptr, size_offsets, pmatch, eflags);
         pmatch, eflags);  
1015    
1016        if (rc != 0)        if (rc != 0)
1017          {          {
# Line 773  while (!done) Line 1021  while (!done)
1021        else        else
1022          {          {
1023          size_t i;          size_t i;
1024          for (i = 0; i < sizeof(pmatch)/sizeof(regmatch_t); i++)          for (i = 0; i < size_offsets; i++)
1025            {            {
1026            if (pmatch[i].rm_so >= 0)            if (pmatch[i].rm_so >= 0)
1027              {              {
1028              fprintf(outfile, "%2d: ", (int)i);              fprintf(outfile, "%2d: ", (int)i);
1029              pchars(dbuffer + pmatch[i].rm_so,              pchars(dbuffer + pmatch[i].rm_so,
1030                pmatch[i].rm_eo - pmatch[i].rm_so);                pmatch[i].rm_eo - pmatch[i].rm_so, utf8);
1031              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1032                if (i == 0 && do_showrest)
1033                  {
1034                  fprintf(outfile, " 0+ ");
1035                  pchars(dbuffer + pmatch[i].rm_eo, len - pmatch[i].rm_eo, utf8);
1036                  fprintf(outfile, "\n");
1037                  }
1038              }              }
1039            }            }
1040          }          }
1041        }        }
1042    
1043      /* Handle matching via the native interface */      /* Handle matching via the native interface - repeats for /g and /G */
1044    
1045      else      else
1046    #endif  /* !defined NOPOSIX */
1047    
1048        for (;; gmatched++)    /* Loop for /g or /G */
1049        {        {
1050        if (timeit)        if (timeit)
1051          {          {
# Line 796  while (!done) Line 1053  while (!done)
1053          clock_t time_taken;          clock_t time_taken;
1054          clock_t start_time = clock();          clock_t start_time = clock();
1055          for (i = 0; i < LOOPREPEAT; i++)          for (i = 0; i < LOOPREPEAT; i++)
1056            count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,            count = pcre_exec(re, extra, (char *)bptr, len,
1057              size_offsets);              start_offset, options | g_notempty, offsets, size_offsets);
1058          time_taken = clock() - start_time;          time_taken = clock() - start_time;
1059          fprintf(outfile, "Execute time %.3f milliseconds\n",          fprintf(outfile, "Execute time %.3f milliseconds\n",
1060            ((double)time_taken * 1000.0)/            ((double)time_taken * 1000.0)/
1061            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));            ((double)LOOPREPEAT * (double)CLOCKS_PER_SEC));
1062          }          }
1063    
1064        count = pcre_exec(re, extra, (char *)dbuffer, len, options, offsets,        count = pcre_exec(re, extra, (char *)bptr, len,
1065          size_offsets);          start_offset, options | g_notempty, offsets, size_offsets);
1066    
1067        if (count == 0)        if (count == 0)
1068          {          {
# Line 813  while (!done) Line 1070  while (!done)
1070          count = size_offsets/3;          count = size_offsets/3;
1071          }          }
1072    
1073          /* Matched */
1074    
1075        if (count >= 0)        if (count >= 0)
1076          {          {
1077          int i;          int i;
# Line 823  while (!done) Line 1082  while (!done)
1082            else            else
1083              {              {
1084              fprintf(outfile, "%2d: ", i/2);              fprintf(outfile, "%2d: ", i/2);
1085              pchars(dbuffer + offsets[i], offsets[i+1] - offsets[i]);              pchars(bptr + offsets[i], offsets[i+1] - offsets[i], utf8);
1086              fprintf(outfile, "\n");              fprintf(outfile, "\n");
1087                if (i == 0)
1088                  {
1089                  if (do_showrest)
1090                    {
1091                    fprintf(outfile, " 0+ ");
1092                    pchars(bptr + offsets[i+1], len - offsets[i+1], utf8);
1093                    fprintf(outfile, "\n");
1094                    }
1095                  }
1096              }              }
1097            }            }
1098    
# Line 832  while (!done) Line 1100  while (!done)
1100            {            {
1101            if ((copystrings & (1 << i)) != 0)            if ((copystrings & (1 << i)) != 0)
1102              {              {
1103              char buffer[16];              char copybuffer[16];
1104              int rc = pcre_copy_substring((char *)dbuffer, offsets, count,              int rc = pcre_copy_substring((char *)bptr, offsets, count,
1105                i, buffer, sizeof(buffer));                i, copybuffer, sizeof(copybuffer));
1106              if (rc < 0)              if (rc < 0)
1107                fprintf(outfile, "copy substring %d failed %d\n", i, rc);                fprintf(outfile, "copy substring %d failed %d\n", i, rc);
1108              else              else
1109                fprintf(outfile, "%2dC %s (%d)\n", i, buffer, rc);                fprintf(outfile, "%2dC %s (%d)\n", i, copybuffer, rc);
1110              }              }
1111            }            }
1112    
# Line 847  while (!done) Line 1115  while (!done)
1115            if ((getstrings & (1 << i)) != 0)            if ((getstrings & (1 << i)) != 0)
1116              {              {
1117              const char *substring;              const char *substring;
1118              int rc = pcre_get_substring((char *)dbuffer, offsets, count,              int rc = pcre_get_substring((char *)bptr, offsets, count,
1119                i, &substring);                i, &substring);
1120              if (rc < 0)              if (rc < 0)
1121                fprintf(outfile, "get substring %d failed %d\n", i, rc);                fprintf(outfile, "get substring %d failed %d\n", i, rc);
1122              else              else
1123                {                {
1124                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);                fprintf(outfile, "%2dG %s (%d)\n", i, substring, rc);
1125                free((void *)substring);                /* free((void *)substring); */
1126                  pcre_free_substring(substring);
1127                }                }
1128              }              }
1129            }            }
# Line 862  while (!done) Line 1131  while (!done)
1131          if (getlist)          if (getlist)
1132            {            {
1133            const char **stringlist;            const char **stringlist;
1134            int rc = pcre_get_substring_list((char *)dbuffer, offsets, count,            int rc = pcre_get_substring_list((char *)bptr, offsets, count,
1135              &stringlist);              &stringlist);
1136            if (rc < 0)            if (rc < 0)
1137              fprintf(outfile, "get substring list failed %d\n", rc);              fprintf(outfile, "get substring list failed %d\n", rc);
# Line 872  while (!done) Line 1141  while (!done)
1141                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);                fprintf(outfile, "%2dL %s\n", i, stringlist[i]);
1142              if (stringlist[i] != NULL)              if (stringlist[i] != NULL)
1143                fprintf(outfile, "string list not terminated by NULL\n");                fprintf(outfile, "string list not terminated by NULL\n");
1144              free((void *)stringlist);              /* free((void *)stringlist); */
1145                pcre_free_substring_list(stringlist);
1146              }              }
1147            }            }
1148            }
1149    
1150          /* Failed to match. If this is a /g or /G loop and we previously set
1151          g_notempty after a null match, this is not necessarily the end.
1152          We want to advance the start offset, and continue. Fudge the offset
1153          values to achieve this. We won't be at the end of the string - that
1154          was checked before setting g_notempty. */
1155    
1156          else
1157            {
1158            if (g_notempty != 0)
1159              {
1160              offsets[0] = start_offset;
1161              offsets[1] = start_offset + 1;
1162              }
1163            else
1164              {
1165              if (gmatched == 0)   /* Error if no previous matches */
1166                {
1167                if (count == -1) fprintf(outfile, "No match\n");
1168                  else fprintf(outfile, "Error %d\n", count);
1169                }
1170              break;  /* Out of the /g loop */
1171              }
1172            }
1173    
1174          /* If not /g or /G we are done */
1175    
1176          if (!do_g && !do_G) break;
1177    
1178          /* If we have matched an empty string, first check to see if we are at
1179          the end of the subject. If so, the /g loop is over. Otherwise, mimic
1180          what Perl's /g options does. This turns out to be rather cunning. First
1181          we set PCRE_NOTEMPTY and PCRE_ANCHORED and try the match again at the
1182          same point. If this fails (picked up above) we advance to the next
1183          character. */
1184    
1185          g_notempty = 0;
1186          if (offsets[0] == offsets[1])
1187            {
1188            if (offsets[0] == len) break;
1189            g_notempty = PCRE_NOTEMPTY | PCRE_ANCHORED;
1190          }          }
1191    
1192          /* For /g, update the start offset, leaving the rest alone */
1193    
1194          if (do_g) start_offset = offsets[1];
1195    
1196          /* For /G, update the pointer and length */
1197    
1198        else        else
1199          {          {
1200          if (count == -1) fprintf(outfile, "No match\n");          bptr += offsets[1];
1201            else fprintf(outfile, "Error %d\n", count);          len -= offsets[1];
1202          }          }
1203        }        }  /* End of loop for /g and /G */
1204      }      }    /* End of loop for data lines */
1205    
1206    CONTINUE:    CONTINUE:
1207    
1208    #if !defined NOPOSIX
1209    if (posix || do_posix) regfree(&preg);    if (posix || do_posix) regfree(&preg);
1210    #endif
1211    
1212    if (re != NULL) free(re);    if (re != NULL) free(re);
1213    if (extra != NULL) free(extra);    if (extra != NULL) free(extra);
1214    if (tables != NULL)    if (tables != NULL)

Legend:
Removed from v.29  
changed lines
  Added in v.49

  ViewVC Help
Powered by ViewVC 1.1.5