/[pcre]/code/trunk/pcre_printint.c
ViewVC logotype

Diff of /code/trunk/pcre_printint.c

Parent Directory | Revision Log | View Patch

code/branches/pcre16/pcre_printint.c revision 810 by ph10, Mon Dec 19 13:34:10 2011 UTC | code/trunk/pcre_printint.c revision 1094 by chpe, Tue Oct 16 15:56:02 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2010 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 55  asked to print out a compiled regex for Line 55  asked to print out a compiled regex for
55  #include "config.h"  #include "config.h"
56  #endif  #endif
57    
58    /* For pcretest program. */
59    #define PRIV(name) name
60    
61  /* We have to include pcre_internal.h because we need the internal info for  /* We have to include pcre_internal.h because we need the internal info for
62  displaying the results of pcre_study() and we also need to know about the  displaying the results of pcre_study() and we also need to know about the
63  internal macros, structures, and other internal data values; pcretest has  internal macros, structures, and other internal data values; pcretest has
# Line 75  having a separate .h file just for this. Line 78  having a separate .h file just for this.
78  #ifdef PCRE_INCLUDED  #ifdef PCRE_INCLUDED
79  static /* Keep the following function as private. */  static /* Keep the following function as private. */
80  #endif  #endif
81  #ifdef COMPILE_PCRE8  
82    #if defined COMPILE_PCRE8
83  void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);  void pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths);
84  #else  #elif defined COMPILE_PCRE16
85  void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);  void pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths);
86    #elif defined COMPILE_PCRE32
87    void pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths);
88  #endif  #endif
89    
90  /* Macro that decides whether a character should be output as a literal or in  /* Macro that decides whether a character should be output as a literal or in
# Line 94  for testing purposes. */ Line 100  for testing purposes. */
100    
101  /* The table of operator names. */  /* The table of operator names. */
102    
103  static const char *OP_names[] = { OP_NAME_LIST };  static const char *priv_OP_names[] = { OP_NAME_LIST };
104    
105  /* This table of operator lengths is not actually used by the working code,  /* This table of operator lengths is not actually used by the working code,
106  but its size is needed for a check that ensures it is the correct size for the  but its size is needed for a check that ensures it is the correct size for the
107  number of opcodes (thus catching update omissions). */  number of opcodes (thus catching update omissions). */
108    
109  static const pcre_uint8 OP_lengths[] = { OP_LENGTHS };  static const pcre_uint8 priv_OP_lengths[] = { OP_LENGTHS };
110    
111    
112    
# Line 111  static const pcre_uint8 OP_lengths[] = { Line 117  static const pcre_uint8 OP_lengths[] = {
117  static int  static int
118  print_char(FILE *f, pcre_uchar *ptr, BOOL utf)  print_char(FILE *f, pcre_uchar *ptr, BOOL utf)
119  {  {
120  int c = *ptr;  pcre_uint32 c = *ptr;
121    
122  #ifndef SUPPORT_UTF  #ifndef SUPPORT_UTF
123    
124  (void)utf;  /* Avoid compiler warning */  (void)utf;  /* Avoid compiler warning */
125  if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);  if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
126    else if (c <= 0x80) fprintf(f, "\\x%02x", c);
127    else fprintf(f, "\\x{%x}", c);
128  return 0;  return 0;
129    
130  #else  #else
131    
132  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
133    
134  if (!utf || (c & 0xc0) != 0xc0)  if (!utf || (c & 0xc0) != 0xc0)
135    {    {
136    if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x%02x", c);    if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
137      else if (c < 0x80) fprintf(f, "\\x%02x", c);
138      else fprintf(f, "\\x{%02x}", c);
139    return 0;    return 0;
140    }    }
141  else  else
# Line 154  else Line 165  else
165    return a;    return a;
166    }    }
167    
168  #else  #elif defined COMPILE_PCRE16
   
 #ifdef COMPILE_PCRE16  
169    
170  if (!utf || (c & 0xfc00) != 0xd800)  if (!utf || (c & 0xfc00) != 0xd800)
171    {    {
172    if (PRINTABLE(c)) fprintf(f, "%c", c);    if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
173    else if (c <= 0xff) fprintf(f, "\\x%02x", c);    else if (c <= 0x80) fprintf(f, "\\x%02x", c);
174    else fprintf(f, "\\x{%x}", c);    else fprintf(f, "\\x{%02x}", c);
175    return 0;    return 0;
176    }    }
177  else  else
# Line 182  else Line 191  else
191    return 1;    return 1;
192    }    }
193    
194  #endif /* COMPILE_PCRE16 */  #elif defined COMPILE_PCRE32
195    
196  #endif /* COMPILE_PCRE8 */  if (!utf || (c & 0xfffff800u) != 0xd800u)
197      {
198      if (PRINTABLE(c)) fprintf(f, "%c", (char)c);
199      else if (c <= 0x80) fprintf(f, "\\x%02x", c);
200      else fprintf(f, "\\x{%x}", c);
201      return 0;
202      }
203    else
204      {
205      /* This is a check for malformed UTF-32; it should only occur if the sanity
206      check has been turned off. Rather than swallow a surrogate, just stop if
207      we hit one. Print it with \X instead of \x as an indication. */
208      fprintf(f, "\\X{%x}", c);
209      return 0;
210      }
211    
212    #endif /* COMPILE_PCRE[8|16|32] */
213    
214  #endif /* SUPPORT_UTF */  #endif /* SUPPORT_UTF */
215  }  }
# Line 198  print_puchar(FILE *f, PCRE_PUCHAR ptr) Line 223  print_puchar(FILE *f, PCRE_PUCHAR ptr)
223  {  {
224  while (*ptr != '\0')  while (*ptr != '\0')
225    {    {
226    register int c = *ptr++;    register pcre_uint32 c = *ptr++;
227    if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);    if (PRINTABLE(c)) fprintf(f, "%c", c); else fprintf(f, "\\x{%x}", c);
228    }    }
229  }  }
# Line 225  return (ptype == pvalue)? "??" : "??"; Line 250  return (ptype == pvalue)? "??" : "??";
250  }  }
251    
252    
253    /*************************************************
254    *       Print Unicode property value             *
255    *************************************************/
256    
257    /* "Normal" properties can be printed from tables. The PT_CLIST property is a
258    pseudo-property that contains a pointer to a list of case-equivalent
259    characters. This is used only when UCP support is available and UTF mode is
260    selected. It should never occur otherwise, but just in case it does, have
261    something ready to print. */
262    
263    static void
264    print_prop(FILE *f, pcre_uchar *code, const char *before, const char *after)
265    {
266    if (code[1] != PT_CLIST)
267      {
268      fprintf(f, "%s%s %s%s", before, priv_OP_names[*code], get_ucpname(code[1],
269        code[2]), after);
270      }
271    else
272      {
273      const char *not = (*code == OP_PROP)? "" : "not ";
274    #ifndef SUPPORT_UCP
275      fprintf(f, "%s%sclist %d%s", before, not, code[2], after);
276    #else
277      const pcre_uint32 *p = PRIV(ucd_caseless_sets) + code[2];
278      fprintf (f, "%s%sclist", before, not);
279      while (*p < NOTACHAR) fprintf(f, " %04x", *p++);
280      fprintf(f, "%s", after);
281    #endif
282      }
283    }
284    
285    
286    
287    
288  /*************************************************  /*************************************************
289  *         Print compiled regex                   *  *         Print compiled regex                   *
# Line 239  written that do not depend on the value Line 298  written that do not depend on the value
298  #ifdef PCRE_INCLUDED  #ifdef PCRE_INCLUDED
299  static /* Keep the following function as private. */  static /* Keep the following function as private. */
300  #endif  #endif
301  #ifdef COMPILE_PCRE8  #if defined COMPILE_PCRE8
302  void  void
303  pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)  pcre_printint(pcre *external_re, FILE *f, BOOL print_lengths)
304  #else  #elif defined COMPILE_PCRE16
305  void  void
306  pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths)  pcre16_printint(pcre *external_re, FILE *f, BOOL print_lengths)
307    #elif defined COMPILE_PCRE32
308    void
309    pcre32_printint(pcre *external_re, FILE *f, BOOL print_lengths)
310  #endif  #endif
311  {  {
312  real_pcre *re = (real_pcre *)external_re;  REAL_PCRE *re = (REAL_PCRE *)external_re;
313  pcre_uchar *codestart, *code;  pcre_uchar *codestart, *code;
314  BOOL utf;  BOOL utf;
315    
# Line 268  if (re->magic_number != MAGIC_NUMBER) Line 330  if (re->magic_number != MAGIC_NUMBER)
330    }    }
331    
332  code = codestart = (pcre_uchar *)re + offset + count * size;  code = codestart = (pcre_uchar *)re + offset + count * size;
333  /* PCRE_UTF16 has the same value as PCRE_UTF8. */  /* PCRE_UTF(16|32) have the same value as PCRE_UTF8. */
334  utf = (options & PCRE_UTF8) != 0;  utf = (options & PCRE_UTF8) != 0;
335    
336  for(;;)  for(;;)
337    {    {
338    pcre_uchar *ccode;    pcre_uchar *ccode;
339    const char *flag = "  ";    const char *flag = "  ";
340    int c;    pcre_uint32 c;
341    int extra = 0;    int extra = 0;
342    
343    if (print_lengths)    if (print_lengths)
# Line 294  for(;;) Line 356  for(;;)
356    
357        case OP_TABLE_LENGTH:        case OP_TABLE_LENGTH:
358        case OP_TABLE_LENGTH +        case OP_TABLE_LENGTH +
359          ((sizeof(OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&          ((sizeof(priv_OP_names)/sizeof(const char *) == OP_TABLE_LENGTH) &&
360          (sizeof(OP_lengths) == OP_TABLE_LENGTH)):          (sizeof(priv_OP_lengths) == OP_TABLE_LENGTH)):
361        break;        break;
362  /* ========================================================================== */  /* ========================================================================== */
363    
364      case OP_END:      case OP_END:
365      fprintf(f, "    %s\n", OP_names[*code]);      fprintf(f, "    %s\n", priv_OP_names[*code]);
366      fprintf(f, "------------------------------------------------------------------\n");      fprintf(f, "------------------------------------------------------------------\n");
367      return;      return;
368    
# Line 332  for(;;) Line 394  for(;;)
394      case OP_SCBRAPOS:      case OP_SCBRAPOS:
395      if (print_lengths) fprintf(f, "%3d ", GET(code, 1));      if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
396        else fprintf(f, "    ");        else fprintf(f, "    ");
397      fprintf(f, "%s %d", OP_names[*code], GET2(code, 1+LINK_SIZE));      fprintf(f, "%s %d", priv_OP_names[*code], GET2(code, 1+LINK_SIZE));
398      break;      break;
399    
400      case OP_BRA:      case OP_BRA:
# Line 355  for(;;) Line 417  for(;;)
417      case OP_REVERSE:      case OP_REVERSE:
418      if (print_lengths) fprintf(f, "%3d ", GET(code, 1));      if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
419        else fprintf(f, "    ");        else fprintf(f, "    ");
420      fprintf(f, "%s", OP_names[*code]);      fprintf(f, "%s", priv_OP_names[*code]);
421      break;      break;
422    
423      case OP_CLOSE:      case OP_CLOSE:
424      fprintf(f, "    %s %d", OP_names[*code], GET2(code, 1));      fprintf(f, "    %s %d", priv_OP_names[*code], GET2(code, 1));
425      break;      break;
426    
427      case OP_CREF:      case OP_CREF:
428      case OP_NCREF:      case OP_NCREF:
429      fprintf(f, "%3d %s", GET2(code,1), OP_names[*code]);      fprintf(f, "%3d %s", GET2(code,1), priv_OP_names[*code]);
430      break;      break;
431    
432      case OP_RREF:      case OP_RREF:
# Line 419  for(;;) Line 481  for(;;)
481      fprintf(f, " %s ", flag);      fprintf(f, " %s ", flag);
482      if (*code >= OP_TYPESTAR)      if (*code >= OP_TYPESTAR)
483        {        {
       fprintf(f, "%s", OP_names[code[1]]);  
484        if (code[1] == OP_PROP || code[1] == OP_NOTPROP)        if (code[1] == OP_PROP || code[1] == OP_NOTPROP)
485          {          {
486          fprintf(f, " %s ", get_ucpname(code[2], code[3]));          print_prop(f, code + 1, "", " ");
487          extra = 2;          extra = 2;
488          }          }
489          else fprintf(f, "%s", priv_OP_names[code[1]]);
490        }        }
491      else extra = print_char(f, code+1, utf);      else extra = print_char(f, code+1, utf);
492      fprintf(f, "%s", OP_names[*code]);      fprintf(f, "%s", priv_OP_names[*code]);
493      break;      break;
494    
495      case OP_EXACTI:      case OP_EXACTI:
# Line 453  for(;;) Line 515  for(;;)
515      case OP_TYPEUPTO:      case OP_TYPEUPTO:
516      case OP_TYPEMINUPTO:      case OP_TYPEMINUPTO:
517      case OP_TYPEPOSUPTO:      case OP_TYPEPOSUPTO:
     fprintf(f, "    %s", OP_names[code[1 + IMM2_SIZE]]);  
518      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)      if (code[1 + IMM2_SIZE] == OP_PROP || code[1 + IMM2_SIZE] == OP_NOTPROP)
519        {        {
520        fprintf(f, " %s ", get_ucpname(code[1 + IMM2_SIZE + 1],        print_prop(f, code + IMM2_SIZE + 1, "    ", " ");
         code[1 + IMM2_SIZE + 2]));  
521        extra = 2;        extra = 2;
522        }        }
523        else fprintf(f, "    %s", priv_OP_names[code[1 + IMM2_SIZE]]);
524      fprintf(f, "{");      fprintf(f, "{");
525      if (*code != OP_TYPEEXACT) fprintf(f, "0,");      if (*code != OP_TYPEEXACT) fprintf(f, "0,");
526      fprintf(f, "%d}", GET2(code,1));      fprintf(f, "%d}", GET2(code,1));
# Line 471  for(;;) Line 532  for(;;)
532      flag = "/i";      flag = "/i";
533      /* Fall through */      /* Fall through */
534      case OP_NOT:      case OP_NOT:
535      c = code[1];      fprintf(f, " %s [^", flag);
536      if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);      extra = print_char(f, code + 1, utf);
537      else if (utf || c > 0xff)      fprintf(f, "]");
       fprintf(f, " %s [^\\x{%02x}]", flag, c);  
     else  
       fprintf(f, " %s [^\\x%02x]", flag, c);  
538      break;      break;
539    
540      case OP_NOTSTARI:      case OP_NOTSTARI:
# Line 500  for(;;) Line 558  for(;;)
558      case OP_NOTQUERY:      case OP_NOTQUERY:
559      case OP_NOTMINQUERY:      case OP_NOTMINQUERY:
560      case OP_NOTPOSQUERY:      case OP_NOTPOSQUERY:
561      c = code[1];      fprintf(f, " %s [^", flag);
562      if (PRINTABLE(c)) fprintf(f, " %s [^%c]", flag, c);      extra = print_char(f, code + 1, utf);
563        else fprintf(f, " %s [^\\x%02x]", flag, c);      fprintf(f, "]%s", priv_OP_names[*code]);
     fprintf(f, "%s", OP_names[*code]);  
564      break;      break;
565    
566      case OP_NOTEXACTI:      case OP_NOTEXACTI:
# Line 517  for(;;) Line 574  for(;;)
574      case OP_NOTUPTO:      case OP_NOTUPTO:
575      case OP_NOTMINUPTO:      case OP_NOTMINUPTO:
576      case OP_NOTPOSUPTO:      case OP_NOTPOSUPTO:
577      c = code[1 + IMM2_SIZE];      fprintf(f, " %s [^", flag);
578      if (PRINTABLE(c)) fprintf(f, " %s [^%c]{", flag, c);      extra = print_char(f, code + 1 + IMM2_SIZE, utf);
579        else fprintf(f, " %s [^\\x%02x]{", flag, c);      fprintf(f, "]{");
580      if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");      if (*code != OP_NOTEXACT && *code != OP_NOTEXACTI) fprintf(f, "0,");
581      fprintf(f, "%d}", GET2(code,1));      fprintf(f, "%d}", GET2(code,1));
582      if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");      if (*code == OP_NOTMINUPTO || *code == OP_NOTMINUPTOI) fprintf(f, "?");
# Line 530  for(;;) Line 587  for(;;)
587      case OP_RECURSE:      case OP_RECURSE:
588      if (print_lengths) fprintf(f, "%3d ", GET(code, 1));      if (print_lengths) fprintf(f, "%3d ", GET(code, 1));
589        else fprintf(f, "    ");        else fprintf(f, "    ");
590      fprintf(f, "%s", OP_names[*code]);      fprintf(f, "%s", priv_OP_names[*code]);
591      break;      break;
592    
593      case OP_REFI:      case OP_REFI:
# Line 538  for(;;) Line 595  for(;;)
595      /* Fall through */      /* Fall through */
596      case OP_REF:      case OP_REF:
597      fprintf(f, " %s \\%d", flag, GET2(code,1));      fprintf(f, " %s \\%d", flag, GET2(code,1));
598      ccode = code + PRIV(OP_lengths)[*code];      ccode = code + priv_OP_lengths[*code];
599      goto CLASS_REF_REPEAT;      goto CLASS_REF_REPEAT;
600    
601      case OP_CALLOUT:      case OP_CALLOUT:
602      fprintf(f, "    %s %d %d %d", OP_names[*code], code[1], GET(code,2),      fprintf(f, "    %s %d %d %d", priv_OP_names[*code], code[1], GET(code,2),
603        GET(code, 2 + LINK_SIZE));        GET(code, 2 + LINK_SIZE));
604      break;      break;
605    
606      case OP_PROP:      case OP_PROP:
607      case OP_NOTPROP:      case OP_NOTPROP:
608      fprintf(f, "    %s %s", OP_names[*code], get_ucpname(code[1], code[2]));      print_prop(f, code, "    ", "");
609      break;      break;
610    
611      /* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no      /* OP_XCLASS can only occur in UTF or PCRE16 modes. However, there's no
# Line 627  for(;;) Line 684  for(;;)
684              }              }
685            else            else
686              {              {
687              ccode += 1 + print_char(f, ccode, TRUE);              ccode += 1 + print_char(f, ccode, utf);
688              if (ch == XCL_RANGE)              if (ch == XCL_RANGE)
689                {                {
690                fprintf(f, "-");                fprintf(f, "-");
691                ccode += 1 + print_char(f, ccode, TRUE);                ccode += 1 + print_char(f, ccode, utf);
692                }                }
693              }              }
694            }            }
# Line 652  for(;;) Line 709  for(;;)
709          case OP_CRMINPLUS:          case OP_CRMINPLUS:
710          case OP_CRQUERY:          case OP_CRQUERY:
711          case OP_CRMINQUERY:          case OP_CRMINQUERY:
712          fprintf(f, "%s", OP_names[*ccode]);          fprintf(f, "%s", priv_OP_names[*ccode]);
713          extra += PRIV(OP_lengths)[*ccode];          extra += priv_OP_lengths[*ccode];
714          break;          break;
715    
716          case OP_CRRANGE:          case OP_CRRANGE:
# Line 663  for(;;) Line 720  for(;;)
720          if (max == 0) fprintf(f, "{%d,}", min);          if (max == 0) fprintf(f, "{%d,}", min);
721          else fprintf(f, "{%d,%d}", min, max);          else fprintf(f, "{%d,%d}", min, max);
722          if (*ccode == OP_CRMINRANGE) fprintf(f, "?");          if (*ccode == OP_CRMINRANGE) fprintf(f, "?");
723          extra += PRIV(OP_lengths)[*ccode];          extra += priv_OP_lengths[*ccode];
724          break;          break;
725    
726          /* Do nothing if it's not a repeat; this code stops picky compilers          /* Do nothing if it's not a repeat; this code stops picky compilers
# Line 679  for(;;) Line 736  for(;;)
736      case OP_PRUNE_ARG:      case OP_PRUNE_ARG:
737      case OP_SKIP_ARG:      case OP_SKIP_ARG:
738      case OP_THEN_ARG:      case OP_THEN_ARG:
739      fprintf(f, "    %s ", OP_names[*code]);      fprintf(f, "    %s ", priv_OP_names[*code]);
740      print_puchar(f, code + 2);      print_puchar(f, code + 2);
741      extra += code[1];      extra += code[1];
742      break;      break;
743    
744      case OP_THEN:      case OP_THEN:
745      fprintf(f, "    %s", OP_names[*code]);      fprintf(f, "    %s", priv_OP_names[*code]);
746      break;      break;
747    
748      case OP_CIRCM:      case OP_CIRCM:
# Line 696  for(;;) Line 753  for(;;)
753      /* Anything else is just an item with no data, but possibly a flag. */      /* Anything else is just an item with no data, but possibly a flag. */
754    
755      default:      default:
756      fprintf(f, " %s %s", flag, OP_names[*code]);      fprintf(f, " %s %s", flag, priv_OP_names[*code]);
757      break;      break;
758      }      }
759    
760    code += PRIV(OP_lengths)[*code] + extra;    code += priv_OP_lengths[*code] + extra;
761    fprintf(f, "\n");    fprintf(f, "\n");
762    }    }
763  }  }

Legend:
Removed from v.810  
changed lines
  Added in v.1094

  ViewVC Help
Powered by ViewVC 1.1.5