/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC | revision 119 by ph10, Mon Mar 12 09:29:45 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 82  are simple data values; negative values Line 82  are simple data values; negative values
82  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
83  is invalid. */  is invalid. */
84    
85  #if !EBCDIC   /* This is the "normal" table for ASCII systems */  #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */
86  static const short int escapes[] = {  static const short int escapes[] = {
87       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
# Line 96  static const short int escapes[] = { Line 96  static const short int escapes[] = {
96       0,      0, -ESC_z                                            /* x - z */       0,      0, -ESC_z                                            /* x - z */
97  };  };
98    
99  #else         /* This is the "abnormal" table for EBCDIC systems */  #else           /* This is the "abnormal" table for EBCDIC systems */
100  static const short int escapes[] = {  static const short int escapes[] = {
101  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
102  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
# Line 262  For convenience, we use the same bit def Line 262  For convenience, we use the same bit def
262    
263  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
264    
265  #if !EBCDIC    /* This is the "normal" case, for ASCII systems */  #ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */
266  static const unsigned char digitab[] =  static const unsigned char digitab[] =
267    {    {
268    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
# Line 298  static const unsigned char digitab[] = Line 298  static const unsigned char digitab[] =
298    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
299    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
300    
301  #else          /* This is the "abnormal" case, for EBCDIC systems */  #else           /* This is the "abnormal" case, for EBCDIC systems */
302  static const unsigned char digitab[] =  static const unsigned char digitab[] =
303    {    {
304    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
# Line 312  static const unsigned char digitab[] = Line 312  static const unsigned char digitab[] =
312    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */
313    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */
314    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */
315    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88-     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- 95    */
316    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */
317    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */
318    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
# Line 346  static const unsigned char ebcdic_charta Line 346  static const unsigned char ebcdic_charta
346    0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */    0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */
347    0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */    0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */
348    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */
349    0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88-  */    0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88- 95 */
350    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */
351    0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */    0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */
352    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
# Line 421  if (c == 0) *errorcodeptr = ERR1; Line 421  if (c == 0) *errorcodeptr = ERR1;
421  a table. A non-zero result is something that can be returned immediately.  a table. A non-zero result is something that can be returned immediately.
422  Otherwise further processing may be required. */  Otherwise further processing may be required. */
423    
424  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
425  else if (c < '0' || c > 'z') {}                           /* Not alphameric */  else if (c < '0' || c > 'z') {}                           /* Not alphameric */
426  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - '0']) != 0) c = i;
427    
428  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
429  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */
430  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
431  #endif  #endif
# Line 562  else Line 562  else
562          if (c == 0 && cc == '0') continue;     /* Leading zeroes */          if (c == 0 && cc == '0') continue;     /* Leading zeroes */
563          count++;          count++;
564    
565  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
566          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= 'a') cc -= 32;               /* Convert to upper case */
567          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
568  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
569          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */
570          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
571  #endif  #endif
# Line 589  else Line 589  else
589        {        {
590        int cc;                               /* Some compilers don't like ++ */        int cc;                               /* Some compilers don't like ++ */
591        cc = *(++ptr);                        /* in initializers */        cc = *(++ptr);                        /* in initializers */
592  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
593        if (cc >= 'a') cc -= 32;              /* Convert to upper case */        if (cc >= 'a') cc -= 32;              /* Convert to upper case */
594        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
595  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
596        if (cc <= 'z') cc += 64;              /* Convert to upper case */        if (cc <= 'z') cc += 64;              /* Convert to upper case */
597        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
598  #endif  #endif
# Line 611  else Line 611  else
611        return 0;        return 0;
612        }        }
613    
614  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
615      if (c >= 'a' && c <= 'z') c -= 32;      if (c >= 'a' && c <= 'z') c -= 32;
616      c ^= 0x40;      c ^= 0x40;
617  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
618      if (c >= 'a' && c <= 'z') c += 64;      if (c >= 'a' && c <= 'z') c += 64;
619      c ^= 0xC0;      c ^= 0xC0;
620  #endif  #endif
# Line 1246  for (;;) Line 1246  for (;;)
1246    else    else
1247      {      {
1248      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1249    #ifdef SUPPORT_UTF8
1250      if (utf8) switch(c)      if (utf8) switch(c)
1251        {        {
1252        case OP_CHAR:        case OP_CHAR:
# Line 1266  for (;;) Line 1267  for (;;)
1267        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1268        break;        break;
1269        }        }
1270    #endif
1271      }      }
1272    }    }
1273  }  }
# Line 1309  for (;;) Line 1311  for (;;)
1311    else    else
1312      {      {
1313      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1314    #ifdef SUPPORT_UTF8
1315      if (utf8) switch(c)      if (utf8) switch(c)
1316        {        {
1317        case OP_CHAR:        case OP_CHAR:
# Line 1329  for (;;) Line 1332  for (;;)
1332        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1333        break;        break;
1334        }        }
1335    #endif
1336      }      }
1337    }    }
1338  }  }
# Line 5096  if (errorcodeptr != NULL) *errorcodeptr Line 5100  if (errorcodeptr != NULL) *errorcodeptr
5100  if (erroroffset == NULL)  if (erroroffset == NULL)
5101    {    {
5102    errorcode = ERR16;    errorcode = ERR16;
5103    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5104    }    }
5105    
5106  *erroroffset = 0;  *erroroffset = 0;
# Line 5109  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 5113  if (utf8 && (options & PCRE_NO_UTF8_CHEC
5113       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
5114    {    {
5115    errorcode = ERR44;    errorcode = ERR44;
5116    goto PCRE_UTF8_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5117    }    }
5118  #else  #else
5119  if ((options & PCRE_UTF8) != 0)  if ((options & PCRE_UTF8) != 0)
# Line 5321  if (errorcode != 0) Line 5325  if (errorcode != 0)
5325    (pcre_free)(re);    (pcre_free)(re);
5326    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
5327    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
5328  #ifdef SUPPORT_UTF8    PCRE_EARLY_ERROR_RETURN2:
   PCRE_UTF8_ERROR_RETURN:  
 #endif  
5329    *errorptr = error_texts[errorcode];    *errorptr = error_texts[errorcode];
5330    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
5331    return NULL;    return NULL;
# Line 5413  if ((re->options & PCRE_REQCHSET) != 0) Line 5415  if ((re->options & PCRE_REQCHSET) != 0)
5415      else printf("Req char = \\x%02x%s\n", ch, caseless);      else printf("Req char = \\x%02x%s\n", ch, caseless);
5416    }    }
5417    
5418  pcre_printint(re, stdout);  pcre_printint(re, stdout, TRUE);
5419    
5420  /* This check is done here in the debugging case so that the code that  /* This check is done here in the debugging case so that the code that
5421  was compiled can be seen. */  was compiled can be seen. */

Legend:
Removed from v.93  
changed lines
  Added in v.119

  ViewVC Help
Powered by ViewVC 1.1.5