/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 93 by nigel, Sat Feb 24 21:41:42 2007 UTC | revision 149 by ph10, Mon Apr 16 15:28:08 2007 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2006 University of Cambridge             Copyright (c) 1997-2007 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 82  are simple data values; negative values Line 82  are simple data values; negative values
82  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
83  is invalid. */  is invalid. */
84    
85  #if !EBCDIC   /* This is the "normal" table for ASCII systems */  #ifndef EBCDIC  /* This is the "normal" table for ASCII systems */
86  static const short int escapes[] = {  static const short int escapes[] = {
87       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */       0,      0,      0,      0,      0,      0,      0,      0,   /* 0 - 7 */
88       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */       0,      0,    ':',    ';',    '<',    '=',    '>',    '?',   /* 8 - ? */
# Line 96  static const short int escapes[] = { Line 96  static const short int escapes[] = {
96       0,      0, -ESC_z                                            /* x - z */       0,      0, -ESC_z                                            /* x - z */
97  };  };
98    
99  #else         /* This is the "abnormal" table for EBCDIC systems */  #else           /* This is the "abnormal" table for EBCDIC systems */
100  static const short int escapes[] = {  static const short int escapes[] = {
101  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',  /*  48 */     0,     0,      0,     '.',    '<',   '(',    '+',    '|',
102  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,  /*  50 */   '&',     0,      0,       0,      0,     0,      0,      0,
# Line 262  For convenience, we use the same bit def Line 262  For convenience, we use the same bit def
262    
263  Then we can use ctype_digit and ctype_xdigit in the code. */  Then we can use ctype_digit and ctype_xdigit in the code. */
264    
265  #if !EBCDIC    /* This is the "normal" case, for ASCII systems */  #ifndef EBCDIC  /* This is the "normal" case, for ASCII systems */
266  static const unsigned char digitab[] =  static const unsigned char digitab[] =
267    {    {
268    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7 */
# Line 298  static const unsigned char digitab[] = Line 298  static const unsigned char digitab[] =
298    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 240-247 */
299    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00};/* 248-255 */
300    
301  #else          /* This is the "abnormal" case, for EBCDIC systems */  #else           /* This is the "abnormal" case, for EBCDIC systems */
302  static const unsigned char digitab[] =  static const unsigned char digitab[] =
303    {    {
304    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*   0-  7  0 */
# Line 312  static const unsigned char digitab[] = Line 312  static const unsigned char digitab[] =
312    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 40 */
313    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  72- |     */
314    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 50 */
315    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88-     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  88- 95    */
316    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 60 */
317    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 104- ?     */
318    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 70 */
# Line 346  static const unsigned char ebcdic_charta Line 346  static const unsigned char ebcdic_charta
346    0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */    0x01,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*    - 71 */
347    0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */    0x00,0x00,0x00,0x80,0x00,0x80,0x80,0x80, /*  72- |  */
348    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  & - 87 */
349    0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88-  */    0x00,0x00,0x00,0x80,0x80,0x80,0x00,0x00, /*  88- 95 */
350    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /*  - -103 */
351    0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */    0x00,0x00,0x00,0x00,0x00,0x10,0x00,0x80, /* 104- ?  */
352    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */    0x00,0x00,0x00,0x00,0x00,0x00,0x00,0x00, /* 112-119 */
# Line 421  if (c == 0) *errorcodeptr = ERR1; Line 421  if (c == 0) *errorcodeptr = ERR1;
421  a table. A non-zero result is something that can be returned immediately.  a table. A non-zero result is something that can be returned immediately.
422  Otherwise further processing may be required. */  Otherwise further processing may be required. */
423    
424  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
425  else if (c < '0' || c > 'z') {}                           /* Not alphameric */  else if (c < '0' || c > 'z') {}                           /* Not alphameric */
426  else if ((i = escapes[c - '0']) != 0) c = i;  else if ((i = escapes[c - '0']) != 0) c = i;
427    
428  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
429  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */  else if (c < 'a' || (ebcdic_chartab[c] & 0x0E) == 0) {}   /* Not alphameric */
430  else if ((i = escapes[c - 0x48]) != 0)  c = i;  else if ((i = escapes[c - 0x48]) != 0)  c = i;
431  #endif  #endif
# Line 562  else Line 562  else
562          if (c == 0 && cc == '0') continue;     /* Leading zeroes */          if (c == 0 && cc == '0') continue;     /* Leading zeroes */
563          count++;          count++;
564    
565  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
566          if (cc >= 'a') cc -= 32;               /* Convert to upper case */          if (cc >= 'a') cc -= 32;               /* Convert to upper case */
567          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc < 'A')? '0' : ('A' - 10));
568  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
569          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */          if (cc >= 'a' && cc <= 'z') cc += 64;  /* Convert to upper case */
570          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));          c = (c << 4) + cc - ((cc >= '0')? '0' : ('A' - 10));
571  #endif  #endif
# Line 589  else Line 589  else
589        {        {
590        int cc;                               /* Some compilers don't like ++ */        int cc;                               /* Some compilers don't like ++ */
591        cc = *(++ptr);                        /* in initializers */        cc = *(++ptr);                        /* in initializers */
592  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
593        if (cc >= 'a') cc -= 32;              /* Convert to upper case */        if (cc >= 'a') cc -= 32;              /* Convert to upper case */
594        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc < 'A')? '0' : ('A' - 10));
595  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
596        if (cc <= 'z') cc += 64;              /* Convert to upper case */        if (cc <= 'z') cc += 64;              /* Convert to upper case */
597        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));        c = c * 16 + cc - ((cc >= '0')? '0' : ('A' - 10));
598  #endif  #endif
# Line 611  else Line 611  else
611        return 0;        return 0;
612        }        }
613    
614  #if !EBCDIC    /* ASCII coding */  #ifndef EBCDIC  /* ASCII coding */
615      if (c >= 'a' && c <= 'z') c -= 32;      if (c >= 'a' && c <= 'z') c -= 32;
616      c ^= 0x40;      c ^= 0x40;
617  #else          /* EBCDIC coding */  #else           /* EBCDIC coding */
618      if (c >= 'a' && c <= 'z') c += 64;      if (c >= 'a' && c <= 'z') c += 64;
619      c ^= 0xC0;      c ^= 0xC0;
620  #endif  #endif
# Line 1246  for (;;) Line 1246  for (;;)
1246    else    else
1247      {      {
1248      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1249    #ifdef SUPPORT_UTF8
1250      if (utf8) switch(c)      if (utf8) switch(c)
1251        {        {
1252        case OP_CHAR:        case OP_CHAR:
# Line 1266  for (;;) Line 1267  for (;;)
1267        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1268        break;        break;
1269        }        }
1270    #endif
1271      }      }
1272    }    }
1273  }  }
# Line 1309  for (;;) Line 1311  for (;;)
1311    else    else
1312      {      {
1313      code += _pcre_OP_lengths[c];      code += _pcre_OP_lengths[c];
1314    #ifdef SUPPORT_UTF8
1315      if (utf8) switch(c)      if (utf8) switch(c)
1316        {        {
1317        case OP_CHAR:        case OP_CHAR:
# Line 1329  for (;;) Line 1332  for (;;)
1332        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];        if (code[-1] >= 0xc0) code += _pcre_utf8_table4[code[-1] & 0x3f];
1333        break;        break;
1334        }        }
1335    #endif
1336      }      }
1337    }    }
1338  }  }
# Line 5039  Returns:        pointer to compiled data Line 5043  Returns:        pointer to compiled data
5043                  with errorptr and erroroffset set                  with errorptr and erroroffset set
5044  */  */
5045    
5046  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5047  pcre_compile(const char *pattern, int options, const char **errorptr,  pcre_compile(const char *pattern, int options, const char **errorptr,
5048    int *erroroffset, const unsigned char *tables)    int *erroroffset, const unsigned char *tables)
5049  {  {
# Line 5047  return pcre_compile2(pattern, options, N Line 5051  return pcre_compile2(pattern, options, N
5051  }  }
5052    
5053    
5054  PCRE_DATA_SCOPE pcre *  PCRE_EXP_DEFN pcre *
5055  pcre_compile2(const char *pattern, int options, int *errorcodeptr,  pcre_compile2(const char *pattern, int options, int *errorcodeptr,
5056    const char **errorptr, int *erroroffset, const unsigned char *tables)    const char **errorptr, int *erroroffset, const unsigned char *tables)
5057  {  {
# Line 5096  if (errorcodeptr != NULL) *errorcodeptr Line 5100  if (errorcodeptr != NULL) *errorcodeptr
5100  if (erroroffset == NULL)  if (erroroffset == NULL)
5101    {    {
5102    errorcode = ERR16;    errorcode = ERR16;
5103    goto PCRE_EARLY_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5104    }    }
5105    
5106  *erroroffset = 0;  *erroroffset = 0;
# Line 5109  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 5113  if (utf8 && (options & PCRE_NO_UTF8_CHEC
5113       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)       (*erroroffset = _pcre_valid_utf8((uschar *)pattern, -1)) >= 0)
5114    {    {
5115    errorcode = ERR44;    errorcode = ERR44;
5116    goto PCRE_UTF8_ERROR_RETURN;    goto PCRE_EARLY_ERROR_RETURN2;
5117    }    }
5118  #else  #else
5119  if ((options & PCRE_UTF8) != 0)  if ((options & PCRE_UTF8) != 0)
# Line 5134  cd->cbits = tables + cbits_offset; Line 5138  cd->cbits = tables + cbits_offset;
5138  cd->ctypes = tables + ctypes_offset;  cd->ctypes = tables + ctypes_offset;
5139    
5140  /* Handle different types of newline. The three bits give seven cases. The  /* Handle different types of newline. The three bits give seven cases. The
5141  current code allows for fixed one- or two-byte sequences, plus "any". */  current code allows for fixed one- or two-byte sequences, plus "any" and
5142    "anycrlf". */
5143    
5144  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))  switch (options & (PCRE_NEWLINE_CRLF | PCRE_NEWLINE_ANY))
5145    {    {
# Line 5144  switch (options & (PCRE_NEWLINE_CRLF | P Line 5149  switch (options & (PCRE_NEWLINE_CRLF | P
5149    case PCRE_NEWLINE_CR+    case PCRE_NEWLINE_CR+
5150         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;         PCRE_NEWLINE_LF: newline = ('\r' << 8) | '\n'; break;
5151    case PCRE_NEWLINE_ANY: newline = -1; break;    case PCRE_NEWLINE_ANY: newline = -1; break;
5152      case PCRE_NEWLINE_ANYCRLF: newline = -2; break;
5153    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;    default: errorcode = ERR56; goto PCRE_EARLY_ERROR_RETURN;
5154    }    }
5155    
5156  if (newline < 0)  if (newline == -2)
5157      {
5158      cd->nltype = NLTYPE_ANYCRLF;
5159      }
5160    else if (newline < 0)
5161    {    {
5162    cd->nltype = NLTYPE_ANY;    cd->nltype = NLTYPE_ANY;
5163    }    }
# Line 5321  if (errorcode != 0) Line 5331  if (errorcode != 0)
5331    (pcre_free)(re);    (pcre_free)(re);
5332    PCRE_EARLY_ERROR_RETURN:    PCRE_EARLY_ERROR_RETURN:
5333    *erroroffset = ptr - (const uschar *)pattern;    *erroroffset = ptr - (const uschar *)pattern;
5334  #ifdef SUPPORT_UTF8    PCRE_EARLY_ERROR_RETURN2:
   PCRE_UTF8_ERROR_RETURN:  
 #endif  
5335    *errorptr = error_texts[errorcode];    *errorptr = error_texts[errorcode];
5336    if (errorcodeptr != NULL) *errorcodeptr = errorcode;    if (errorcodeptr != NULL) *errorcodeptr = errorcode;
5337    return NULL;    return NULL;
# Line 5413  if ((re->options & PCRE_REQCHSET) != 0) Line 5421  if ((re->options & PCRE_REQCHSET) != 0)
5421      else printf("Req char = \\x%02x%s\n", ch, caseless);      else printf("Req char = \\x%02x%s\n", ch, caseless);
5422    }    }
5423    
5424  pcre_printint(re, stdout);  pcre_printint(re, stdout, TRUE);
5425    
5426  /* This check is done here in the debugging case so that the code that  /* This check is done here in the debugging case so that the code that
5427  was compiled can be seen. */  was compiled can be seen. */

Legend:
Removed from v.93  
changed lines
  Added in v.149

  ViewVC Help
Powered by ViewVC 1.1.5