/[pcre]/code/trunk/pcreposix.c
ViewVC logotype

Diff of /code/trunk/pcreposix.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 3 by nigel, Sat Feb 24 21:38:01 2007 UTC revision 69 by nigel, Sat Feb 24 21:40:18 2007 UTC
# Line 12  functions. Line 12  functions.
12    
13  Written by: Philip Hazel <ph10@cam.ac.uk>  Written by: Philip Hazel <ph10@cam.ac.uk>
14    
15             Copyright (c) 1997 University of Cambridge             Copyright (c) 1997-2003 University of Cambridge
16    
17  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
18  Permission is granted to anyone to use this software for any purpose on any  Permission is granted to anyone to use this software for any purpose on any
# Line 28  restrictions: Line 28  restrictions:
28    
29  3. Altered versions must be plainly marked as such, and must not be  3. Altered versions must be plainly marked as such, and must not be
30     misrepresented as being the original software.     misrepresented as being the original software.
31    
32    4. If PCRE is embedded in any software that is released under the GNU
33       General Purpose Licence (GPL), then the terms of that licence shall
34       supersede any condition above with which it is incompatible.
35  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
36  */  */
37    
# Line 39  restrictions: Line 43  restrictions:
43    
44  /* Corresponding tables of PCRE error messages and POSIX error codes. */  /* Corresponding tables of PCRE error messages and POSIX error codes. */
45    
46  static char *estring[] = {  static const char *estring[] = {
47    ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,    ERR1,  ERR2,  ERR3,  ERR4,  ERR5,  ERR6,  ERR7,  ERR8,  ERR9,  ERR10,
48    ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,    ERR11, ERR12, ERR13, ERR14, ERR15, ERR16, ERR17, ERR18, ERR19, ERR20,
49    ERR21, ERR22, ERR23 };    ERR21, ERR22, ERR23, ERR24, ERR25, ERR26, ERR27, ERR28, ERR29, ERR30,
50      ERR31, ERR32, ERR33, ERR34, ERR35, ERR36, ERR37, ERR38, ERR39, ERR40,
51      ERR41, ERR42, ERR43 };
52    
53  static int eint[] = {  static int eint[] = {
54    REG_EESCAPE, /* "\\ at end of pattern" */    REG_EESCAPE, /* "\\ at end of pattern" */
# Line 57  static int eint[] = { Line 63  static int eint[] = {
63    REG_BADRPT,  /* "operand of unlimited repeat could match the empty string" */    REG_BADRPT,  /* "operand of unlimited repeat could match the empty string" */
64    REG_ASSERT,  /* "internal error: unexpected repeat" */    REG_ASSERT,  /* "internal error: unexpected repeat" */
65    REG_BADPAT,  /* "unrecognized character after (?" */    REG_BADPAT,  /* "unrecognized character after (?" */
66    REG_ESIZE,   /* "too many capturing parenthesized sub-patterns" */    REG_BADPAT,  /* "POSIX named classes are supported only within a class" */
67    REG_EPAREN,  /* "missing )" */    REG_EPAREN,  /* "missing )" */
68    REG_ESUBREG, /* "back reference to non-existent subpattern" */    REG_ESUBREG, /* "reference to non-existent subpattern" */
69    REG_INVARG,  /* "erroffset passed as NULL" */    REG_INVARG,  /* "erroffset passed as NULL" */
70    REG_INVARG,  /* "unknown option bit(s) set" */    REG_INVARG,  /* "unknown option bit(s) set" */
71    REG_EPAREN,  /* "missing ) after comment" */    REG_EPAREN,  /* "missing ) after comment" */
72    REG_ESIZE,   /* "too many sets of parentheses" */    REG_ESIZE,   /* "parentheses nested too deeply" */
73    REG_ESIZE,   /* "regular expression too large" */    REG_ESIZE,   /* "regular expression too large" */
74    REG_ESPACE,  /* "failed to get memory" */    REG_ESPACE,  /* "failed to get memory" */
75    REG_EPAREN,  /* "unmatched brackets" */    REG_EPAREN,  /* "unmatched brackets" */
76    REG_ASSERT   /* "internal error: code overflow" */    REG_ASSERT,  /* "internal error: code overflow" */
77      REG_BADPAT,  /* "unrecognized character after (?<" */
78      REG_BADPAT,  /* "lookbehind assertion is not fixed length" */
79      REG_BADPAT,  /* "malformed number after (?(" */
80      REG_BADPAT,  /* "conditional group contains more than two branches" */
81      REG_BADPAT,  /* "assertion expected after (?(" */
82      REG_BADPAT,  /* "(?R or (?digits must be followed by )" */
83      REG_ECTYPE,  /* "unknown POSIX class name" */
84      REG_BADPAT,  /* "POSIX collating elements are not supported" */
85      REG_INVARG,  /* "this version of PCRE is not compiled with PCRE_UTF8 support" */
86      REG_BADPAT,  /* "spare error" */
87      REG_BADPAT,  /* "character value in \x{...} sequence is too large" */
88      REG_BADPAT,  /* "invalid condition (?(0)" */
89      REG_BADPAT,  /* "\\C not allowed in lookbehind assertion" */
90      REG_EESCAPE, /* "PCRE does not support \\L, \\l, \\N, \\P, \\p, \\U, \\u, or \\X" */
91      REG_BADPAT,  /* "number after (?C is > 255" */
92      REG_BADPAT,  /* "closing ) for (?C expected" */
93      REG_BADPAT,  /* "recursive call could loop indefinitely" */
94      REG_BADPAT,  /* "unrecognized character after (?P" */
95      REG_BADPAT,  /* "syntax error after (?P" */
96      REG_BADPAT   /* "two named groups have the same name" */
97  };  };
98    
99  /* Table of texts corresponding to POSIX error codes */  /* Table of texts corresponding to POSIX error codes */
100    
101  static char *pstring[] = {  static const char *pstring[] = {
102    "",                                /* Dummy for value 0 */    "",                                /* Dummy for value 0 */
103    "internal error",                  /* REG_ASSERT */    "internal error",                  /* REG_ASSERT */
104    "invalid repeat counts in {}",     /* BADBR      */    "invalid repeat counts in {}",     /* BADBR      */
# Line 106  look them up in a table to turn them int Line 132  look them up in a table to turn them int
132  static int  static int
133  pcre_posix_error_code(const char *s)  pcre_posix_error_code(const char *s)
134  {  {
135  int i;  size_t i;
136  for (i = 0; i < sizeof(estring)/sizeof(char *); i++)  for (i = 0; i < sizeof(estring)/sizeof(char *); i++)
137    if (strcmp(s, estring[i]) == 0) return eint[i];    if (strcmp(s, estring[i]) == 0) return eint[i];
138  return REG_ASSERT;  return REG_ASSERT;
# Line 121  return REG_ASSERT; Line 147  return REG_ASSERT;
147  size_t  size_t
148  regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)  regerror(int errcode, const regex_t *preg, char *errbuf, size_t errbuf_size)
149  {  {
150  char *message, *addmessage;  const char *message, *addmessage;
151  int length, adlength;  size_t length, addlength;
152    
153  message = (errcode >= sizeof(pstring)/sizeof(char *))?  message = (errcode >= (int)(sizeof(pstring)/sizeof(char *)))?
154    "unknown error code" : pstring[errcode];    "unknown error code" : pstring[errcode];
155    length = strlen(message) + 1;
156    
157  length = (int)strlen(message) + 1;  addmessage = " at offset ";
158    addlength = (preg != NULL && (int)preg->re_erroffset != -1)?
159  if (preg != NULL && (int)preg->re_erroffset != -1)    strlen(addmessage) + 6 : 0;
   {  
   addmessage = " at offset ";  
   adlength = (int)strlen(addmessage) + 6;  
   }  
 else adlength = 0;  
160    
161  if (errbuf_size > 0)  if (errbuf_size > 0)
162    {    {
163    if (adlength > 0 && errbuf_size >= length + adlength)    if (addlength > 0 && errbuf_size >= length + addlength)
164      sprintf(errbuf, "%s%s%-6d", message, addmessage, preg->re_erroffset);      sprintf(errbuf, "%s%s%-6d", message, addmessage, (int)preg->re_erroffset);
165    else    else
166      {      {
167      strncpy(errbuf, message, errbuf_size - 1);      strncpy(errbuf, message, errbuf_size - 1);
# Line 147  if (errbuf_size > 0) Line 169  if (errbuf_size > 0)
169      }      }
170    }    }
171    
172  return length + adlength;  return length + addlength;
173  }  }
174    
175    
# Line 183  Returns:      0 on success Line 205  Returns:      0 on success
205  int  int
206  regcomp(regex_t *preg, const char *pattern, int cflags)  regcomp(regex_t *preg, const char *pattern, int cflags)
207  {  {
208  char *errorptr;  const char *errorptr;
209  int erroffset;  int erroffset;
210  int options = 0;  int options = 0;
211    
212  if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;  if ((cflags & REG_ICASE) != 0) options |= PCRE_CASELESS;
213  if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;  if ((cflags & REG_NEWLINE) != 0) options |= PCRE_MULTILINE;
214    
215  preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset);  preg->re_pcre = pcre_compile(pattern, options, &errorptr, &erroffset, NULL);
216  preg->re_erroffset = erroffset;  preg->re_erroffset = erroffset;
217    
218  if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);  if (preg->re_pcre == NULL) return pcre_posix_error_code(errorptr);
# Line 206  return 0; Line 228  return 0;
228  *              Match a regular expression        *  *              Match a regular expression        *
229  *************************************************/  *************************************************/
230    
231    /* Unfortunately, PCRE requires 3 ints of working space for each captured
232    substring, so we have to get and release working store instead of just using
233    the POSIX structures as was done in earlier releases when PCRE needed only 2
234    ints. However, if the number of possible capturing brackets is small, use a
235    block of store on the stack, to reduce the use of malloc/free. The threshold is
236    in a macro that can be changed at configure time. */
237    
238  int  int
239  regexec(regex_t *preg, const char *string, size_t nmatch,  regexec(const regex_t *preg, const char *string, size_t nmatch,
240    regmatch_t pmatch[], int eflags)    regmatch_t pmatch[], int eflags)
241  {  {
242  int rc;  int rc;
243  int options = 0;  int options = 0;
244    int *ovector = NULL;
245    int small_ovector[POSIX_MALLOC_THRESHOLD * 3];
246    BOOL allocated_ovector = FALSE;
247    
248  if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;  if ((eflags & REG_NOTBOL) != 0) options |= PCRE_NOTBOL;
249  if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;  if ((eflags & REG_NOTEOL) != 0) options |= PCRE_NOTEOL;
250    
251  preg->re_erroffset = -1;   /* Only has meaning after compile */  ((regex_t *)preg)->re_erroffset = (size_t)(-1);  /* Only has meaning after compile */
252    
253  rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), options,  if (nmatch > 0)
254    (int *)pmatch, nmatch * 2);    {
255      if (nmatch <= POSIX_MALLOC_THRESHOLD)
256        {
257        ovector = &(small_ovector[0]);
258        }
259      else
260        {
261        ovector = (int *)malloc(sizeof(int) * nmatch * 3);
262        if (ovector == NULL) return REG_ESPACE;
263        allocated_ovector = TRUE;
264        }
265      }
266    
267    rc = pcre_exec(preg->re_pcre, NULL, string, (int)strlen(string), 0, options,
268      ovector, nmatch * 3);
269    
270  if (rc == 0) return 0;    /* All pmatch were filled in */  if (rc == 0) rc = nmatch;    /* All captured slots were filled in */
271    
272  if (rc > 0)  if (rc >= 0)
273    {    {
274    int i;    size_t i;
275    for (i = rc; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;    for (i = 0; i < (size_t)rc; i++)
276        {
277        pmatch[i].rm_so = ovector[i*2];
278        pmatch[i].rm_eo = ovector[i*2+1];
279        }
280      if (allocated_ovector) free(ovector);
281      for (; i < nmatch; i++) pmatch[i].rm_so = pmatch[i].rm_eo = -1;
282    return 0;    return 0;
283    }    }
284    
285  else switch(rc)  else
286    {    {
287    case PCRE_ERROR_NOMATCH: return REG_NOMATCH;    if (allocated_ovector) free(ovector);
288    case PCRE_ERROR_BADREF: return REG_ESUBREG;    switch(rc)
289    case PCRE_ERROR_NULL: return REG_INVARG;      {
290    case PCRE_ERROR_BADOPTION: return REG_INVARG;      case PCRE_ERROR_NOMATCH: return REG_NOMATCH;
291    case PCRE_ERROR_BADMAGIC: return REG_INVARG;      case PCRE_ERROR_NULL: return REG_INVARG;
292    case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;      case PCRE_ERROR_BADOPTION: return REG_INVARG;
293    case PCRE_ERROR_NOMEMORY: return REG_ESPACE;      case PCRE_ERROR_BADMAGIC: return REG_INVARG;
294    default: return REG_ASSERT;      case PCRE_ERROR_UNKNOWN_NODE: return REG_ASSERT;
295        case PCRE_ERROR_NOMEMORY: return REG_ESPACE;
296        default: return REG_ASSERT;
297        }
298    }    }
299  }  }
300    

Legend:
Removed from v.3  
changed lines
  Added in v.69

  ViewVC Help
Powered by ViewVC 1.1.5