/[pcre]/code/trunk/pcre_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_compile.c

Parent Directory | Revision Log | View Patch

revision 1386 by zherczeg, Fri Oct 25 17:37:50 2013 UTC revision 1387 by ph10, Sat Nov 2 18:29:05 2013 UTC
# Line 264  static const int verbcount = sizeof(verb Line 264  static const int verbcount = sizeof(verb
264  now all in a single string, to reduce the number of relocations when a shared  now all in a single string, to reduce the number of relocations when a shared
265  library is dynamically loaded. The list of lengths is terminated by a zero  library is dynamically loaded. The list of lengths is terminated by a zero
266  length entry. The first three must be alpha, lower, upper, as this is assumed  length entry. The first three must be alpha, lower, upper, as this is assumed
267  for handling case independence. */  for handling case independence. The indices for graph, print, and punct are
268    needed, so identify them. */
269    
270  static const char posix_names[] =  static const char posix_names[] =
271    STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0    STRING_alpha0 STRING_lower0 STRING_upper0 STRING_alnum0
# Line 275  static const char posix_names[] = Line 276  static const char posix_names[] =
276  static const pcre_uint8 posix_name_lengths[] = {  static const pcre_uint8 posix_name_lengths[] = {
277    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };    5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 5, 4, 6, 0 };
278    
279    #define PC_GRAPH  8
280    #define PC_PRINT  9
281    #define PC_PUNCT 10
282    
283    
284  /* Table of class bit maps for each POSIX class. Each class is formed from a  /* Table of class bit maps for each POSIX class. Each class is formed from a
285  base map, with an optional addition or removal of another map. Then, for some  base map, with an optional addition or removal of another map. Then, for some
286  classes, there is some additional tweaking: for [:blank:] the vertical space  classes, there is some additional tweaking: for [:blank:] the vertical space
# Line 302  static const int posix_class_maps[] = { Line 308  static const int posix_class_maps[] = {
308    cbit_xdigit,-1,          0              /* xdigit */    cbit_xdigit,-1,          0              /* xdigit */
309  };  };
310    
311  /* Table of substitutes for \d etc when PCRE_UCP is set. The POSIX class  /* Table of substitutes for \d etc when PCRE_UCP is set. They are replaced by
312  substitutes must be in the order of the names, defined above, and there are  Unicode property escapes. */
 both positive and negative cases. NULL means no substitute. */  
313    
314  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
315  static const pcre_uchar string_PNd[]  = {  static const pcre_uchar string_PNd[]  = {
# Line 329  static const pcre_uchar string_pXwd[] = Line 334  static const pcre_uchar string_pXwd[] =
334  static const pcre_uchar *substitutes[] = {  static const pcre_uchar *substitutes[] = {
335    string_PNd,           /* \D */    string_PNd,           /* \D */
336    string_pNd,           /* \d */    string_pNd,           /* \d */
337    string_PXsp,          /* \S */       /* NOTE: Xsp is Perl space */    string_PXsp,          /* \S */   /* Xsp is Perl space, but from 8.34, Perl */
338    string_pXsp,          /* \s */    string_pXsp,          /* \s */   /* space and POSIX space are the same. */
339    string_PXwd,          /* \W */    string_PXwd,          /* \W */
340    string_pXwd           /* \w */    string_pXwd           /* \w */
341  };  };
342    
343    /* The POSIX class substitutes must be in the order of the POSIX class names,
344    defined above, and there are both positive and negative cases. NULL means no
345    general substitute of a Unicode property escape (\p or \P). However, for some
346    POSIX classes (e.g. graph, print, punct) a special property code is compiled
347    directly. */
348    
349  static const pcre_uchar string_pL[] =   {  static const pcre_uchar string_pL[] =   {
350    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,    CHAR_BACKSLASH, CHAR_p, CHAR_LEFT_CURLY_BRACKET,
351    CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };    CHAR_L, CHAR_RIGHT_CURLY_BRACKET, '\0' };
# Line 382  static const pcre_uchar *posix_substitut Line 393  static const pcre_uchar *posix_substitut
393    NULL,                 /* graph */    NULL,                 /* graph */
394    NULL,                 /* print */    NULL,                 /* print */
395    NULL,                 /* punct */    NULL,                 /* punct */
396    string_pXps,          /* space */    /* NOTE: Xps is POSIX space */    string_pXps,          /* space */   /* Xps is POSIX space, but from 8.34 */
397    string_pXwd,          /* word */    string_pXwd,          /* word  */   /* Perl and POSIX space are the same */
398    NULL,                 /* xdigit */    NULL,                 /* xdigit */
399    /* Negated cases */    /* Negated cases */
400    string_PL,            /* ^alpha */    string_PL,            /* ^alpha */
# Line 397  static const pcre_uchar *posix_substitut Line 408  static const pcre_uchar *posix_substitut
408    NULL,                 /* ^graph */    NULL,                 /* ^graph */
409    NULL,                 /* ^print */    NULL,                 /* ^print */
410    NULL,                 /* ^punct */    NULL,                 /* ^punct */
411    string_PXps,          /* ^space */   /* NOTE: Xps is POSIX space */    string_PXps,          /* ^space */  /* Xps is POSIX space, but from 8.34 */
412    string_PXwd,          /* ^word */    string_PXwd,          /* ^word */   /* Perl and POSIX space are the same */
413    NULL                  /* ^xdigit */    NULL                  /* ^xdigit */
414  };  };
415  #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))  #define POSIX_SUBSIZE (sizeof(posix_substitutes) / sizeof(pcre_uchar *))
# Line 2973  switch(c) Line 2984  switch(c)
2984    case OP_CLASS:    case OP_CLASS:
2985  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8  #if defined SUPPORT_UTF || !defined COMPILE_PCRE8
2986    case OP_XCLASS:    case OP_XCLASS:
   
2987    if (c == OP_XCLASS)    if (c == OP_XCLASS)
2988      end = code + GET(code, 0) - 1;      end = code + GET(code, 0) - 1;
2989    else    else
# Line 4830  for (;; ptr++) Line 4840  for (;; ptr++)
4840            posix_class = 0;            posix_class = 0;
4841    
4842          /* When PCRE_UCP is set, some of the POSIX classes are converted to          /* When PCRE_UCP is set, some of the POSIX classes are converted to
4843          different escape sequences that use Unicode properties. */          different escape sequences that use Unicode properties \p or \P. Others
4844            that are not available via \p or \P generate XCL_PROP/XCL_NOTPROP
4845            directly. */
4846    
4847  #ifdef SUPPORT_UCP  #ifdef SUPPORT_UCP
4848          if ((options & PCRE_UCP) != 0)          if ((options & PCRE_UCP) != 0)
4849            {            {
4850              unsigned int ptype = 0;
4851            int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);            int pc = posix_class + ((local_negate)? POSIX_SUBSIZE/2 : 0);
4852    
4853              /* The posix_substitutes table specifies which POSIX classes can be
4854              converted to \p or \P items. */
4855    
4856            if (posix_substitutes[pc] != NULL)            if (posix_substitutes[pc] != NULL)
4857              {              {
4858              nestptr = tempptr + 1;              nestptr = tempptr + 1;
4859              ptr = posix_substitutes[pc] - 1;              ptr = posix_substitutes[pc] - 1;
4860              continue;              continue;
4861              }              }
4862    
4863              /* There are three other classes that generate special property calls
4864              that are recognized only in an XCLASS. */
4865    
4866              else switch(posix_class)
4867                {
4868                case PC_GRAPH:
4869                ptype = PT_PXGRAPH;
4870                /* Fall through */
4871                case PC_PRINT:
4872                if (ptype == 0) ptype = PT_PXPRINT;
4873                /* Fall through */
4874                case PC_PUNCT:
4875                if (ptype == 0) ptype = PT_PXPUNCT;
4876                *class_uchardata++ = local_negate? XCL_NOTPROP : XCL_PROP;
4877                *class_uchardata++ = ptype;
4878                *class_uchardata++ = 0;
4879                ptr = tempptr + 1;
4880                continue;
4881    
4882                /* For all other POSIX classes, no special action is taken in UCP
4883                mode. Fall through to the non-UCP case. */
4884    
4885                default:
4886                break;
4887                }
4888            }            }
4889  #endif  #endif
4890          /* In the non-UCP case, we build the bit map for the POSIX class in a          /* In the non-UCP case, or when UCP makes no difference, we build the
4891          chunk of local store because we may be adding and subtracting from it,          bit map for the POSIX class in a chunk of local store because we may be
4892          and we don't want to subtract bits that may be in the main map already.          adding and subtracting from it, and we don't want to subtract bits that
4893          At the end we or the result into the bit map that is being built. */          may be in the main map already. At the end we or the result into the
4894            bit map that is being built. */
4895    
4896          posix_class *= 3;          posix_class *= 3;
4897    
# Line 6136  for (;; ptr++) Line 6180  for (;; ptr++)
6180    
6181        len = (int)(code - tempcode);        len = (int)(code - tempcode);
6182        if (len > 0)        if (len > 0)
6183          {          {
6184          unsigned int repcode = *tempcode;          unsigned int repcode = *tempcode;
6185    
6186          /* There is a table for possessifying opcodes, all of which are less          /* There is a table for possessifying opcodes, all of which are less
6187          than OP_CALLOUT. A zero entry means there is no possessified version.          than OP_CALLOUT. A zero entry means there is no possessified version.
6188          */          */
6189    
6190          if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0)          if (repcode < OP_CALLOUT && opcode_possessify[repcode] > 0)
6191            *tempcode = opcode_possessify[repcode];            *tempcode = opcode_possessify[repcode];
6192    
6193          /* For opcode without a special possessified version, wrap the item in          /* For opcode without a special possessified version, wrap the item in
6194          ONCE brackets. Because we are moving code along, we must ensure that any          ONCE brackets. Because we are moving code along, we must ensure that any
6195          pending recursive references are updated. */          pending recursive references are updated. */
6196    
6197          else          else
6198            {            {
6199            *code = OP_END;            *code = OP_END;
# Line 6162  for (;; ptr++) Line 6206  for (;; ptr++)
6206            PUTINC(code, 0, len);            PUTINC(code, 0, len);
6207            PUT(tempcode, 1, len);            PUT(tempcode, 1, len);
6208            }            }
6209          }          }
6210    
6211  #ifdef NEVER  #ifdef NEVER
6212        if (len > 0) switch (*tempcode)        if (len > 0) switch (*tempcode)

Legend:
Removed from v.1386  
changed lines
  Added in v.1387

  ViewVC Help
Powered by ViewVC 1.1.5