/[pcre]/code/branches/pcre16/pcre_compile.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_compile.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 782 by zherczeg, Sat Dec 3 23:58:37 2011 UTC revision 785 by zherczeg, Mon Dec 5 20:12:24 2011 UTC
# Line 102  overrun before it actually does run off Line 102  overrun before it actually does run off
102  #define REQ_CASELESS   0x10000000l      /* Indicates caselessness */  #define REQ_CASELESS   0x10000000l      /* Indicates caselessness */
103  #define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */  #define REQ_VARY       0x20000000l      /* Reqchar followed non-literal item */
104    
105    /* Repeated character flags. */
106    
107    #define UTF_LENGTH     0x10000000l      /* The char contains its length. */
108    
109  /* Table for handling escaped characters in the range '0'-'z'. Positive returns  /* Table for handling escaped characters in the range '0'-'z'. Positive returns
110  are simple data values; negative values are for special things like \d and so  are simple data values; negative values are for special things like \d and so
111  on. Zero means further processing is needed (for things like \x), or the escape  on. Zero means further processing is needed (for things like \x), or the escape
# Line 2896  static BOOL Line 2900  static BOOL
2900  check_auto_possessive(const pcre_uchar *previous, BOOL utf,  check_auto_possessive(const pcre_uchar *previous, BOOL utf,
2901    const pcre_uchar *ptr, int options, compile_data *cd)    const pcre_uchar *ptr, int options, compile_data *cd)
2902  {  {
2903  int c, next;  pcre_int32 c, next;
2904  int op_code = *previous++;  int op_code = *previous++;
2905    
2906  /* Skip whitespace and comments in extended mode */  /* Skip whitespace and comments in extended mode */
# Line 2932  if (*ptr == CHAR_BACKSLASH) Line 2936  if (*ptr == CHAR_BACKSLASH)
2936    if (temperrorcode != 0) return FALSE;    if (temperrorcode != 0) return FALSE;
2937    ptr++;    /* Point after the escape sequence */    ptr++;    /* Point after the escape sequence */
2938    }    }
2939    else if (!MAX_255(*ptr) || (cd->ctypes[*ptr] & ctype_meta) == 0)
 else if ((cd->ctypes[*ptr] & ctype_meta) == 0)  
2940    {    {
2941  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
2942    if (utf) { GETCHARINC(next, ptr); } else    if (utf) { GETCHARINC(next, ptr); } else
2943  #endif  #endif
2944    next = *ptr++;    next = *ptr++;
2945    }    }
   
2946  else return FALSE;  else return FALSE;
2947    
2948  /* Skip whitespace and comments in extended mode */  /* Skip whitespace and comments in extended mode */
# Line 4603  for (;; ptr++) Line 4605  for (;; ptr++)
4605    
4606        /* Deal with UTF characters that take up more than one character. It's        /* Deal with UTF characters that take up more than one character. It's
4607        easier to write this out separately than try to macrify it. Use c to        easier to write this out separately than try to macrify it. Use c to
4608        hold the length of the character in bytes, plus 0x80 to flag that it's a        hold the length of the character in bytes, plus UTF_LENGTH to flag that
4609        length rather than a small character. */        it's a length rather than a small character. */
4610    
4611  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4612    #ifdef COMPILE_PCRE8
4613        if (utf && (code[-1] & 0x80) != 0)        if (utf && (code[-1] & 0x80) != 0)
4614    #endif /* COMPILE_PCRE8 */
4615    #ifdef COMPILE_PCRE16
4616          if (utf && (code[-1] & 0xfc00) == 0xdc00)
4617    #endif /* COMPILE_PCRE16 */
4618          {          {
4619          pcre_uchar *lastchar = code - 1;          pcre_uchar *lastchar = code - 1;
4620          BACKCHAR(lastchar);          BACKCHAR(lastchar);
4621          c = code - lastchar;            /* Length of UTF-8 character */          c = code - lastchar;            /* Length of UTF-8 character */
4622          memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */          memcpy(utf_chars, lastchar, IN_UCHARS(c)); /* Save the char */
4623          c |= 0x80;                      /* Flag c as a length */          c |= UTF_LENGTH;                /* Flag c as a length */
4624          }          }
4625        else        else
4626  #endif  #endif /* SUPPORT_UTF */
4627    
4628        /* Handle the case of a single character - either with no UTF support, or        /* Handle the case of a single character - either with no UTF support, or
4629        with UTF disabled, or for a single character UTF character. */        with UTF disabled, or for a single character UTF character. */
# Line 4758  for (;; ptr++) Line 4765  for (;; ptr++)
4765          we have to insert the character for the previous code. For a repeated          we have to insert the character for the previous code. For a repeated
4766          Unicode property match, there are two extra bytes that define the          Unicode property match, there are two extra bytes that define the
4767          required property. In UTF-8 mode, long characters have their length in          required property. In UTF-8 mode, long characters have their length in
4768          c, with the 0x80 bit as a flag. */          c, with the UTF_LENGTH bit as a flag. */
4769    
4770          if (repeat_max < 0)          if (repeat_max < 0)
4771            {            {
4772  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4773            if (utf && c >= 128)            if (utf && (c & UTF_LENGTH) != 0)
4774              {              {
4775              memcpy(code, utf_chars, c & 7);              memcpy(code, utf_chars, IN_UCHARS(c & 7));
4776              code += c & 7;              code += c & 7;
4777              }              }
4778            else            else
# Line 4787  for (;; ptr++) Line 4794  for (;; ptr++)
4794    
4795          else if (repeat_max != repeat_min)          else if (repeat_max != repeat_min)
4796            {            {
4797  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4798            if (utf && c >= 128)            if (utf && (c & UTF_LENGTH) != 0)
4799              {              {
4800              memcpy(code, utf_chars, c & 7);              memcpy(code, utf_chars, IN_UCHARS(c & 7));
4801              code += c & 7;              code += c & 7;
4802              }              }
4803            else            else
# Line 4817  for (;; ptr++) Line 4824  for (;; ptr++)
4824    
4825        /* The character or character type itself comes last in all cases. */        /* The character or character type itself comes last in all cases. */
4826    
4827  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF
4828        if (utf && c >= 128)        if (utf && (c & UTF_LENGTH) != 0)
4829          {          {
4830          memcpy(code, utf_chars, c & 7);          memcpy(code, utf_chars, IN_UCHARS(c & 7));
4831          code += c & 7;          code += c & 7;
4832          }          }
4833        else        else
# Line 6661  for (;; ptr++) Line 6668  for (;; ptr++)
6668    
6669  #ifdef SUPPORT_UTF  #ifdef SUPPORT_UTF
6670      if (utf && HAS_EXTRALEN(c))      if (utf && HAS_EXTRALEN(c))
6671        {        ACROSSCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));
       INTERNALCHAR(TRUE, ptr[1], mcbuffer[mclength++] = *(++ptr));  
       }  
6672  #endif  #endif
6673    
6674      /* At this point we have the character's bytes in mcbuffer, and the length      /* At this point we have the character's bytes in mcbuffer, and the length
# Line 7789  if ((re->options & PCRE_ANCHORED) == 0) Line 7794  if ((re->options & PCRE_ANCHORED) == 0)
7794        re->first_char = firstchar & 0xffff;        re->first_char = firstchar & 0xffff;
7795  #endif  #endif
7796  #endif  #endif
7797        if ((firstchar & REQ_CASELESS) != 0 && MAX_255(re->first_char)        if ((firstchar & REQ_CASELESS) != 0)
7798          && cd->fcc[re->first_char] != re->first_char)          {
7799          re->flags |= PCRE_FCH_CASELESS;  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
7800            /* We ignore non-ASCII first chars in 8 bit mode. */
7801            if (utf)
7802              {
7803              if (re->first_char < 128)
7804                {
7805                if (cd->fcc[re->first_char] != re->first_char)
7806                  re->flags |= PCRE_FCH_CASELESS;
7807                }
7808              else if ((options & PCRE_UCP) != 0
7809                  && UCD_OTHERCASE(re->first_char) != re->first_char)
7810                re->flags |= PCRE_FCH_CASELESS;
7811              }
7812            else
7813    #endif
7814            if (MAX_255(re->first_char)
7815                && cd->fcc[re->first_char] != re->first_char)
7816              re->flags |= PCRE_FCH_CASELESS;
7817            }
7818    
7819        re->flags |= PCRE_FIRSTSET;        re->flags |= PCRE_FIRSTSET;
7820        }        }
# Line 7814  if (reqchar >= 0 && Line 7837  if (reqchar >= 0 &&
7837    re->req_char = reqchar & 0xffff;    re->req_char = reqchar & 0xffff;
7838  #endif  #endif
7839  #endif  #endif
7840    if ((reqchar & REQ_CASELESS) != 0 && MAX_255(re->req_char)    if ((reqchar & REQ_CASELESS) != 0)
7841      && cd->fcc[re->req_char] != re->req_char)      {
7842      re->flags |= PCRE_RCH_CASELESS;  #if defined SUPPORT_UCP && !(defined COMPILE_PCRE8)
7843        /* We ignore non-ASCII req chars in 8 bit mode. */
7844        if (utf)
7845          {
7846          if (re->first_char < 128)
7847            {
7848            if (cd->fcc[re->first_char] != re->first_char)
7849              re->flags |= PCRE_RCH_CASELESS;
7850            }
7851          else if ((options & PCRE_UCP) != 0
7852              && UCD_OTHERCASE(re->first_char) != re->first_char)
7853            re->flags |= PCRE_RCH_CASELESS;
7854          }
7855        else
7856    #endif
7857        if (MAX_255(re->req_char) && cd->fcc[re->req_char] != re->req_char)
7858          re->flags |= PCRE_RCH_CASELESS;
7859        }
7860    
7861    re->flags |= PCRE_REQCHSET;    re->flags |= PCRE_REQCHSET;
7862    }    }

Legend:
Removed from v.782  
changed lines
  Added in v.785

  ViewVC Help
Powered by ViewVC 1.1.5