/[pcre]/code/branches/pcre16/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/branches/pcre16/pcre_dfa_exec.c

Parent Directory | Revision Log | View Patch

revision 755 by ph10, Mon Nov 21 10:41:54 2011 UTC | revision 756 by ph10, Mon Nov 21 10:48:42 2011 UTC
# Line 113  small value. Non-zero values in the tabl Line 113  small value. Non-zero values in the tabl
113  the character is to be found. ***NOTE*** If the start of this table is  the character is to be found. ***NOTE*** If the start of this table is
114  modified, the three tables that follow must also be modified. */  modified, the three tables that follow must also be modified. */
115    
116  static const uschar coptable[] = {  static const pcre_uint8 coptable[] = {
117    0,                             /* End                                    */    0,                             /* End                                    */
118    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
119    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
# Line 182  remember the fact that a character could Line 182  remember the fact that a character could
182  the subject is reached. ***NOTE*** If the start of this table is modified, the  the subject is reached. ***NOTE*** If the start of this table is modified, the
183  two tables that follow must also be modified. */  two tables that follow must also be modified. */
184    
185  static const uschar poptable[] = {  static const pcre_uint8 poptable[] = {
186    0,                             /* End                                    */    0,                             /* End                                    */
187    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */    0, 0, 0, 1, 1,                 /* \A, \G, \K, \B, \b                     */
188    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */    1, 1, 1, 1, 1, 1,              /* \D, \d, \S, \s, \W, \w                 */
# Line 249  static const uschar poptable[] = { Line 249  static const uschar poptable[] = {
249  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
250  and \w */  and \w */
251    
252  static const uschar toptable1[] = {  static const pcre_uint8 toptable1[] = {
253    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
254    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
255    ctype_space, ctype_space,    ctype_space, ctype_space,
# Line 257  static const uschar toptable1[] = { Line 257  static const uschar toptable1[] = {
257    0, 0                            /* OP_ANY, OP_ALLANY */    0, 0                            /* OP_ANY, OP_ALLANY */
258  };  };
259    
260  static const uschar toptable2[] = {  static const pcre_uint8 toptable2[] = {
261    0, 0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
262    ctype_digit, 0,    ctype_digit, 0,
263    ctype_space, 0,    ctype_space, 0,
# Line 386  for the current character, one for the f Line 386  for the current character, one for the f
386  static int  static int
387  internal_dfa_exec(  internal_dfa_exec(
388    dfa_match_data *md,    dfa_match_data *md,
389    const uschar *this_start_code,    const pcre_uchar *this_start_code,
390    const uschar *current_subject,    const pcre_uchar *current_subject,
391    int start_offset,    int start_offset,
392    int *offsets,    int *offsets,
393    int offsetcount,    int offsetcount,
# Line 398  internal_dfa_exec( Line 398  internal_dfa_exec(
398  stateblock *active_states, *new_states, *temp_states;  stateblock *active_states, *new_states, *temp_states;
399  stateblock *next_active_state, *next_new_state;  stateblock *next_active_state, *next_new_state;
400    
401  const uschar *ctypes, *lcc, *fcc;  const pcre_uint8 *ctypes, *lcc, *fcc;
402  const uschar *ptr;  const pcre_uchar *ptr;
403  const uschar *end_code, *first_op;  const pcre_uchar *end_code, *first_op;
404    
405  dfa_recursion_info new_recursive;  dfa_recursion_info new_recursive;
406    
# Line 409  int active_count, new_count, match_count Line 409  int active_count, new_count, match_count
409  /* Some fields in the md block are frequently referenced, so we load them into  /* Some fields in the md block are frequently referenced, so we load them into
410  independent variables in the hope that this will perform better. */  independent variables in the hope that this will perform better. */
411    
412  const uschar *start_subject = md->start_subject;  const pcre_uchar *start_subject = md->start_subject;
413  const uschar *end_subject = md->end_subject;  const pcre_uchar *end_subject = md->end_subject;
414  const uschar *start_code = md->start_code;  const pcre_uchar *start_code = md->start_code;
415    
416  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
417  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;  BOOL utf8 = (md->poptions & PCRE_UTF8) != 0;
# Line 583  for (;;) Line 583  for (;;)
583    
584  #ifdef PCRE_DEBUG  #ifdef PCRE_DEBUG
585    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);    printf("%.*sNext character: rest of subject = \"", rlevel*2-2, SP);
586    pchars((uschar *)ptr, strlen((char *)ptr), stdout);    pchars((pcre_uchar *)ptr, strlen((char *)ptr), stdout);
587    printf("\"\n");    printf("\"\n");
588    
589    printf("%.*sActive states: ", rlevel*2-2, SP);    printf("%.*sActive states: ", rlevel*2-2, SP);
# Line 624  for (;;) Line 624  for (;;)
624      {      {
625      stateblock *current_state = active_states + i;      stateblock *current_state = active_states + i;
626      BOOL caseless = FALSE;      BOOL caseless = FALSE;
627      const uschar *code;      const pcre_uchar *code;
628      int state_offset = current_state->offset;      int state_offset = current_state->offset;
629      int count, codevalue, rrc;      int count, codevalue, rrc;
630    
# Line 956  for (;;) Line 956  for (;;)
956    
957          if (ptr > start_subject)          if (ptr > start_subject)
958            {            {
959            const uschar *temp = ptr - 1;            const pcre_uchar *temp = ptr - 1;
960            if (temp < md->start_used_ptr) md->start_used_ptr = temp;            if (temp < md->start_used_ptr) md->start_used_ptr = temp;
961  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
962            if (utf8) BACKCHAR(temp);            if (utf8) BACKCHAR(temp);
# Line 1281  for (;;) Line 1281  for (;;)
1281        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }        if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1282        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1283          {          {
1284          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1285          int ncount = 0;          int ncount = 0;
1286          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)          if (count > 0 && codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSPLUS)
1287            {            {
# Line 1537  for (;;) Line 1537  for (;;)
1537        ADD_ACTIVE(state_offset + 2, 0);        ADD_ACTIVE(state_offset + 2, 0);
1538        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1539          {          {
1540          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1541          int ncount = 0;          int ncount = 0;
1542          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSSTAR ||
1543              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)              codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSQUERY)
# Line 1804  for (;;) Line 1804  for (;;)
1804        count = current_state->count;  /* Number already matched */        count = current_state->count;  /* Number already matched */
1805        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
1806          {          {
1807          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
1808          int ncount = 0;          int ncount = 0;
1809          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)          if (codevalue == OP_EXTUNI_EXTRA + OP_TYPEPOSUPTO)
1810            {            {
# Line 2023  for (;;) Line 2023  for (;;)
2023        case OP_EXTUNI:        case OP_EXTUNI:
2024        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)        if (clen > 0 && UCD_CATEGORY(c) != ucp_M)
2025          {          {
2026          const uschar *nptr = ptr + clen;          const pcre_uchar *nptr = ptr + clen;
2027          int ncount = 0;          int ncount = 0;
2028          while (nptr < end_subject)          while (nptr < end_subject)
2029            {            {
# Line 2418  for (;;) Line 2418  for (;;)
2418          {          {
2419          BOOL isinclass = FALSE;          BOOL isinclass = FALSE;
2420          int next_state_offset;          int next_state_offset;
2421          const uschar *ecode;          const pcre_uchar *ecode;
2422    
2423          /* For a simple class, there is always just a 32-byte table, and we          /* For a simple class, there is always just a 32-byte table, and we
2424          can set isinclass from it. */          can set isinclass from it. */
# Line 2510  for (;;) Line 2510  for (;;)
2510          int rc;          int rc;
2511          int local_offsets[2];          int local_offsets[2];
2512          int local_workspace[1000];          int local_workspace[1000];
2513          const uschar *endasscode = code + GET(code, 1);          const pcre_uchar *endasscode = code + GET(code, 1);
2514    
2515          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);          while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2516    
# Line 2599  for (;;) Line 2599  for (;;)
2599          else          else
2600            {            {
2601            int rc;            int rc;
2602            const uschar *asscode = code + LINK_SIZE + 1;            const pcre_uchar *asscode = code + LINK_SIZE + 1;
2603            const uschar *endasscode = asscode + GET(asscode, 1);            const pcre_uchar *endasscode = asscode + GET(asscode, 1);
2604    
2605            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);            while (*endasscode == OP_ALT) endasscode += GET(endasscode, 1);
2606    
# Line 2631  for (;;) Line 2631  for (;;)
2631          dfa_recursion_info *ri;          dfa_recursion_info *ri;
2632          int local_offsets[1000];          int local_offsets[1000];
2633          int local_workspace[1000];          int local_workspace[1000];
2634          const uschar *callpat = start_code + GET(code, 1);          const pcre_uchar *callpat = start_code + GET(code, 1);
2635          int recno = (callpat == md->start_code)? 0 :          int recno = (callpat == md->start_code)? 0 :
2636            GET2(callpat, 1 + LINK_SIZE);            GET2(callpat, 1 + LINK_SIZE);
2637          int rc;          int rc;
# Line 2682  for (;;) Line 2682  for (;;)
2682            {            {
2683            for (rc = rc*2 - 2; rc >= 0; rc -= 2)            for (rc = rc*2 - 2; rc >= 0; rc -= 2)
2684              {              {
2685              const uschar *p = start_subject + local_offsets[rc];              const pcre_uchar *p = start_subject + local_offsets[rc];
2686              const uschar *pp = start_subject + local_offsets[rc+1];              const pcre_uchar *pp = start_subject + local_offsets[rc+1];
2687              int charcount = local_offsets[rc+1] - local_offsets[rc];              int charcount = local_offsets[rc+1] - local_offsets[rc];
2688              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2689              if (charcount > 0)              if (charcount > 0)
# Line 2708  for (;;) Line 2708  for (;;)
2708        case OP_BRAPOSZERO:        case OP_BRAPOSZERO:
2709          {          {
2710          int charcount, matched_count;          int charcount, matched_count;
2711          const uschar *local_ptr = ptr;          const pcre_uchar *local_ptr = ptr;
2712          BOOL allow_zero;          BOOL allow_zero;
2713    
2714          if (codevalue == OP_BRAPOSZERO)          if (codevalue == OP_BRAPOSZERO)
# Line 2758  for (;;) Line 2758  for (;;)
2758    
2759          if (matched_count > 0 || allow_zero)          if (matched_count > 0 || allow_zero)
2760            {            {
2761            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2762            int next_state_offset;            int next_state_offset;
2763    
2764            do { end_subpattern += GET(end_subpattern, 1); }            do { end_subpattern += GET(end_subpattern, 1); }
# Line 2779  for (;;) Line 2779  for (;;)
2779              }              }
2780            else            else
2781              {              {
2782              const uschar *p = ptr;              const pcre_uchar *p = ptr;
2783              const uschar *pp = local_ptr;              const pcre_uchar *pp = local_ptr;
2784              charcount = pp - p;              charcount = pp - p;
2785              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2786              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
# Line 2809  for (;;) Line 2809  for (;;)
2809    
2810          if (rc >= 0)          if (rc >= 0)
2811            {            {
2812            const uschar *end_subpattern = code;            const pcre_uchar *end_subpattern = code;
2813            int charcount = local_offsets[1] - local_offsets[0];            int charcount = local_offsets[1] - local_offsets[0];
2814            int next_state_offset, repeat_state_offset;            int next_state_offset, repeat_state_offset;
2815    
# Line 2862  for (;;) Line 2862  for (;;)
2862              }              }
2863            else            else
2864              {              {
2865              const uschar *p = start_subject + local_offsets[0];              const pcre_uchar *p = start_subject + local_offsets[0];
2866              const uschar *pp = start_subject + local_offsets[1];              const pcre_uchar *pp = start_subject + local_offsets[1];
2867              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;              while (p < pp) if ((*p++ & 0xc0) == 0x80) charcount--;
2868              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));              ADD_NEW_DATA(-next_state_offset, 0, (charcount - 1));
2869              if (repeat_state_offset >= 0)              if (repeat_state_offset >= 0)
# Line 3005  real_pcre *re = (real_pcre *)argument_re Line 3005  real_pcre *re = (real_pcre *)argument_re
3005  dfa_match_data match_block;  dfa_match_data match_block;
3006  dfa_match_data *md = &match_block;  dfa_match_data *md = &match_block;
3007  BOOL utf8, anchored, startline, firstline;  BOOL utf8, anchored, startline, firstline;
3008  const uschar *current_subject, *end_subject, *lcc;  const pcre_uchar *current_subject, *end_subject;
3009    const pcre_uint8 *lcc;
3010    
3011  pcre_study_data internal_study;  pcre_study_data internal_study;
3012  const pcre_study_data *study = NULL;  const pcre_study_data *study = NULL;
3013  real_pcre internal_re;  real_pcre internal_re;
3014    
3015  const uschar *req_byte_ptr;  const pcre_uint8 *req_byte_ptr;
3016  const uschar *start_bits = NULL;  const pcre_uint8 *start_bits = NULL;
3017  BOOL first_byte_caseless = FALSE;  BOOL first_byte_caseless = FALSE;
3018  BOOL req_byte_caseless = FALSE;  BOOL req_byte_caseless = FALSE;
3019  int first_byte = -1;  int first_byte = -1;
# Line 3080  anchored = (options & (PCRE_ANCHORED|PCR Line 3081  anchored = (options & (PCRE_ANCHORED|PCR
3081    
3082  /* The remaining fixed data for passing around. */  /* The remaining fixed data for passing around. */
3083    
3084  md->start_code = (const uschar *)argument_re +  md->start_code = (const pcre_uchar *)argument_re +
3085      re->name_table_offset + re->name_count * re->name_entry_size;      re->name_table_offset + re->name_count * re->name_entry_size;
3086  md->start_subject = (const unsigned char *)subject;  md->start_subject = (const unsigned char *)subject;
3087  md->end_subject = end_subject;  md->end_subject = end_subject;
# Line 3147  back the character offset. */ Line 3148  back the character offset. */
3148  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)  if (utf8 && (options & PCRE_NO_UTF8_CHECK) == 0)
3149    {    {
3150    int erroroffset;    int erroroffset;
3151    int errorcode = _pcre_valid_utf8((uschar *)subject, length, &erroroffset);    int errorcode = _pcre_valid_utf8((pcre_uchar *)subject, length, &erroroffset);
3152    if (errorcode != 0)    if (errorcode != 0)
3153      {      {
3154      if (offsetcount >= 2)      if (offsetcount >= 2)
# Line 3159  if (utf8 && (options & PCRE_NO_UTF8_CHEC Line 3160  if (utf8 && (options & PCRE_NO_UTF8_CHEC
3160        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;        PCRE_ERROR_SHORTUTF8 : PCRE_ERROR_BADUTF8;
3161      }      }
3162    if (start_offset > 0 && start_offset < length &&    if (start_offset > 0 && start_offset < length &&
3163          (((USPTR)subject)[start_offset] & 0xc0) == 0x80)          (((PCRE_PUCHAR)subject)[start_offset] & 0xc0) == 0x80)
3164      return PCRE_ERROR_BADUTF8_OFFSET;      return PCRE_ERROR_BADUTF8_OFFSET;
3165    }    }
3166  #endif  #endif
# Line 3219  for (;;) Line 3220  for (;;)
3220    
3221    if ((options & PCRE_DFA_RESTART) == 0)    if ((options & PCRE_DFA_RESTART) == 0)
3222      {      {
3223      const uschar *save_end_subject = end_subject;      const pcre_uchar *save_end_subject = end_subject;
3224    
3225      /* If firstline is TRUE, the start of the match is constrained to the first      /* If firstline is TRUE, the start of the match is constrained to the first
3226      line of a multiline string. Implement this by temporarily adjusting      line of a multiline string. Implement this by temporarily adjusting
# Line 3228  for (;;) Line 3229  for (;;)
3229    
3230      if (firstline)      if (firstline)
3231        {        {
3232        USPTR t = current_subject;        PCRE_PUCHAR t = current_subject;
3233  #ifdef SUPPORT_UTF8  #ifdef SUPPORT_UTF8
3234        if (utf8)        if (utf8)
3235          {          {
# Line 3357  for (;;) Line 3358  for (;;)
3358    
3359        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)        if (req_byte >= 0 && end_subject - current_subject < REQ_BYTE_MAX)
3360          {          {
3361          register const uschar *p = current_subject + ((first_byte >= 0)? 1 : 0);          register const pcre_uchar *p = current_subject + ((first_byte >= 0)? 1 : 0);
3362    
3363          /* We don't need to repeat the search if we haven't yet reached the          /* We don't need to repeat the search if we haven't yet reached the
3364          place we found it at last time. */          place we found it at last time. */

Legend:
Removed from v.755  
changed lines
  Added in v.756

  ViewVC Help
Powered by ViewVC 1.1.5