/[pcre]/code/trunk/pcre_jit_compile.c
ViewVC logotype

Diff of /code/trunk/pcre_jit_compile.c

Parent Directory | Revision Log | View Patch

revision 1358 by zherczeg, Thu Aug 29 13:40:47 2013 UTC | revision 1367 by zherczeg, Mon Oct 7 07:41:44 2013 UTC
# Line 371  typedef struct compiler_common { Line 371  typedef struct compiler_common {
371    sljit_sw ctypes;    sljit_sw ctypes;
372    int digits[2 + MAX_RANGE_SIZE];    int digits[2 + MAX_RANGE_SIZE];
373    /* Named capturing brackets. */    /* Named capturing brackets. */
374    sljit_uw name_table;    pcre_uchar *name_table;
375    sljit_sw name_count;    sljit_sw name_count;
376    sljit_sw name_entry_size;    sljit_sw name_entry_size;
377    
# Line 614  switch(*cc) Line 614  switch(*cc)
614    case OP_SCBRAPOS:    case OP_SCBRAPOS:
615    case OP_SCOND:    case OP_SCOND:
616    case OP_CREF:    case OP_CREF:
617    case OP_NCREF:    case OP_DNCREF:
618    case OP_RREF:    case OP_RREF:
619    case OP_NRREF:    case OP_DNRREF:
620    case OP_DEF:    case OP_DEF:
621    case OP_BRAZERO:    case OP_BRAZERO:
622    case OP_BRAMINZERO:    case OP_BRAMINZERO:
# Line 736  switch(*cc) Line 736  switch(*cc)
736    
737  static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)  static BOOL check_opcode_types(compiler_common *common, pcre_uchar *cc, pcre_uchar *ccend)
738  {  {
739  pcre_uchar *name;  pcre_uchar *slot;
 pcre_uchar *name2;  
 unsigned int cbra_index;  
740  int i;  int i;
741    
742  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */  /* Calculate important variables (like stack size) and checks whether all opcodes are supported. */
# Line 778  while (cc < ccend) Line 776  while (cc < ccend)
776      cc += 1 + IMM2_SIZE;      cc += 1 + IMM2_SIZE;
777      break;      break;
778    
779      case OP_NCREF:      case OP_DNCREF:
780      cbra_index = GET2(cc, 1);      i = GET2(cc, 1 + IMM2_SIZE);
781      name = (pcre_uchar *)common->name_table;      slot = common->name_table + GET2(cc, 1) * common->name_entry_size;
782      name2 = name;      while (i-- > 0)
783      for (i = 0; i < common->name_count; i++)        {
784        {        common->optimized_cbracket[GET2(slot, 0)] = 0;
785        if (GET2(name, 0) == cbra_index) break;        slot += common->name_entry_size;
       name += common->name_entry_size;  
       }  
     SLJIT_ASSERT(i != common->name_count);  
   
     for (i = 0; i < common->name_count; i++)  
       {  
       if (STRCMP_UC_UC(name2 + IMM2_SIZE, name + IMM2_SIZE) == 0)  
         common->optimized_cbracket[GET2(name2, 0)] = 0;  
       name2 += common->name_entry_size;  
786        }        }
787      cc += 1 + IMM2_SIZE;      cc += 1 + 2 * IMM2_SIZE;
788      break;      break;
789    
790      case OP_RECURSE:      case OP_RECURSE:
# Line 4317  while (*cc != XCL_END) Line 4306  while (*cc != XCL_END)
4306    
4307        case PT_SPACE:        case PT_SPACE:
4308        case PT_PXSPACE:        case PT_PXSPACE:
       if (*cc == PT_SPACE)  
         {  
         OP1(SLJIT_MOV, TMP2, 0, SLJIT_IMM, 0);  
         jump = CMP(SLJIT_C_EQUAL, TMP1, 0, SLJIT_IMM, 11 - charoffset);  
         }  
4309        SET_CHAR_OFFSET(9);        SET_CHAR_OFFSET(9);
4310        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, TMP1, 0, SLJIT_IMM, 13 - 9);
4311        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);        OP_FLAGS(SLJIT_MOV, TMP2, 0, SLJIT_UNUSED, 0, SLJIT_C_LESS_EQUAL);
       if (*cc == PT_SPACE)  
         JUMPHERE(jump);  
4312    
4313        SET_TYPE_OFFSET(ucp_Zl);        SET_TYPE_OFFSET(ucp_Zl);
4314        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);        OP2(SLJIT_SUB | SLJIT_SET_U, SLJIT_UNUSED, 0, typereg, 0, SLJIT_IMM, ucp_Zs - ucp_Zl);
# Line 5902  common->accept = save_accept; Line 5884  common->accept = save_accept;
5884  return cc + 1 + LINK_SIZE;  return cc + 1 + LINK_SIZE;
5885  }  }
5886    
 static sljit_sw SLJIT_CALL do_searchovector(sljit_uw refno, sljit_sw* locals, pcre_uchar *name_table)  
 {  
 int condition = FALSE;  
 pcre_uchar *slotA = name_table;  
 pcre_uchar *slotB;  
 sljit_sw name_count = locals[LOCALS0 / sizeof(sljit_sw)];  
 sljit_sw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];  
 sljit_sw no_capture;  
 int i;  
   
 locals += refno & 0xff;  
 refno >>= 8;  
 no_capture = locals[1];  
   
 for (i = 0; i < name_count; i++)  
   {  
   if (GET2(slotA, 0) == refno) break;  
   slotA += name_entry_size;  
   }  
   
 if (i < name_count)  
   {  
   /* Found a name for the number - there can be only one; duplicate names  
   for different numbers are allowed, but not vice versa. First scan down  
   for duplicates. */  
   
   slotB = slotA;  
   while (slotB > name_table)  
     {  
     slotB -= name_entry_size;  
     if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
       {  
       condition = locals[GET2(slotB, 0) << 1] != no_capture;  
       if (condition) break;  
       }  
     else break;  
     }  
   
   /* Scan up for duplicates */  
   if (!condition)  
     {  
     slotB = slotA;  
     for (i++; i < name_count; i++)  
       {  
       slotB += name_entry_size;  
       if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
         {  
         condition = locals[GET2(slotB, 0) << 1] != no_capture;  
         if (condition) break;  
         }  
       else break;  
       }  
     }  
   }  
 return condition;  
 }  
   
 static sljit_sw SLJIT_CALL do_searchgroups(sljit_uw recno, sljit_uw* locals, pcre_uchar *name_table)  
 {  
 int condition = FALSE;  
 pcre_uchar *slotA = name_table;  
 pcre_uchar *slotB;  
 sljit_uw name_count = locals[LOCALS0 / sizeof(sljit_sw)];  
 sljit_uw name_entry_size = locals[LOCALS1 / sizeof(sljit_sw)];  
 sljit_uw group_num = locals[POSSESSIVE0 / sizeof(sljit_sw)];  
 sljit_uw i;  
   
 for (i = 0; i < name_count; i++)  
   {  
   if (GET2(slotA, 0) == recno) break;  
   slotA += name_entry_size;  
   }  
   
 if (i < name_count)  
   {  
   /* Found a name for the number - there can be only one; duplicate  
   names for different numbers are allowed, but not vice versa. First  
   scan down for duplicates. */  
   
   slotB = slotA;  
   while (slotB > name_table)  
     {  
     slotB -= name_entry_size;  
     if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
       {  
       condition = GET2(slotB, 0) == group_num;  
       if (condition) break;  
       }  
     else break;  
     }  
   
   /* Scan up for duplicates */  
   if (!condition)  
     {  
     slotB = slotA;  
     for (i++; i < name_count; i++)  
       {  
       slotB += name_entry_size;  
       if (STRCMP_UC_UC(slotA + IMM2_SIZE, slotB + IMM2_SIZE) == 0)  
         {  
         condition = GET2(slotB, 0) == group_num;  
         if (condition) break;  
         }  
       else break;  
       }  
     }  
   }  
 return condition;  
 }  
   
5887  static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)  static SLJIT_INLINE void match_once_common(compiler_common *common, pcre_uchar ket, int framesize, int private_data_ptr, BOOL has_alternatives, BOOL needs_control_head)
5888  {  {
5889  DEFINE_COMPILER;  DEFINE_COMPILER;
# Line 6144  backtrack_common *backtrack; Line 6016  backtrack_common *backtrack;
6016  pcre_uchar opcode;  pcre_uchar opcode;
6017  int private_data_ptr = 0;  int private_data_ptr = 0;
6018  int offset = 0;  int offset = 0;
6019  int stacksize;  int i, stacksize;
6020  int repeat_ptr = 0, repeat_length = 0;  int repeat_ptr = 0, repeat_length = 0;
6021  int repeat_type = 0, repeat_count = 0;  int repeat_type = 0, repeat_count = 0;
6022  pcre_uchar *ccbegin;  pcre_uchar *ccbegin;
6023  pcre_uchar *matchingpath;  pcre_uchar *matchingpath;
6024    pcre_uchar *slot;
6025  pcre_uchar bra = OP_BRA;  pcre_uchar bra = OP_BRA;
6026  pcre_uchar ket;  pcre_uchar ket;
6027  assert_backtrack *assert;  assert_backtrack *assert;
# Line 6198  SLJIT_ASSERT(!((bra == OP_BRAZERO && ket Line 6071  SLJIT_ASSERT(!((bra == OP_BRAZERO && ket
6071  cc += GET(cc, 1);  cc += GET(cc, 1);
6072    
6073  has_alternatives = *cc == OP_ALT;  has_alternatives = *cc == OP_ALT;
6074  if (SLJIT_UNLIKELY(opcode == OP_COND) || SLJIT_UNLIKELY(opcode == OP_SCOND))  if (SLJIT_UNLIKELY(opcode == OP_COND || opcode == OP_SCOND))
6075    {    has_alternatives = (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF) ? FALSE : TRUE;
   has_alternatives = (*matchingpath == OP_RREF) ? FALSE : TRUE;  
   if (*matchingpath == OP_NRREF)  
     {  
     stacksize = GET2(matchingpath, 1);  
     if (common->currententry == NULL || stacksize == RREF_ANY)  
       has_alternatives = FALSE;  
     else if (common->currententry->start == 0)  
       has_alternatives = stacksize != 0;  
     else  
       has_alternatives = stacksize != (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);  
     }  
   }  
6076    
6077  if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))  if (SLJIT_UNLIKELY(opcode == OP_COND) && (*cc == OP_KETRMAX || *cc == OP_KETRMIN))
6078    opcode = OP_SCOND;    opcode = OP_SCOND;
# Line 6448  if (opcode == OP_COND || opcode == OP_SC Line 6309  if (opcode == OP_COND || opcode == OP_SC
6309        CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));        CMP(SLJIT_C_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(matchingpath, 1) << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1)));
6310      matchingpath += 1 + IMM2_SIZE;      matchingpath += 1 + IMM2_SIZE;
6311      }      }
6312    else if (*matchingpath == OP_NCREF)    else if (*matchingpath == OP_DNCREF)
6313      {      {
6314      SLJIT_ASSERT(has_alternatives);      SLJIT_ASSERT(has_alternatives);
     stacksize = GET2(matchingpath, 1);  
     jump = CMP(SLJIT_C_NOT_EQUAL, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(stacksize << 1), SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));  
   
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);  
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);  
     OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);  
     OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, (stacksize << 8) | (common->ovector_start / sizeof(sljit_sw)));  
     GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);  
     OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);  
     sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchovector));  
     OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);  
     add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));  
6315    
6316      JUMPHERE(jump);      i = GET2(matchingpath, 1 + IMM2_SIZE);
6317      matchingpath += 1 + IMM2_SIZE;      slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
6318        OP1(SLJIT_MOV, TMP3, 0, STR_PTR, 0);
6319        OP1(SLJIT_MOV, TMP1, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(1));
6320        OP2(SLJIT_SUB | SLJIT_SET_E, TMP2, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
6321        slot += common->name_entry_size;
6322        i--;
6323        while (i-- > 0)
6324          {
6325          OP2(SLJIT_SUB, STR_PTR, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), OVECTOR(GET2(slot, 0) << 1), TMP1, 0);
6326          OP2(SLJIT_OR | SLJIT_SET_E, TMP2, 0, TMP2, 0, STR_PTR, 0);
6327          slot += common->name_entry_size;
6328          }
6329        OP1(SLJIT_MOV, STR_PTR, 0, TMP3, 0);
6330        add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), JUMP(SLJIT_C_ZERO));
6331        matchingpath += 1 + 2 * IMM2_SIZE;
6332      }      }
6333    else if (*matchingpath == OP_RREF || *matchingpath == OP_NRREF)    else if (*matchingpath == OP_RREF || *matchingpath == OP_DNRREF)
6334      {      {
6335      /* Never has other case. */      /* Never has other case. */
6336      BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;      BACKTRACK_AS(bracket_backtrack)->u.condfailed = NULL;
6337        SLJIT_ASSERT(!has_alternatives);
6338    
6339      stacksize = GET2(matchingpath, 1);      if (*matchingpath == OP_RREF)
     if (common->currententry == NULL)  
       stacksize = 0;  
     else if (stacksize == RREF_ANY)  
       stacksize = 1;  
     else if (common->currententry->start == 0)  
       stacksize = stacksize == 0;  
     else  
       stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);  
   
     if (*matchingpath == OP_RREF || stacksize || common->currententry == NULL)  
6340        {        {
6341        SLJIT_ASSERT(!has_alternatives);        stacksize = GET2(matchingpath, 1);
6342          if (common->currententry == NULL)
6343            stacksize = 0;
6344          else if (stacksize == RREF_ANY)
6345            stacksize = 1;
6346          else if (common->currententry->start == 0)
6347            stacksize = stacksize == 0;
6348          else
6349            stacksize = stacksize == (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6350    
6351        if (stacksize != 0)        if (stacksize != 0)
6352          matchingpath += 1 + IMM2_SIZE;          matchingpath += 1 + IMM2_SIZE;
6353          }
6354        else
6355          {
6356          if (common->currententry == NULL || common->currententry->start == 0)
6357            stacksize = 0;
6358        else        else
6359          {          {
6360            stacksize = GET2(matchingpath, 1 + IMM2_SIZE);
6361            slot = common->name_table + GET2(matchingpath, 1) * common->name_entry_size;
6362            i = (int)GET2(common->start, common->currententry->start + 1 + LINK_SIZE);
6363            while (stacksize > 0)
6364              {
6365              if (GET2(slot, 0) == i)
6366                break;
6367              slot += common->name_entry_size;
6368              stacksize--;
6369              }
6370            }
6371    
6372          if (stacksize != 0)
6373            matchingpath += 1 + 2 * IMM2_SIZE;
6374          }
6375    
6376          /* The stacksize == 0 is a common "else" case. */
6377          if (stacksize == 0)
6378            {
6379          if (*cc == OP_ALT)          if (*cc == OP_ALT)
6380            {            {
6381            matchingpath = cc + 1 + LINK_SIZE;            matchingpath = cc + 1 + LINK_SIZE;
# Line 6497  if (opcode == OP_COND || opcode == OP_SC Line 6384  if (opcode == OP_COND || opcode == OP_SC
6384          else          else
6385            matchingpath = cc;            matchingpath = cc;
6386          }          }
       }  
     else  
       {  
       SLJIT_ASSERT(has_alternatives);  
   
       stacksize = GET2(matchingpath, 1);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1, STACK_TOP, 0);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS0, SLJIT_IMM, common->name_count);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), LOCALS1, SLJIT_IMM, common->name_entry_size);  
       OP1(SLJIT_MOV, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE0, SLJIT_IMM, GET2(common->start, common->currententry->start + 1 + LINK_SIZE));  
       OP1(SLJIT_MOV, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, stacksize);  
       GET_LOCAL_BASE(SLJIT_SCRATCH_REG2, 0, 0);  
       OP1(SLJIT_MOV, SLJIT_SCRATCH_REG3, 0, SLJIT_IMM, common->name_table);  
       sljit_emit_ijump(compiler, SLJIT_CALL3, SLJIT_IMM, SLJIT_FUNC_OFFSET(do_searchgroups));  
       OP1(SLJIT_MOV, STACK_TOP, 0, SLJIT_MEM1(SLJIT_LOCALS_REG), POSSESSIVE1);  
       add_jump(compiler, &(BACKTRACK_AS(bracket_backtrack)->u.condfailed), CMP(SLJIT_C_EQUAL, SLJIT_SCRATCH_REG1, 0, SLJIT_IMM, 0));  
       matchingpath += 1 + IMM2_SIZE;  
       }  
6387      }      }
6388    else    else
6389      {      {
# Line 8958  else Line 8827  else
8827  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;  common->endonly = (re->options & PCRE_DOLLAR_ENDONLY) != 0;
8828  common->ctypes = (sljit_sw)(tables + ctypes_offset);  common->ctypes = (sljit_sw)(tables + ctypes_offset);
8829  common->digits[0] = -2;  common->digits[0] = -2;
8830  common->name_table = (sljit_sw)((pcre_uchar *)re + re->name_table_offset);  common->name_table = ((pcre_uchar *)re) + re->name_table_offset;
8831  common->name_count = re->name_count;  common->name_count = re->name_count;
8832  common->name_entry_size = re->name_entry_size;  common->name_entry_size = re->name_entry_size;
8833  common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;  common->jscript_compat = (re->options & PCRE_JAVASCRIPT_COMPAT) != 0;

Legend:
Removed from v.1358  
changed lines
  Added in v.1367

  ViewVC Help
Powered by ViewVC 1.1.5