/[pcre]/code/trunk/pcre_dfa_exec.c
ViewVC logotype

Diff of /code/trunk/pcre_dfa_exec.c

Parent Directory Parent Directory | Revision Log Revision Log | View Patch Patch

revision 150 by ph10, Tue Apr 17 08:22:40 2007 UTC revision 341 by ph10, Sat Apr 19 16:41:04 2008 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2008 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 44  FSM). This is NOT Perl- compatible, but Line 44  FSM). This is NOT Perl- compatible, but
44  applications. */  applications. */
45    
46    
47    #ifdef HAVE_CONFIG_H
48    #include "config.h"
49    #endif
50    
51  #define NLBLOCK md             /* Block containing newline information */  #define NLBLOCK md             /* Block containing newline information */
52  #define PSSTART start_subject  /* Field containing processed string start */  #define PSSTART start_subject  /* Field containing processed string start */
53  #define PSEND   end_subject    /* Field containing processed string end */  #define PSEND   end_subject    /* Field containing processed string end */
# Line 63  applications. */ Line 67  applications. */
67    
68  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes  /* These are offsets that are used to turn the OP_TYPESTAR and friends opcodes
69  into others, under special conditions. A gap of 20 between the blocks should be  into others, under special conditions. A gap of 20 between the blocks should be
70  enough. */  enough. The resulting opcodes don't have to be less than 256 because they are
71    never stored, so we push them well clear of the normal opcodes. */
72    
73  #define OP_PROP_EXTRA 100  #define OP_PROP_EXTRA       300
74  #define OP_EXTUNI_EXTRA 120  #define OP_EXTUNI_EXTRA     320
75  #define OP_ANYNL_EXTRA 140  #define OP_ANYNL_EXTRA      340
76    #define OP_HSPACE_EXTRA     360
77    #define OP_VSPACE_EXTRA     380
78    
79    
80  /* This table identifies those opcodes that are followed immediately by a  /* This table identifies those opcodes that are followed immediately by a
81  character that is to be tested in some way. This makes it possible to  character that is to be tested in some way. This makes it possible to
82  centralize the loading of these characters. In the case of Type * etc, the  centralize the loading of these characters. In the case of Type * etc, the
83  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a  "character" is the opcode for \D, \d, \S, \s, \W, or \w, which will always be a
84  small value. */  small value. ***NOTE*** If the start of this table is modified, the two tables
85    that follow must also be modified. */
86    
87  static uschar coptable[] = {  static const uschar coptable[] = {
88    0,                             /* End                                    */    0,                             /* End                                    */
89    0, 0, 0, 0, 0, 0, 0, 0, 0, 0,  /* \A, \G, \B, \b, \D, \d, \S, \s, \W, \w */    0, 0, 0, 0, 0,                 /* \A, \G, \K, \B, \b                     */
90    0, 0,                          /* Any, Anybyte                           */    0, 0, 0, 0, 0, 0,              /* \D, \d, \S, \s, \W, \w                 */
91    0, 0, 0, 0,                    /* NOTPROP, PROP, EXTUNI, ANYNL           */    0, 0, 0,                       /* Any, AllAny, Anybyte                   */
92      0, 0, 0,                       /* NOTPROP, PROP, EXTUNI                  */
93      0, 0, 0, 0, 0,                 /* \R, \H, \h, \V, \v                     */
94    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */    0, 0, 0, 0, 0,                 /* \Z, \z, Opt, ^, $                      */
95    1,                             /* Char                                   */    1,                             /* Char                                   */
96    1,                             /* Charnc                                 */    1,                             /* Charnc                                 */
# Line 120  static uschar coptable[] = { Line 130  static uschar coptable[] = {
130    0,                             /* CREF                                   */    0,                             /* CREF                                   */
131    0,                             /* RREF                                   */    0,                             /* RREF                                   */
132    0,                             /* DEF                                    */    0,                             /* DEF                                    */
133    0, 0                           /* BRAZERO, BRAMINZERO                    */    0, 0,                          /* BRAZERO, BRAMINZERO                    */
134      0, 0, 0, 0,                    /* PRUNE, SKIP, THEN, COMMIT              */
135      0, 0, 0                        /* FAIL, ACCEPT, SKIPZERO                 */
136  };  };
137    
138  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,  /* These 2 tables allow for compact code for testing for \D, \d, \S, \s, \W,
139  and \w */  and \w */
140    
141  static uschar toptable1[] = {  static const uschar toptable1[] = {
142    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
143    ctype_digit, ctype_digit,    ctype_digit, ctype_digit,
144    ctype_space, ctype_space,    ctype_space, ctype_space,
145    ctype_word,  ctype_word,    ctype_word,  ctype_word,
146    0                               /* OP_ANY */    0, 0                            /* OP_ANY, OP_ALLANY */
147  };  };
148    
149  static uschar toptable2[] = {  static const uschar toptable2[] = {
150    0, 0, 0, 0, 0,    0, 0, 0, 0, 0, 0,
151    ctype_digit, 0,    ctype_digit, 0,
152    ctype_space, 0,    ctype_space, 0,
153    ctype_word,  0,    ctype_word,  0,
154    1                               /* OP_ANY */    1, 1                            /* OP_ANY, OP_ALLANY */
155  };  };
156    
157    
# Line 211  Arguments: Line 223  Arguments:
223    rlevel            function call recursion level    rlevel            function call recursion level
224    recursing         regex recursive call level    recursing         regex recursive call level
225    
226  Returns:            > 0 =>  Returns:            > 0 => number of match offset pairs placed in offsets
227                      = 0 =>                      = 0 => offsets overflowed; longest matches are present
228                       -1 => failed to match                       -1 => failed to match
229                     < -1 => some kind of unexpected problem                     < -1 => some kind of unexpected problem
230    
# Line 500  for (;;) Line 512  for (;;)
512      const uschar *code;      const uschar *code;
513      int state_offset = current_state->offset;      int state_offset = current_state->offset;
514      int count, codevalue;      int count, codevalue;
515    #ifdef SUPPORT_UCP
516      int chartype, script;      int chartype, script;
517    #endif
518    
519  #ifdef DEBUG  #ifdef DEBUG
520      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);      printf ("%.*sProcessing state %d c=", rlevel*2-2, SP, state_offset);
# Line 555  for (;;) Line 569  for (;;)
569      permitted.      permitted.
570    
571      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an      We also use this mechanism for opcodes such as OP_TYPEPLUS that take an
572      argument that is not a data character - but is always one byte long.      argument that is not a data character - but is always one byte long. We
573      Unfortunately, we have to take special action to deal with  \P, \p, and      have to take special action to deal with  \P, \p, \H, \h, \V, \v and \X in
574      \X in this case. To keep the other cases fast, convert these ones to new      this case. To keep the other cases fast, convert these ones to new opcodes.
575      opcodes. */      */
576    
577      if (coptable[codevalue] > 0)      if (coptable[codevalue] > 0)
578        {        {
# Line 576  for (;;) Line 590  for (;;)
590            case OP_PROP: codevalue += OP_PROP_EXTRA; break;            case OP_PROP: codevalue += OP_PROP_EXTRA; break;
591            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;            case OP_ANYNL: codevalue += OP_ANYNL_EXTRA; break;
592            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;            case OP_EXTUNI: codevalue += OP_EXTUNI_EXTRA; break;
593              case OP_NOT_HSPACE:
594              case OP_HSPACE: codevalue += OP_HSPACE_EXTRA; break;
595              case OP_NOT_VSPACE:
596              case OP_VSPACE: codevalue += OP_VSPACE_EXTRA; break;
597            default: break;            default: break;
598            }            }
599          }          }
# Line 676  for (;;) Line 694  for (;;)
694        break;        break;
695    
696        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
697          case OP_SKIPZERO:
698          code += 1 + GET(code, 2);
699          while (*code == OP_ALT) code += GET(code, 1);
700          ADD_ACTIVE(code - start_code + 1 + LINK_SIZE, 0);
701          break;
702    
703          /*-----------------------------------------------------------------*/
704        case OP_CIRC:        case OP_CIRC:
705        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||        if ((ptr == start_subject && (md->moptions & PCRE_NOTBOL) == 0) ||
706            ((ims & PCRE_MULTILINE) != 0 &&            ((ims & PCRE_MULTILINE) != 0 &&
# Line 719  for (;;) Line 744  for (;;)
744        break;        break;
745    
746        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
747          case OP_ALLANY:
748          if (clen > 0)
749            { ADD_NEW(state_offset + 1, 0); }
750          break;
751    
752          /*-----------------------------------------------------------------*/
753        case OP_EODN:        case OP_EODN:
754        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))        if (clen == 0 || (IS_NEWLINE(ptr) && ptr == end_subject - md->nllen))
755          { ADD_ACTIVE(state_offset + 1, 0); }          { ADD_ACTIVE(state_offset + 1, 0); }
# Line 783  for (;;) Line 814  for (;;)
814        break;        break;
815    
816    
 #ifdef SUPPORT_UCP  
   
817        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
818        /* Check the next character by Unicode property. We will get here only        /* Check the next character by Unicode property. We will get here only
819        if the support is in the binary; otherwise a compile-time error occurs.        if the support is in the binary; otherwise a compile-time error occurs.
820        */        */
821    
822    #ifdef SUPPORT_UCP
823        case OP_PROP:        case OP_PROP:
824        case OP_NOTPROP:        case OP_NOTPROP:
825        if (clen > 0)        if (clen > 0)
# Line 835  for (;;) Line 865  for (;;)
865  /* ========================================================================== */  /* ========================================================================== */
866        /* These opcodes likewise inspect the subject character, but have an        /* These opcodes likewise inspect the subject character, but have an
867        argument that is not a data character. It is one of these opcodes:        argument that is not a data character. It is one of these opcodes:
868        OP_ANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE, OP_WORDCHAR,        OP_ANY, OP_ALLANY, OP_DIGIT, OP_NOT_DIGIT, OP_WHITESPACE, OP_NOT_SPACE,
869        OP_NOT_WORDCHAR. The value is loaded into d. */        OP_WORDCHAR, OP_NOT_WORDCHAR. The value is loaded into d. */
870    
871        case OP_TYPEPLUS:        case OP_TYPEPLUS:
872        case OP_TYPEMINPLUS:        case OP_TYPEMINPLUS:
# Line 970  for (;;) Line 1000  for (;;)
1000        argument. It keeps the code above fast for the other cases. The argument        argument. It keeps the code above fast for the other cases. The argument
1001        is in the d variable. */        is in the d variable. */
1002    
1003    #ifdef SUPPORT_UCP
1004        case OP_PROP_EXTRA + OP_TYPEPLUS:        case OP_PROP_EXTRA + OP_TYPEPLUS:
1005        case OP_PROP_EXTRA + OP_TYPEMINPLUS:        case OP_PROP_EXTRA + OP_TYPEMINPLUS:
1006        case OP_PROP_EXTRA + OP_TYPEPOSPLUS:        case OP_PROP_EXTRA + OP_TYPEPOSPLUS:
# Line 1049  for (;;) Line 1080  for (;;)
1080          ADD_NEW_DATA(-state_offset, count, ncount);          ADD_NEW_DATA(-state_offset, count, ncount);
1081          }          }
1082        break;        break;
1083    #endif
1084    
1085        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1086        case OP_ANYNL_EXTRA + OP_TYPEPLUS:        case OP_ANYNL_EXTRA + OP_TYPEPLUS:
# Line 1061  for (;;) Line 1093  for (;;)
1093          int ncount = 0;          int ncount = 0;
1094          switch (c)          switch (c)
1095            {            {
1096              case 0x000b:
1097              case 0x000c:
1098              case 0x0085:
1099              case 0x2028:
1100              case 0x2029:
1101              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1102              goto ANYNL01;
1103    
1104            case 0x000d:            case 0x000d:
1105            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1106            /* Fall through */            /* Fall through */
1107    
1108              ANYNL01:
1109              case 0x000a:
1110              if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)
1111                {
1112                active_count--;           /* Remove non-match possibility */
1113                next_active_state--;
1114                }
1115              count++;
1116              ADD_NEW_DATA(-state_offset, count, ncount);
1117              break;
1118    
1119              default:
1120              break;
1121              }
1122            }
1123          break;
1124    
1125          /*-----------------------------------------------------------------*/
1126          case OP_VSPACE_EXTRA + OP_TYPEPLUS:
1127          case OP_VSPACE_EXTRA + OP_TYPEMINPLUS:
1128          case OP_VSPACE_EXTRA + OP_TYPEPOSPLUS:
1129          count = current_state->count;  /* Already matched */
1130          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1131          if (clen > 0)
1132            {
1133            BOOL OK;
1134            switch (c)
1135              {
1136            case 0x000a:            case 0x000a:
1137            case 0x000b:            case 0x000b:
1138            case 0x000c:            case 0x000c:
1139              case 0x000d:
1140            case 0x0085:            case 0x0085:
1141            case 0x2028:            case 0x2028:
1142            case 0x2029:            case 0x2029:
1143            if (count > 0 && codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSPLUS)            OK = TRUE;
1144              break;
1145    
1146              default:
1147              OK = FALSE;
1148              break;
1149              }
1150    
1151            if (OK == (d == OP_VSPACE))
1152              {
1153              if (count > 0 && codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSPLUS)
1154              {              {
1155              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1156              next_active_state--;              next_active_state--;
1157              }              }
1158            count++;            count++;
1159            ADD_NEW_DATA(-state_offset, count, ncount);            ADD_NEW_DATA(-state_offset, count, 0);
1160              }
1161            }
1162          break;
1163    
1164          /*-----------------------------------------------------------------*/
1165          case OP_HSPACE_EXTRA + OP_TYPEPLUS:
1166          case OP_HSPACE_EXTRA + OP_TYPEMINPLUS:
1167          case OP_HSPACE_EXTRA + OP_TYPEPOSPLUS:
1168          count = current_state->count;  /* Already matched */
1169          if (count > 0) { ADD_ACTIVE(state_offset + 2, 0); }
1170          if (clen > 0)
1171            {
1172            BOOL OK;
1173            switch (c)
1174              {
1175              case 0x09:      /* HT */
1176              case 0x20:      /* SPACE */
1177              case 0xa0:      /* NBSP */
1178              case 0x1680:    /* OGHAM SPACE MARK */
1179              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1180              case 0x2000:    /* EN QUAD */
1181              case 0x2001:    /* EM QUAD */
1182              case 0x2002:    /* EN SPACE */
1183              case 0x2003:    /* EM SPACE */
1184              case 0x2004:    /* THREE-PER-EM SPACE */
1185              case 0x2005:    /* FOUR-PER-EM SPACE */
1186              case 0x2006:    /* SIX-PER-EM SPACE */
1187              case 0x2007:    /* FIGURE SPACE */
1188              case 0x2008:    /* PUNCTUATION SPACE */
1189              case 0x2009:    /* THIN SPACE */
1190              case 0x200A:    /* HAIR SPACE */
1191              case 0x202f:    /* NARROW NO-BREAK SPACE */
1192              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1193              case 0x3000:    /* IDEOGRAPHIC SPACE */
1194              OK = TRUE;
1195            break;            break;
1196    
1197            default:            default:
1198              OK = FALSE;
1199            break;            break;
1200            }            }
1201    
1202            if (OK == (d == OP_HSPACE))
1203              {
1204              if (count > 0 && codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSPLUS)
1205                {
1206                active_count--;           /* Remove non-match possibility */
1207                next_active_state--;
1208                }
1209              count++;
1210              ADD_NEW_DATA(-state_offset, count, 0);
1211              }
1212          }          }
1213        break;        break;
1214    
1215        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1216    #ifdef SUPPORT_UCP
1217        case OP_PROP_EXTRA + OP_TYPEQUERY:        case OP_PROP_EXTRA + OP_TYPEQUERY:
1218        case OP_PROP_EXTRA + OP_TYPEMINQUERY:        case OP_PROP_EXTRA + OP_TYPEMINQUERY:
1219        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:        case OP_PROP_EXTRA + OP_TYPEPOSQUERY:
# Line 1182  for (;;) Line 1311  for (;;)
1311          ADD_NEW_DATA(-(state_offset + count), 0, ncount);          ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1312          }          }
1313        break;        break;
1314    #endif
1315    
1316        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1317        case OP_ANYNL_EXTRA + OP_TYPEQUERY:        case OP_ANYNL_EXTRA + OP_TYPEQUERY:
# Line 1202  for (;;) Line 1332  for (;;)
1332          int ncount = 0;          int ncount = 0;
1333          switch (c)          switch (c)
1334            {            {
1335              case 0x000b:
1336              case 0x000c:
1337              case 0x0085:
1338              case 0x2028:
1339              case 0x2029:
1340              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1341              goto ANYNL02;
1342    
1343            case 0x000d:            case 0x000d:
1344            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1345            /* Fall through */            /* Fall through */
1346    
1347              ANYNL02:
1348              case 0x000a:
1349              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||
1350                  codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)
1351                {
1352                active_count--;           /* Remove non-match possibility */
1353                next_active_state--;
1354                }
1355              ADD_NEW_DATA(-(state_offset + count), 0, ncount);
1356              break;
1357    
1358              default:
1359              break;
1360              }
1361            }
1362          break;
1363    
1364          /*-----------------------------------------------------------------*/
1365          case OP_VSPACE_EXTRA + OP_TYPEQUERY:
1366          case OP_VSPACE_EXTRA + OP_TYPEMINQUERY:
1367          case OP_VSPACE_EXTRA + OP_TYPEPOSQUERY:
1368          count = 2;
1369          goto QS4;
1370    
1371          case OP_VSPACE_EXTRA + OP_TYPESTAR:
1372          case OP_VSPACE_EXTRA + OP_TYPEMINSTAR:
1373          case OP_VSPACE_EXTRA + OP_TYPEPOSSTAR:
1374          count = 0;
1375    
1376          QS4:
1377          ADD_ACTIVE(state_offset + 2, 0);
1378          if (clen > 0)
1379            {
1380            BOOL OK;
1381            switch (c)
1382              {
1383            case 0x000a:            case 0x000a:
1384            case 0x000b:            case 0x000b:
1385            case 0x000c:            case 0x000c:
1386              case 0x000d:
1387            case 0x0085:            case 0x0085:
1388            case 0x2028:            case 0x2028:
1389            case 0x2029:            case 0x2029:
1390            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSSTAR ||            OK = TRUE;
1391                codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSQUERY)            break;
1392    
1393              default:
1394              OK = FALSE;
1395              break;
1396              }
1397            if (OK == (d == OP_VSPACE))
1398              {
1399              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSSTAR ||
1400                  codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSQUERY)
1401              {              {
1402              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1403              next_active_state--;              next_active_state--;
1404              }              }
1405            ADD_NEW_DATA(-(state_offset + count), 0, ncount);            ADD_NEW_DATA(-(state_offset + count), 0, 0);
1406              }
1407            }
1408          break;
1409    
1410          /*-----------------------------------------------------------------*/
1411          case OP_HSPACE_EXTRA + OP_TYPEQUERY:
1412          case OP_HSPACE_EXTRA + OP_TYPEMINQUERY:
1413          case OP_HSPACE_EXTRA + OP_TYPEPOSQUERY:
1414          count = 2;
1415          goto QS5;
1416    
1417          case OP_HSPACE_EXTRA + OP_TYPESTAR:
1418          case OP_HSPACE_EXTRA + OP_TYPEMINSTAR:
1419          case OP_HSPACE_EXTRA + OP_TYPEPOSSTAR:
1420          count = 0;
1421    
1422          QS5:
1423          ADD_ACTIVE(state_offset + 2, 0);
1424          if (clen > 0)
1425            {
1426            BOOL OK;
1427            switch (c)
1428              {
1429              case 0x09:      /* HT */
1430              case 0x20:      /* SPACE */
1431              case 0xa0:      /* NBSP */
1432              case 0x1680:    /* OGHAM SPACE MARK */
1433              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1434              case 0x2000:    /* EN QUAD */
1435              case 0x2001:    /* EM QUAD */
1436              case 0x2002:    /* EN SPACE */
1437              case 0x2003:    /* EM SPACE */
1438              case 0x2004:    /* THREE-PER-EM SPACE */
1439              case 0x2005:    /* FOUR-PER-EM SPACE */
1440              case 0x2006:    /* SIX-PER-EM SPACE */
1441              case 0x2007:    /* FIGURE SPACE */
1442              case 0x2008:    /* PUNCTUATION SPACE */
1443              case 0x2009:    /* THIN SPACE */
1444              case 0x200A:    /* HAIR SPACE */
1445              case 0x202f:    /* NARROW NO-BREAK SPACE */
1446              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1447              case 0x3000:    /* IDEOGRAPHIC SPACE */
1448              OK = TRUE;
1449            break;            break;
1450    
1451            default:            default:
1452              OK = FALSE;
1453            break;            break;
1454            }            }
1455    
1456            if (OK == (d == OP_HSPACE))
1457              {
1458              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSSTAR ||
1459                  codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSQUERY)
1460                {
1461                active_count--;           /* Remove non-match possibility */
1462                next_active_state--;
1463                }
1464              ADD_NEW_DATA(-(state_offset + count), 0, 0);
1465              }
1466          }          }
1467        break;        break;
1468    
1469        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1470    #ifdef SUPPORT_UCP
1471        case OP_PROP_EXTRA + OP_TYPEEXACT:        case OP_PROP_EXTRA + OP_TYPEEXACT:
1472        case OP_PROP_EXTRA + OP_TYPEUPTO:        case OP_PROP_EXTRA + OP_TYPEUPTO:
1473        case OP_PROP_EXTRA + OP_TYPEMINUPTO:        case OP_PROP_EXTRA + OP_TYPEMINUPTO:
# Line 1313  for (;;) Line 1555  for (;;)
1555            { ADD_NEW_DATA(-state_offset, count, ncount); }            { ADD_NEW_DATA(-state_offset, count, ncount); }
1556          }          }
1557        break;        break;
1558    #endif
1559    
1560        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1561        case OP_ANYNL_EXTRA + OP_TYPEEXACT:        case OP_ANYNL_EXTRA + OP_TYPEEXACT:
# Line 1327  for (;;) Line 1570  for (;;)
1570          int ncount = 0;          int ncount = 0;
1571          switch (c)          switch (c)
1572            {            {
1573              case 0x000b:
1574              case 0x000c:
1575              case 0x0085:
1576              case 0x2028:
1577              case 0x2029:
1578              if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1579              goto ANYNL03;
1580    
1581            case 0x000d:            case 0x000d:
1582            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;            if (ptr + 1 < end_subject && ptr[1] == 0x0a) ncount = 1;
1583            /* Fall through */            /* Fall through */
1584    
1585              ANYNL03:
1586              case 0x000a:
1587              if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)
1588                {
1589                active_count--;           /* Remove non-match possibility */
1590                next_active_state--;
1591                }
1592              if (++count >= GET2(code, 1))
1593                { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }
1594              else
1595                { ADD_NEW_DATA(-state_offset, count, ncount); }
1596              break;
1597    
1598              default:
1599              break;
1600              }
1601            }
1602          break;
1603    
1604          /*-----------------------------------------------------------------*/
1605          case OP_VSPACE_EXTRA + OP_TYPEEXACT:
1606          case OP_VSPACE_EXTRA + OP_TYPEUPTO:
1607          case OP_VSPACE_EXTRA + OP_TYPEMINUPTO:
1608          case OP_VSPACE_EXTRA + OP_TYPEPOSUPTO:
1609          if (codevalue != OP_VSPACE_EXTRA + OP_TYPEEXACT)
1610            { ADD_ACTIVE(state_offset + 4, 0); }
1611          count = current_state->count;  /* Number already matched */
1612          if (clen > 0)
1613            {
1614            BOOL OK;
1615            switch (c)
1616              {
1617            case 0x000a:            case 0x000a:
1618            case 0x000b:            case 0x000b:
1619            case 0x000c:            case 0x000c:
1620              case 0x000d:
1621            case 0x0085:            case 0x0085:
1622            case 0x2028:            case 0x2028:
1623            case 0x2029:            case 0x2029:
1624            if (codevalue == OP_ANYNL_EXTRA + OP_TYPEPOSUPTO)            OK = TRUE;
1625              break;
1626    
1627              default:
1628              OK = FALSE;
1629              }
1630    
1631            if (OK == (d == OP_VSPACE))
1632              {
1633              if (codevalue == OP_VSPACE_EXTRA + OP_TYPEPOSUPTO)
1634              {              {
1635              active_count--;           /* Remove non-match possibility */              active_count--;           /* Remove non-match possibility */
1636              next_active_state--;              next_active_state--;
1637              }              }
1638            if (++count >= GET2(code, 1))            if (++count >= GET2(code, 1))
1639              { ADD_NEW_DATA(-(state_offset + 4), 0, ncount); }              { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1640            else            else
1641              { ADD_NEW_DATA(-state_offset, count, ncount); }              { ADD_NEW_DATA(-state_offset, count, 0); }
1642              }
1643            }
1644          break;
1645    
1646          /*-----------------------------------------------------------------*/
1647          case OP_HSPACE_EXTRA + OP_TYPEEXACT:
1648          case OP_HSPACE_EXTRA + OP_TYPEUPTO:
1649          case OP_HSPACE_EXTRA + OP_TYPEMINUPTO:
1650          case OP_HSPACE_EXTRA + OP_TYPEPOSUPTO:
1651          if (codevalue != OP_HSPACE_EXTRA + OP_TYPEEXACT)
1652            { ADD_ACTIVE(state_offset + 4, 0); }
1653          count = current_state->count;  /* Number already matched */
1654          if (clen > 0)
1655            {
1656            BOOL OK;
1657            switch (c)
1658              {
1659              case 0x09:      /* HT */
1660              case 0x20:      /* SPACE */
1661              case 0xa0:      /* NBSP */
1662              case 0x1680:    /* OGHAM SPACE MARK */
1663              case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1664              case 0x2000:    /* EN QUAD */
1665              case 0x2001:    /* EM QUAD */
1666              case 0x2002:    /* EN SPACE */
1667              case 0x2003:    /* EM SPACE */
1668              case 0x2004:    /* THREE-PER-EM SPACE */
1669              case 0x2005:    /* FOUR-PER-EM SPACE */
1670              case 0x2006:    /* SIX-PER-EM SPACE */
1671              case 0x2007:    /* FIGURE SPACE */
1672              case 0x2008:    /* PUNCTUATION SPACE */
1673              case 0x2009:    /* THIN SPACE */
1674              case 0x200A:    /* HAIR SPACE */
1675              case 0x202f:    /* NARROW NO-BREAK SPACE */
1676              case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1677              case 0x3000:    /* IDEOGRAPHIC SPACE */
1678              OK = TRUE;
1679            break;            break;
1680    
1681            default:            default:
1682              OK = FALSE;
1683            break;            break;
1684            }            }
1685    
1686            if (OK == (d == OP_HSPACE))
1687              {
1688              if (codevalue == OP_HSPACE_EXTRA + OP_TYPEPOSUPTO)
1689                {
1690                active_count--;           /* Remove non-match possibility */
1691                next_active_state--;
1692                }
1693              if (++count >= GET2(code, 1))
1694                { ADD_NEW_DATA(-(state_offset + 4), 0, 0); }
1695              else
1696                { ADD_NEW_DATA(-state_offset, count, 0); }
1697              }
1698          }          }
1699        break;        break;
1700    
# Line 1429  for (;;) Line 1775  for (;;)
1775        case OP_ANYNL:        case OP_ANYNL:
1776        if (clen > 0) switch(c)        if (clen > 0) switch(c)
1777          {          {
         case 0x000a:  
1778          case 0x000b:          case 0x000b:
1779          case 0x000c:          case 0x000c:
1780          case 0x0085:          case 0x0085:
1781          case 0x2028:          case 0x2028:
1782          case 0x2029:          case 0x2029:
1783            if ((md->moptions & PCRE_BSR_ANYCRLF) != 0) break;
1784    
1785            case 0x000a:
1786          ADD_NEW(state_offset + 1, 0);          ADD_NEW(state_offset + 1, 0);
1787          break;          break;
1788    
1789          case 0x000d:          case 0x000d:
1790          if (ptr + 1 < end_subject && ptr[1] == 0x0a)          if (ptr + 1 < end_subject && ptr[1] == 0x0a)
1791            {            {
# Line 1451  for (;;) Line 1800  for (;;)
1800        break;        break;
1801    
1802        /*-----------------------------------------------------------------*/        /*-----------------------------------------------------------------*/
1803          case OP_NOT_VSPACE:
1804          if (clen > 0) switch(c)
1805            {
1806            case 0x000a:
1807            case 0x000b:
1808            case 0x000c:
1809            case 0x000d:
1810            case 0x0085:
1811            case 0x2028:
1812            case 0x2029:
1813            break;
1814    
1815            default:
1816            ADD_NEW(state_offset + 1, 0);
1817            break;
1818            }
1819          break;
1820    
1821          /*-----------------------------------------------------------------*/
1822          case OP_VSPACE:
1823          if (clen > 0) switch(c)
1824            {
1825            case 0x000a:
1826            case 0x000b:
1827            case 0x000c:
1828            case 0x000d:
1829            case 0x0085:
1830            case 0x2028:
1831            case 0x2029:
1832            ADD_NEW(state_offset + 1, 0);
1833            break;
1834    
1835            default: break;
1836            }
1837          break;
1838    
1839          /*-----------------------------------------------------------------*/
1840          case OP_NOT_HSPACE:
1841          if (clen > 0) switch(c)
1842            {
1843            case 0x09:      /* HT */
1844            case 0x20:      /* SPACE */
1845            case 0xa0:      /* NBSP */
1846            case 0x1680:    /* OGHAM SPACE MARK */
1847            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1848            case 0x2000:    /* EN QUAD */
1849            case 0x2001:    /* EM QUAD */
1850            case 0x2002:    /* EN SPACE */
1851            case 0x2003:    /* EM SPACE */
1852            case 0x2004:    /* THREE-PER-EM SPACE */
1853            case 0x2005:    /* FOUR-PER-EM SPACE */
1854            case 0x2006:    /* SIX-PER-EM SPACE */
1855            case 0x2007:    /* FIGURE SPACE */
1856            case 0x2008:    /* PUNCTUATION SPACE */
1857            case 0x2009:    /* THIN SPACE */
1858            case 0x200A:    /* HAIR SPACE */
1859            case 0x202f:    /* NARROW NO-BREAK SPACE */
1860            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1861            case 0x3000:    /* IDEOGRAPHIC SPACE */
1862            break;
1863    
1864            default:
1865            ADD_NEW(state_offset + 1, 0);
1866            break;
1867            }
1868          break;
1869    
1870          /*-----------------------------------------------------------------*/
1871          case OP_HSPACE:
1872          if (clen > 0) switch(c)
1873            {
1874            case 0x09:      /* HT */
1875            case 0x20:      /* SPACE */
1876            case 0xa0:      /* NBSP */
1877            case 0x1680:    /* OGHAM SPACE MARK */
1878            case 0x180e:    /* MONGOLIAN VOWEL SEPARATOR */
1879            case 0x2000:    /* EN QUAD */
1880            case 0x2001:    /* EM QUAD */
1881            case 0x2002:    /* EN SPACE */
1882            case 0x2003:    /* EM SPACE */
1883            case 0x2004:    /* THREE-PER-EM SPACE */
1884            case 0x2005:    /* FOUR-PER-EM SPACE */
1885            case 0x2006:    /* SIX-PER-EM SPACE */
1886            case 0x2007:    /* FIGURE SPACE */
1887            case 0x2008:    /* PUNCTUATION SPACE */
1888            case 0x2009:    /* THIN SPACE */
1889            case 0x200A:    /* HAIR SPACE */
1890            case 0x202f:    /* NARROW NO-BREAK SPACE */
1891            case 0x205f:    /* MEDIUM MATHEMATICAL SPACE */
1892            case 0x3000:    /* IDEOGRAPHIC SPACE */
1893            ADD_NEW(state_offset + 1, 0);
1894            break;
1895            }
1896          break;
1897    
1898          /*-----------------------------------------------------------------*/
1899        /* Match a negated single character. This is only used for one-byte        /* Match a negated single character. This is only used for one-byte
1900        characters, that is, we know that d < 256. The character we are        characters, that is, we know that d < 256. The character we are
1901        checking (c) can be multibyte. */        checking (c) can be multibyte. */
# Line 1730  for (;;) Line 2175  for (;;)
2175    
2176  /* ========================================================================== */  /* ========================================================================== */
2177        /* These are the opcodes for fancy brackets of various kinds. We have        /* These are the opcodes for fancy brackets of various kinds. We have
2178        to use recursion in order to handle them. */        to use recursion in order to handle them. The "always failing" assertion
2179          (?!) is optimised when compiling to OP_FAIL, so we have to support that,
2180          though the other "backtracking verbs" are not supported. */
2181    
2182          case OP_FAIL:
2183          break;
2184    
2185        case OP_ASSERT:        case OP_ASSERT:
2186        case OP_ASSERT_NOT:        case OP_ASSERT_NOT:
# Line 2163  md->end_subject = end_subject; Line 2613  md->end_subject = end_subject;
2613  md->moptions = options;  md->moptions = options;
2614  md->poptions = re->options;  md->poptions = re->options;
2615    
2616    /* If the BSR option is not set at match time, copy what was set
2617    at compile time. */
2618    
2619    if ((md->moptions & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) == 0)
2620      {
2621      if ((re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE)) != 0)
2622        md->moptions |= re->options & (PCRE_BSR_ANYCRLF|PCRE_BSR_UNICODE);
2623    #ifdef BSR_ANYCRLF
2624      else md->moptions |= PCRE_BSR_ANYCRLF;
2625    #endif
2626      }
2627    
2628  /* Handle different types of newline. The three bits give eight cases. If  /* Handle different types of newline. The three bits give eight cases. If
2629  nothing is set at run time, whatever was used at compile time applies. */  nothing is set at run time, whatever was used at compile time applies. */
2630    
# Line 2233  if (md->tables == NULL) md->tables = _pc Line 2695  if (md->tables == NULL) md->tables = _pc
2695  used in a loop when finding where to start. */  used in a loop when finding where to start. */
2696    
2697  lcc = md->tables + lcc_offset;  lcc = md->tables + lcc_offset;
2698  startline = (re->options & PCRE_STARTLINE) != 0;  startline = (re->flags & PCRE_STARTLINE) != 0;
2699  firstline = (re->options & PCRE_FIRSTLINE) != 0;  firstline = (re->options & PCRE_FIRSTLINE) != 0;
2700    
2701  /* Set up the first character to match, if available. The first_byte value is  /* Set up the first character to match, if available. The first_byte value is
# Line 2244  studied, there may be a bitmap of possib Line 2706  studied, there may be a bitmap of possib
2706    
2707  if (!anchored)  if (!anchored)
2708    {    {
2709    if ((re->options & PCRE_FIRSTSET) != 0)    if ((re->flags & PCRE_FIRSTSET) != 0)
2710      {      {
2711      first_byte = re->first_byte & 255;      first_byte = re->first_byte & 255;
2712      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)      if ((first_byte_caseless = ((re->first_byte & REQ_CASELESS) != 0)) == TRUE)
# Line 2261  if (!anchored) Line 2723  if (!anchored)
2723  /* For anchored or unanchored matches, there may be a "last known required  /* For anchored or unanchored matches, there may be a "last known required
2724  character" set. */  character" set. */
2725    
2726  if ((re->options & PCRE_REQCHSET) != 0)  if ((re->flags & PCRE_REQCHSET) != 0)
2727    {    {
2728    req_byte = re->req_byte & 255;    req_byte = re->req_byte & 255;
2729    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;    req_byte_caseless = (re->req_byte & REQ_CASELESS) != 0;
# Line 2431  for (;;) Line 2893  for (;;)
2893      }      }
2894    if (current_subject > end_subject) break;    if (current_subject > end_subject) break;
2895    
2896    /* If we have just passed a CR and the newline option is CRLF or ANY or    /* If we have just passed a CR and we are now at a LF, and the pattern does
2897    ANYCRLF, and we are now at a LF, advance the match position by one more    not contain any explicit matches for \r or \n, and the newline option is CRLF
2898    character. */    or ANY or ANYCRLF, advance the match position by one more character. */
2899    
2900    if (current_subject[-1] == '\r' &&    if (current_subject[-1] == '\r' &&
2901         (md->nltype == NLTYPE_ANY ||        current_subject < end_subject &&
2902          md->nltype == NLTYPE_ANYCRLF ||        *current_subject == '\n' &&
2903          md->nllen == 2) &&        (re->flags & PCRE_HASCRORLF) == 0 &&
2904         current_subject < end_subject &&          (md->nltype == NLTYPE_ANY ||
2905         *current_subject == '\n')           md->nltype == NLTYPE_ANYCRLF ||
2906             md->nllen == 2))
2907      current_subject++;      current_subject++;
2908    
2909    }   /* "Bumpalong" loop */    }   /* "Bumpalong" loop */

Legend:
Removed from v.150  
changed lines
  Added in v.341

  ViewVC Help
Powered by ViewVC 1.1.5