/[pcre]/code/trunk/doc/pcre.txt
ViewVC logotype

Diff of /code/trunk/doc/pcre.txt

Parent Directory | Revision Log | View Patch

revision 678 by ph10, Sun Aug 28 15:23:03 2011 UTC | revision 691 by ph10, Sun Sep 11 14:31:21 2011 UTC
# Line 120  REVISION Line 120  REVISION
120         Last updated: 24 August 2011         Last updated: 24 August 2011
121         Copyright (c) 1997-2011 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
122  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
123    
124    
125  PCREBUILD(3)                                                      PCREBUILD(3)  PCREBUILD(3)                                                      PCREBUILD(3)
126    
127    
# Line 217  UNICODE CHARACTER PROPERTY SUPPORT Line 217  UNICODE CHARACTER PROPERTY SUPPORT
217         are supported. Details are given in the pcrepattern documentation.         are supported. Details are given in the pcrepattern documentation.
218    
219    
220    JUST-IN-TIME COMPILER SUPPORT
221    
222           Just-in-time compiler support is included in the build by specifying
223    
224             --enable-jit
225    
226           This  support  is available only for certain hardware architectures. If
227           this option is set for an  unsupported  architecture,  a  compile  time
228           error  occurs.   See  the pcrejit documentation for a discussion of JIT
229           usage. When JIT support is enabled, pcregrep automatically makes use of
230           it, unless you add
231    
232             --disable-pcregrep-jit
233    
234           to the "configure" command.
235    
236    
237  CODE VALUE OF NEWLINE  CODE VALUE OF NEWLINE
238    
239         By  default,  PCRE interprets the linefeed (LF) character as indicating         By  default,  PCRE interprets the linefeed (LF) character as indicating
# Line 464  AUTHOR Line 481  AUTHOR
481    
482  REVISION  REVISION
483    
484         Last updated: 02 August 2011         Last updated: 06 September 2011
485         Copyright (c) 1997-2011 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
486  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
487    
488    
489  PCREMATCHING(3)                                                PCREMATCHING(3)  PCREMATCHING(3)                                                PCREMATCHING(3)
490    
491    
# Line 671  REVISION Line 688  REVISION
688         Last updated: 17 November 2010         Last updated: 17 November 2010
689         Copyright (c) 1997-2010 University of Cambridge.         Copyright (c) 1997-2010 University of Cambridge.
690  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
691    
692    
693  PCREAPI(3)                                                          PCREAPI(3)  PCREAPI(3)                                                          PCREAPI(3)
694    
695    
# Line 680  NAME Line 697  NAME
697         PCRE - Perl-compatible regular expressions         PCRE - Perl-compatible regular expressions
698    
699    
700  PCRE NATIVE API  PCRE NATIVE API BASIC FUNCTIONS
701    
702         #include <pcre.h>         #include <pcre.h>
703    
# Line 696  PCRE NATIVE API Line 713  PCRE NATIVE API
713         pcre_extra *pcre_study(const pcre *code, int options,         pcre_extra *pcre_study(const pcre *code, int options,
714              const char **errptr);              const char **errptr);
715    
716           void pcre_free_study(pcre_extra *extra);
717    
718         int pcre_exec(const pcre *code, const pcre_extra *extra,         int pcre_exec(const pcre *code, const pcre_extra *extra,
719              const char *subject, int length, int startoffset,              const char *subject, int length, int startoffset,
720              int options, int *ovector, int ovecsize);              int options, int *ovector, int ovecsize);
721    
722    
723    PCRE NATIVE API AUXILIARY FUNCTIONS
724    
725           pcre_jit_stack *pcre_jit_stack_alloc(int startsize, int maxsize);
726    
727           void pcre_jit_stack_free(pcre_jit_stack *stack);
728    
729           void pcre_assign_jit_stack(pcre_extra *extra,
730                pcre_jit_callback callback, void *data);
731    
732         int pcre_dfa_exec(const pcre *code, const pcre_extra *extra,         int pcre_dfa_exec(const pcre *code, const pcre_extra *extra,
733              const char *subject, int length, int startoffset,              const char *subject, int length, int startoffset,
734              int options, int *ovector, int ovecsize,              int options, int *ovector, int ovecsize,
# Line 749  PCRE NATIVE API Line 778  PCRE NATIVE API
778    
779         char *pcre_version(void);         char *pcre_version(void);
780    
781    
782    PCRE NATIVE API INDIRECTED FUNCTIONS
783    
784         void *(*pcre_malloc)(size_t);         void *(*pcre_malloc)(size_t);
785    
786         void (*pcre_free)(void *);         void (*pcre_free)(void *);
# Line 764  PCRE API OVERVIEW Line 796  PCRE API OVERVIEW
796    
797         PCRE has its own native API, which is described in this document. There         PCRE has its own native API, which is described in this document. There
798         are also some wrapper functions that correspond to  the  POSIX  regular         are also some wrapper functions that correspond to  the  POSIX  regular
799         expression  API.  These  are  described in the pcreposix documentation.         expression  API,  but they do not give access to all the functionality.
800         Both of these APIs define a set of C function calls. A C++  wrapper  is         They are described in the pcreposix documentation. Both of  these  APIs
801         distributed with PCRE. It is documented in the pcrecpp page.         define  a  set  of  C function calls. A C++ wrapper is also distributed
802           with PCRE. It is documented in the pcrecpp page.
803    
804         The  native  API  C  function prototypes are defined in the header file         The native API C function prototypes are defined  in  the  header  file
805         pcre.h, and on Unix systems the library itself is called  libpcre.   It         pcre.h,  and  on Unix systems the library itself is called libpcre.  It
806         can normally be accessed by adding -lpcre to the command for linking an         can normally be accessed by adding -lpcre to the command for linking an
807         application  that  uses  PCRE.  The  header  file  defines  the  macros         application  that  uses  PCRE.  The  header  file  defines  the  macros
808         PCRE_MAJOR  and  PCRE_MINOR to contain the major and minor release num-         PCRE_MAJOR and PCRE_MINOR to contain the major and minor  release  num-
809         bers for the library.  Applications can use these  to  include  support         bers  for  the  library.  Applications can use these to include support
810         for different releases of PCRE.         for different releases of PCRE.
811    
812         In a Windows environment, if you want to statically link an application         In a Windows environment, if you want to statically link an application
813         program against a non-dll pcre.a  file,  you  must  define  PCRE_STATIC         program  against  a  non-dll  pcre.a  file, you must define PCRE_STATIC
814         before  including  pcre.h or pcrecpp.h, because otherwise the pcre_mal-         before including pcre.h or pcrecpp.h, because otherwise  the  pcre_mal-
815         loc()   and   pcre_free()   exported   functions   will   be   declared         loc()   and   pcre_free()   exported   functions   will   be   declared
816         __declspec(dllimport), with unwanted results.         __declspec(dllimport), with unwanted results.
817    
818         The   functions   pcre_compile(),  pcre_compile2(),  pcre_study(),  and         The  functions  pcre_compile(),  pcre_compile2(),   pcre_study(),   and
819         pcre_exec() are used for compiling and matching regular expressions  in         pcre_exec()  are used for compiling and matching regular expressions in
820         a  Perl-compatible  manner. A sample program that demonstrates the sim-         a Perl-compatible manner. A sample program that demonstrates  the  sim-
821         plest way of using them is provided in the file  called  pcredemo.c  in         plest  way  of  using them is provided in the file called pcredemo.c in
822         the PCRE source distribution. A listing of this program is given in the         the PCRE source distribution. A listing of this program is given in the
823         pcredemo documentation, and the pcresample documentation describes  how         pcredemo  documentation, and the pcresample documentation describes how
824         to compile and run it.         to compile and run it.
825    
826           Just-in-time compiler support is an optional feature of PCRE  that  can
827           be built in appropriate hardware environments. It greatly speeds up the
828           matching performance of  many  patterns.  Simple  programs  can  easily
829           request  that  it  be  used  if available, by setting an option that is
830           ignored when it is not relevant. More complicated programs  might  need
831           to     make    use    of    the    functions    pcre_jit_stack_alloc(),
832           pcre_jit_stack_free(), and pcre_assign_jit_stack() in order to  control
833           the  JIT  code's  memory  usage.   These functions are discussed in the
834           pcrejit documentation.
835    
836         A second matching function, pcre_dfa_exec(), which is not Perl-compati-         A second matching function, pcre_dfa_exec(), which is not Perl-compati-
837         ble, is also provided. This uses a different algorithm for  the  match-         ble,  is  also provided. This uses a different algorithm for the match-
838         ing.  The  alternative algorithm finds all possible matches (at a given         ing. The alternative algorithm finds all possible matches (at  a  given
839         point in the subject), and scans the subject just  once  (unless  there         point  in  the  subject), and scans the subject just once (unless there
840         are  lookbehind  assertions).  However,  this algorithm does not return         are lookbehind assertions). However, this  algorithm  does  not  return
841         captured substrings. A description of the two matching  algorithms  and         captured  substrings.  A description of the two matching algorithms and
842         their  advantages  and disadvantages is given in the pcrematching docu-         their advantages and disadvantages is given in the  pcrematching  docu-
843         mentation.         mentation.
844    
845         In addition to the main compiling and  matching  functions,  there  are         In  addition  to  the  main compiling and matching functions, there are
846         convenience functions for extracting captured substrings from a subject         convenience functions for extracting captured substrings from a subject
847         string that is matched by pcre_exec(). They are:         string that is matched by pcre_exec(). They are:
848    
# Line 814  PCRE API OVERVIEW Line 857  PCRE API OVERVIEW
857         pcre_free_substring() and pcre_free_substring_list() are also provided,         pcre_free_substring() and pcre_free_substring_list() are also provided,
858         to free the memory used for extracted strings.         to free the memory used for extracted strings.
859    
860         The  function  pcre_maketables()  is  used  to build a set of character         The function pcre_maketables() is used to  build  a  set  of  character
861         tables  in  the  current  locale   for   passing   to   pcre_compile(),         tables   in   the   current   locale  for  passing  to  pcre_compile(),
862         pcre_exec(),  or  pcre_dfa_exec(). This is an optional facility that is         pcre_exec(), or pcre_dfa_exec(). This is an optional facility  that  is
863         provided for specialist use.  Most  commonly,  no  special  tables  are         provided  for  specialist  use.  Most  commonly,  no special tables are
864         passed,  in  which case internal tables that are generated when PCRE is         passed, in which case internal tables that are generated when  PCRE  is
865         built are used.         built are used.
866    
867         The function pcre_fullinfo() is used to find out  information  about  a         The  function  pcre_fullinfo()  is used to find out information about a
868         compiled  pattern; pcre_info() is an obsolete version that returns only         compiled pattern; pcre_info() is an obsolete version that returns  only
869         some of the available information, but is retained for  backwards  com-         some  of  the available information, but is retained for backwards com-
870         patibility.   The function pcre_version() returns a pointer to a string         patibility.  The function pcre_version() returns a pointer to a  string
871         containing the version of PCRE and its date of release.         containing the version of PCRE and its date of release.
872    
873         The function pcre_refcount() maintains a  reference  count  in  a  data         The  function  pcre_refcount()  maintains  a  reference count in a data
874         block  containing  a compiled pattern. This is provided for the benefit         block containing a compiled pattern. This is provided for  the  benefit
875         of object-oriented applications.         of object-oriented applications.
876    
877         The global variables pcre_malloc and pcre_free  initially  contain  the         The  global  variables  pcre_malloc and pcre_free initially contain the
878         entry  points  of  the  standard malloc() and free() functions, respec-         entry points of the standard malloc()  and  free()  functions,  respec-
879         tively. PCRE calls the memory management functions via these variables,         tively. PCRE calls the memory management functions via these variables,
880         so  a  calling  program  can replace them if it wishes to intercept the         so a calling program can replace them if it  wishes  to  intercept  the
881         calls. This should be done before calling any PCRE functions.         calls. This should be done before calling any PCRE functions.
882    
883         The global variables pcre_stack_malloc  and  pcre_stack_free  are  also         The  global  variables  pcre_stack_malloc  and pcre_stack_free are also
884         indirections  to  memory  management functions. These special functions         indirections to memory management functions.  These  special  functions
885         are used only when PCRE is compiled to use  the  heap  for  remembering         are  used  only  when  PCRE is compiled to use the heap for remembering
886         data, instead of recursive function calls, when running the pcre_exec()         data, instead of recursive function calls, when running the pcre_exec()
887         function. See the pcrebuild documentation for  details  of  how  to  do         function.  See  the  pcrebuild  documentation  for details of how to do
888         this.  It  is  a non-standard way of building PCRE, for use in environ-         this. It is a non-standard way of building PCRE, for  use  in  environ-
889         ments that have limited stacks. Because of the greater  use  of  memory         ments  that  have  limited stacks. Because of the greater use of memory
890         management,  it  runs  more  slowly. Separate functions are provided so         management, it runs more slowly. Separate  functions  are  provided  so
891         that special-purpose external code can be  used  for  this  case.  When         that  special-purpose  external  code  can  be used for this case. When
892         used,  these  functions  are always called in a stack-like manner (last         used, these functions are always called in a  stack-like  manner  (last
893         obtained, first freed), and always for memory blocks of the same  size.         obtained,  first freed), and always for memory blocks of the same size.
894         There  is  a discussion about PCRE's stack usage in the pcrestack docu-         There is a discussion about PCRE's stack usage in the  pcrestack  docu-
895         mentation.         mentation.
896    
897         The global variable pcre_callout initially contains NULL. It can be set         The global variable pcre_callout initially contains NULL. It can be set
898         by  the  caller  to  a "callout" function, which PCRE will then call at         by the caller to a "callout" function, which PCRE  will  then  call  at
899         specified points during a matching operation. Details are given in  the         specified  points during a matching operation. Details are given in the
900         pcrecallout documentation.         pcrecallout documentation.
901    
902    
903  NEWLINES  NEWLINES
904    
905         PCRE  supports five different conventions for indicating line breaks in         PCRE supports five different conventions for indicating line breaks  in
906         strings: a single CR (carriage return) character, a  single  LF  (line-         strings:  a  single  CR (carriage return) character, a single LF (line-
907         feed) character, the two-character sequence CRLF, any of the three pre-         feed) character, the two-character sequence CRLF, any of the three pre-
908         ceding, or any Unicode newline sequence. The Unicode newline  sequences         ceding,  or any Unicode newline sequence. The Unicode newline sequences
909         are  the  three just mentioned, plus the single characters VT (vertical         are the three just mentioned, plus the single characters  VT  (vertical
910         tab, U+000B), FF (formfeed, U+000C), NEL (next line, U+0085), LS  (line         tab,  U+000B), FF (formfeed, U+000C), NEL (next line, U+0085), LS (line
911         separator, U+2028), and PS (paragraph separator, U+2029).         separator, U+2028), and PS (paragraph separator, U+2029).
912    
913         Each  of  the first three conventions is used by at least one operating         Each of the first three conventions is used by at least  one  operating
914         system as its standard newline sequence. When PCRE is built, a  default         system  as its standard newline sequence. When PCRE is built, a default
915         can  be  specified.  The default default is LF, which is the Unix stan-         can be specified.  The default default is LF, which is the  Unix  stan-
916         dard. When PCRE is run, the default can be overridden,  either  when  a         dard.  When  PCRE  is run, the default can be overridden, either when a
917         pattern is compiled, or when it is matched.         pattern is compiled, or when it is matched.
918    
919         At compile time, the newline convention can be specified by the options         At compile time, the newline convention can be specified by the options
920         argument of pcre_compile(), or it can be specified by special  text  at         argument  of  pcre_compile(), or it can be specified by special text at
921         the start of the pattern itself; this overrides any other settings. See         the start of the pattern itself; this overrides any other settings. See
922         the pcrepattern page for details of the special character sequences.         the pcrepattern page for details of the special character sequences.
923    
924         In the PCRE documentation the word "newline" is used to mean "the char-         In the PCRE documentation the word "newline" is used to mean "the char-
925         acter  or pair of characters that indicate a line break". The choice of         acter or pair of characters that indicate a line break". The choice  of
926         newline convention affects the handling of  the  dot,  circumflex,  and         newline  convention  affects  the  handling of the dot, circumflex, and
927         dollar metacharacters, the handling of #-comments in /x mode, and, when         dollar metacharacters, the handling of #-comments in /x mode, and, when
928         CRLF is a recognized line ending sequence, the match position  advance-         CRLF  is a recognized line ending sequence, the match position advance-
929         ment for a non-anchored pattern. There is more detail about this in the         ment for a non-anchored pattern. There is more detail about this in the
930         section on pcre_exec() options below.         section on pcre_exec() options below.
931    
932         The choice of newline convention does not affect the interpretation  of         The  choice of newline convention does not affect the interpretation of
933         the  \n  or  \r  escape  sequences, nor does it affect what \R matches,         the \n or \r escape sequences, nor does  it  affect  what  \R  matches,
934         which is controlled in a similar way, but by separate options.         which is controlled in a similar way, but by separate options.
935    
936    
937  MULTITHREADING  MULTITHREADING
938    
939         The PCRE functions can be used in  multi-threading  applications,  with         The  PCRE  functions  can be used in multi-threading applications, with
940         the  proviso  that  the  memory  management  functions  pointed  to  by         the  proviso  that  the  memory  management  functions  pointed  to  by
941         pcre_malloc, pcre_free, pcre_stack_malloc, and pcre_stack_free, and the         pcre_malloc, pcre_free, pcre_stack_malloc, and pcre_stack_free, and the
942         callout function pointed to by pcre_callout, are shared by all threads.         callout function pointed to by pcre_callout, are shared by all threads.
943    
944         The  compiled form of a regular expression is not altered during match-         The compiled form of a regular expression is not altered during  match-
945         ing, so the same compiled pattern can safely be used by several threads         ing, so the same compiled pattern can safely be used by several threads
946         at once.         at once.
947    
948           If the just-in-time optimization feature is being used, it needs  sepa-
949           rate  memory stack areas for each thread. See the pcrejit documentation
950           for more details.
951    
952    
953  SAVING PRECOMPILED PATTERNS FOR LATER USE  SAVING PRECOMPILED PATTERNS FOR LATER USE
954    
955         The compiled form of a regular expression can be saved and re-used at a         The compiled form of a regular expression can be saved and re-used at a
956         later time, possibly by a different program, and even on a  host  other         later  time,  possibly by a different program, and even on a host other
957         than  the  one  on  which  it  was  compiled.  Details are given in the         than the one on which  it  was  compiled.  Details  are  given  in  the
958         pcreprecompile documentation. However, compiling a  regular  expression         pcreprecompile  documentation.  However, compiling a regular expression
959         with  one version of PCRE for use with a different version is not guar-         with one version of PCRE for use with a different version is not  guar-
960         anteed to work and may cause crashes.         anteed to work and may cause crashes.
961    
962    
# Line 917  CHECKING BUILD-TIME OPTIONS Line 964  CHECKING BUILD-TIME OPTIONS
964    
965         int pcre_config(int what, void *where);         int pcre_config(int what, void *where);
966    
967         The function pcre_config() makes it possible for a PCRE client to  dis-         The  function pcre_config() makes it possible for a PCRE client to dis-
968         cover which optional features have been compiled into the PCRE library.         cover which optional features have been compiled into the PCRE library.
969         The pcrebuild documentation has more details about these optional  fea-         The  pcrebuild documentation has more details about these optional fea-
970         tures.         tures.
971    
972         The  first  argument  for pcre_config() is an integer, specifying which         The first argument for pcre_config() is an  integer,  specifying  which
973         information is required; the second argument is a pointer to a variable         information is required; the second argument is a pointer to a variable
974         into  which  the  information  is  placed. The following information is         into which the information is  placed.  The  following  information  is
975         available:         available:
976    
977           PCRE_CONFIG_UTF8           PCRE_CONFIG_UTF8
978    
979         The output is an integer that is set to one if UTF-8 support is  avail-         The  output is an integer that is set to one if UTF-8 support is avail-
980         able; otherwise it is set to zero.         able; otherwise it is set to zero.
981    
982           PCRE_CONFIG_UNICODE_PROPERTIES           PCRE_CONFIG_UNICODE_PROPERTIES
983    
984         The  output  is  an  integer  that is set to one if support for Unicode         The output is an integer that is set to  one  if  support  for  Unicode
985         character properties is available; otherwise it is set to zero.         character properties is available; otherwise it is set to zero.
986    
987             PCRE_CONFIG_JIT
988    
989           The output is an integer that is set to one if support for just-in-time
990           compiling is available; otherwise it is set to zero.
991    
992           PCRE_CONFIG_NEWLINE           PCRE_CONFIG_NEWLINE
993    
994         The output is an integer whose value specifies  the  default  character         The output is an integer whose value specifies  the  default  character
# Line 1423  STUDYING A PATTERN Line 1475  STUDYING A PATTERN
1475         wants   to   pass   any   of   the   other  fields  to  pcre_exec()  or         wants   to   pass   any   of   the   other  fields  to  pcre_exec()  or
1476         pcre_dfa_exec(), it must set up its own pcre_extra block.         pcre_dfa_exec(), it must set up its own pcre_extra block.
1477    
1478         The second argument of pcre_study() contains option bits.  At  present,         The second argument of pcre_study() contains option bits. There is only
1479         no options are defined, and this argument should always be zero.         one  option:  PCRE_STUDY_JIT_COMPILE.  If this is set, and the just-in-
1480           time compiler is  available,  the  pattern  is  further  compiled  into
1481           machine  code  that  executes much faster than the pcre_exec() matching
1482           function. If the just-in-time compiler is not available, this option is
1483           ignored. All other bits in the options argument must be zero.
1484    
1485           JIT  compilation  is  a heavyweight optimization. It can take some time
1486           for patterns to be analyzed, and for one-off matches  and  simple  pat-
1487           terns  the benefit of faster execution might be offset by a much slower
1488           study time.  Not all patterns can be optimized by the JIT compiler. For
1489           those  that cannot be handled, matching automatically falls back to the
1490           pcre_exec() interpreter. For more details, see the  pcrejit  documenta-
1491           tion.
1492    
1493         The  third argument for pcre_study() is a pointer for an error message.         The  third argument for pcre_study() is a pointer for an error message.
1494         If studying succeeds (even if no data is  returned),  the  variable  it         If studying succeeds (even if no data is  returned),  the  variable  it
# Line 1433  STUDYING A PATTERN Line 1497  STUDYING A PATTERN
1497         must  not  try  to  free it. You should test the error pointer for NULL         must  not  try  to  free it. You should test the error pointer for NULL
1498         after calling pcre_study(), to be sure that it has run successfully.         after calling pcre_study(), to be sure that it has run successfully.
1499    
1500         This is a typical call to pcre_study():         When you are finished with a pattern, you can free the memory used  for
1501           the study data by calling pcre_free_study(). This function was added to
1502           the API for release 8.20. For earlier versions,  the  memory  could  be
1503           freed  with  pcre_free(), just like the pattern itself. This will still
1504           work in cases where PCRE_STUDY_JIT_COMPILE  is  not  used,  but  it  is
1505           advisable to change to the new function when convenient.
1506    
1507           This  is  a typical way in which pcre_study() is used (except that in a
1508           real application there should be tests for errors):
1509    
1510           pcre_extra *pe;           int rc;
1511           pe = pcre_study(           pcre *re;
1512             pcre_extra *sd;
1513             re = pcre_compile("pattern", 0, &error, &erroroffset, NULL);
1514             sd = pcre_study(
1515             re,             /* result of pcre_compile() */             re,             /* result of pcre_compile() */
1516             0,              /* no options exist */             0,              /* no options */
1517             &error);        /* set to NULL or points to a message */             &error);        /* set to NULL or points to a message */
1518             rc = pcre_exec(   /* see below for details of pcre_exec() options */
1519               re, sd, "subject", 7, 0, 0, ovector, 30);
1520             ...
1521             pcre_free_study(sd);
1522             pcre_free(re);
1523    
1524         Studying a pattern does two things: first, a lower bound for the length         Studying a pattern does two things: first, a lower bound for the length
1525         of subject string that is needed to match the pattern is computed. This         of subject string that is needed to match the pattern is computed. This
# Line 1454  STUDYING A PATTERN Line 1534  STUDYING A PATTERN
1534         bytes is created. This speeds up finding a position in the  subject  at         bytes is created. This speeds up finding a position in the  subject  at
1535         which to start matching.         which to start matching.
1536    
1537         The  two  optimizations  just  described can be disabled by setting the         These  two optimizations apply to both pcre_exec() and pcre_dfa_exec().
1538         PCRE_NO_START_OPTIMIZE   option    when    calling    pcre_exec()    or         However, they are not used by pcre_exec()  if  pcre_study()  is  called
1539         pcre_dfa_exec().  You  might  want  to do this if your pattern contains         with  the  PCRE_STUDY_JIT_COMPILE option, and just-in-time compiling is
1540         callouts or (*MARK), and you want to make use of  these  facilities  in         successful.  The  optimizations  can  be  disabled   by   setting   the
1541         cases  where  matching fails. See the discussion of PCRE_NO_START_OPTI-         PCRE_NO_START_OPTIMIZE    option    when    calling    pcre_exec()   or
1542         MIZE below.         pcre_dfa_exec(). You might want to do this  if  your  pattern  contains
1543           callouts  or (*MARK) (which cannot be handled by the JIT compiler), and
1544           you want to make use of these facilities in cases where matching fails.
1545           See the discussion of PCRE_NO_START_OPTIMIZE below.
1546    
1547    
1548  LOCALE SUPPORT  LOCALE SUPPORT
1549    
1550         PCRE handles caseless matching, and determines whether  characters  are         PCRE  handles  caseless matching, and determines whether characters are
1551         letters,  digits, or whatever, by reference to a set of tables, indexed         letters, digits, or whatever, by reference to a set of tables,  indexed
1552         by character value. When running in UTF-8 mode, this  applies  only  to         by  character  value.  When running in UTF-8 mode, this applies only to
1553         characters  with  codes  less than 128. By default, higher-valued codes         characters with codes less than 128. By  default,  higher-valued  codes
1554         never match escapes such as \w or \d, but they can be tested with \p if         never match escapes such as \w or \d, but they can be tested with \p if
1555         PCRE  is  built with Unicode character property support. Alternatively,         PCRE is built with Unicode character property  support.  Alternatively,
1556         the PCRE_UCP option can be set at compile  time;  this  causes  \w  and         the  PCRE_UCP  option  can  be  set at compile time; this causes \w and
1557         friends to use Unicode property support instead of built-in tables. The         friends to use Unicode property support instead of built-in tables. The
1558         use of locales with Unicode is discouraged. If you are handling charac-         use of locales with Unicode is discouraged. If you are handling charac-
1559         ters  with codes greater than 128, you should either use UTF-8 and Uni-         ters with codes greater than 128, you should either use UTF-8 and  Uni-
1560         code, or use locales, but not try to mix the two.         code, or use locales, but not try to mix the two.
1561    
1562         PCRE contains an internal set of tables that are used  when  the  final         PCRE  contains  an  internal set of tables that are used when the final
1563         argument  of  pcre_compile()  is  NULL.  These  are sufficient for many         argument of pcre_compile() is  NULL.  These  are  sufficient  for  many
1564         applications.  Normally, the internal tables recognize only ASCII char-         applications.  Normally, the internal tables recognize only ASCII char-
1565         acters. However, when PCRE is built, it is possible to cause the inter-         acters. However, when PCRE is built, it is possible to cause the inter-
1566         nal tables to be rebuilt in the default "C" locale of the local system,         nal tables to be rebuilt in the default "C" locale of the local system,
1567         which may cause them to be different.         which may cause them to be different.
1568    
1569         The  internal tables can always be overridden by tables supplied by the         The internal tables can always be overridden by tables supplied by  the
1570         application that calls PCRE. These may be created in a different locale         application that calls PCRE. These may be created in a different locale
1571         from  the  default.  As more and more applications change to using Uni-         from the default. As more and more applications change  to  using  Uni-
1572         code, the need for this locale support is expected to die away.         code, the need for this locale support is expected to die away.
1573    
1574         External tables are built by calling  the  pcre_maketables()  function,         External  tables  are  built by calling the pcre_maketables() function,
1575         which  has no arguments, in the relevant locale. The result can then be         which has no arguments, in the relevant locale. The result can then  be
1576         passed to pcre_compile() or pcre_exec()  as  often  as  necessary.  For         passed  to  pcre_compile()  or  pcre_exec()  as often as necessary. For
1577         example,  to  build  and use tables that are appropriate for the French         example, to build and use tables that are appropriate  for  the  French
1578         locale (where accented characters with  values  greater  than  128  are         locale  (where  accented  characters  with  values greater than 128 are
1579         treated as letters), the following code could be used:         treated as letters), the following code could be used:
1580    
1581           setlocale(LC_CTYPE, "fr_FR");           setlocale(LC_CTYPE, "fr_FR");
1582           tables = pcre_maketables();           tables = pcre_maketables();
1583           re = pcre_compile(..., tables);           re = pcre_compile(..., tables);
1584    
1585         The  locale  name "fr_FR" is used on Linux and other Unix-like systems;         The locale name "fr_FR" is used on Linux and other  Unix-like  systems;
1586         if you are using Windows, the name for the French locale is "french".         if you are using Windows, the name for the French locale is "french".
1587    
1588         When pcre_maketables() runs, the tables are built  in  memory  that  is         When  pcre_maketables()  runs,  the  tables are built in memory that is
1589         obtained  via  pcre_malloc. It is the caller's responsibility to ensure         obtained via pcre_malloc. It is the caller's responsibility  to  ensure
1590         that the memory containing the tables remains available for as long  as         that  the memory containing the tables remains available for as long as
1591         it is needed.         it is needed.
1592    
1593         The pointer that is passed to pcre_compile() is saved with the compiled         The pointer that is passed to pcre_compile() is saved with the compiled
1594         pattern, and the same tables are used via this pointer by  pcre_study()         pattern,  and the same tables are used via this pointer by pcre_study()
1595         and normally also by pcre_exec(). Thus, by default, for any single pat-         and normally also by pcre_exec(). Thus, by default, for any single pat-
1596         tern, compilation, studying and matching all happen in the same locale,         tern, compilation, studying and matching all happen in the same locale,
1597         but different patterns can be compiled in different locales.         but different patterns can be compiled in different locales.
1598    
1599         It  is  possible to pass a table pointer or NULL (indicating the use of         It is possible to pass a table pointer or NULL (indicating the  use  of
1600         the internal tables) to pcre_exec(). Although  not  intended  for  this         the  internal  tables)  to  pcre_exec(). Although not intended for this
1601         purpose,  this facility could be used to match a pattern in a different         purpose, this facility could be used to match a pattern in a  different
1602         locale from the one in which it was compiled. Passing table pointers at         locale from the one in which it was compiled. Passing table pointers at
1603         run time is discussed below in the section on matching a pattern.         run time is discussed below in the section on matching a pattern.
1604    
# Line 1525  INFORMATION ABOUT A PATTERN Line 1608  INFORMATION ABOUT A PATTERN
1608         int pcre_fullinfo(const pcre *code, const pcre_extra *extra,         int pcre_fullinfo(const pcre *code, const pcre_extra *extra,
1609              int what, void *where);              int what, void *where);
1610    
1611         The  pcre_fullinfo() function returns information about a compiled pat-         The pcre_fullinfo() function returns information about a compiled  pat-
1612         tern. It replaces the obsolete pcre_info() function, which is neverthe-         tern. It replaces the obsolete pcre_info() function, which is neverthe-
1613         less retained for backwards compatibility (and is documented below).         less retained for backwards compatibility (and is documented below).
1614    
1615         The  first  argument  for  pcre_fullinfo() is a pointer to the compiled         The first argument for pcre_fullinfo() is a  pointer  to  the  compiled
1616         pattern. The second argument is the result of pcre_study(), or NULL  if         pattern.  The second argument is the result of pcre_study(), or NULL if
1617         the  pattern  was not studied. The third argument specifies which piece         the pattern was not studied. The third argument specifies  which  piece
1618         of information is required, and the fourth argument is a pointer  to  a         of  information  is required, and the fourth argument is a pointer to a
1619         variable  to  receive  the  data. The yield of the function is zero for         variable to receive the data. The yield of the  function  is  zero  for
1620         success, or one of the following negative numbers:         success, or one of the following negative numbers:
1621    
1622           PCRE_ERROR_NULL       the argument code was NULL           PCRE_ERROR_NULL       the argument code was NULL
# Line 1541  INFORMATION ABOUT A PATTERN Line 1624  INFORMATION ABOUT A PATTERN
1624           PCRE_ERROR_BADMAGIC   the "magic number" was not found           PCRE_ERROR_BADMAGIC   the "magic number" was not found
1625           PCRE_ERROR_BADOPTION  the value of what was invalid           PCRE_ERROR_BADOPTION  the value of what was invalid
1626    
1627         The "magic number" is placed at the start of each compiled  pattern  as         The  "magic  number" is placed at the start of each compiled pattern as
1628         a  simple check against passing an arbitrary memory pointer. Here is a         a simple check against passing an arbitrary memory pointer. Here is  a
1629         typical call of pcre_fullinfo(), to obtain the length of  the  compiled         typical  call  of pcre_fullinfo(), to obtain the length of the compiled
1630         pattern:         pattern:
1631    
1632           int rc;           int rc;
1633           size_t length;           size_t length;
1634           rc = pcre_fullinfo(           rc = pcre_fullinfo(
1635             re,               /* result of pcre_compile() */             re,               /* result of pcre_compile() */
1636             pe,               /* result of pcre_study(), or NULL */             sd,               /* result of pcre_study(), or NULL */
1637             PCRE_INFO_SIZE,   /* what is required */             PCRE_INFO_SIZE,   /* what is required */
1638             &length);         /* where to put the data */             &length);         /* where to put the data */
1639    
1640         The  possible  values for the third argument are defined in pcre.h, and         The possible values for the third argument are defined in  pcre.h,  and
1641         are as follows:         are as follows:
1642    
1643           PCRE_INFO_BACKREFMAX           PCRE_INFO_BACKREFMAX
1644    
1645         Return the number of the highest back reference  in  the  pattern.  The         Return  the  number  of  the highest back reference in the pattern. The
1646         fourth  argument  should  point to an int variable. Zero is returned if         fourth argument should point to an int variable. Zero  is  returned  if
1647         there are no back references.         there are no back references.
1648    
1649           PCRE_INFO_CAPTURECOUNT           PCRE_INFO_CAPTURECOUNT
1650    
1651         Return the number of capturing subpatterns in the pattern.  The  fourth         Return  the  number of capturing subpatterns in the pattern. The fourth
1652         argument should point to an int variable.         argument should point to an int variable.
1653    
1654           PCRE_INFO_DEFAULT_TABLES           PCRE_INFO_DEFAULT_TABLES
1655    
1656         Return  a pointer to the internal default character tables within PCRE.         Return a pointer to the internal default character tables within  PCRE.
1657         The fourth argument should point to an unsigned char *  variable.  This         The  fourth  argument should point to an unsigned char * variable. This
1658         information call is provided for internal use by the pcre_study() func-         information call is provided for internal use by the pcre_study() func-
1659         tion. External callers can cause PCRE to use  its  internal  tables  by         tion.  External  callers  can  cause PCRE to use its internal tables by
1660         passing a NULL table pointer.         passing a NULL table pointer.
1661    
1662           PCRE_INFO_FIRSTBYTE           PCRE_INFO_FIRSTBYTE
1663    
1664         Return  information  about  the first byte of any matched string, for a         Return information about the first byte of any matched  string,  for  a
1665         non-anchored pattern. The fourth argument should point to an int  vari-         non-anchored  pattern. The fourth argument should point to an int vari-
1666         able.  (This option used to be called PCRE_INFO_FIRSTCHAR; the old name         able. (This option used to be called PCRE_INFO_FIRSTCHAR; the old  name
1667         is still recognized for backwards compatibility.)         is still recognized for backwards compatibility.)
1668    
1669         If there is a fixed first byte, for example, from  a  pattern  such  as         If  there  is  a  fixed first byte, for example, from a pattern such as
1670         (cat|cow|coyote), its value is returned. Otherwise, if either         (cat|cow|coyote), its value is returned. Otherwise, if either
1671    
1672         (a)  the pattern was compiled with the PCRE_MULTILINE option, and every         (a) the pattern was compiled with the PCRE_MULTILINE option, and  every
1673         branch starts with "^", or         branch starts with "^", or
1674    
1675         (b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not         (b) every branch of the pattern starts with ".*" and PCRE_DOTALL is not
1676         set (if it were set, the pattern would be anchored),         set (if it were set, the pattern would be anchored),
1677    
1678         -1  is  returned, indicating that the pattern matches only at the start         -1 is returned, indicating that the pattern matches only at  the  start
1679         of a subject string or after any newline within the  string.  Otherwise         of  a  subject string or after any newline within the string. Otherwise
1680         -2 is returned. For anchored patterns, -2 is returned.         -2 is returned. For anchored patterns, -2 is returned.
1681    
1682           PCRE_INFO_FIRSTTABLE           PCRE_INFO_FIRSTTABLE
1683    
1684         If  the pattern was studied, and this resulted in the construction of a         If the pattern was studied, and this resulted in the construction of  a
1685         256-bit table indicating a fixed set of bytes for the first byte in any         256-bit table indicating a fixed set of bytes for the first byte in any
1686         matching  string, a pointer to the table is returned. Otherwise NULL is         matching string, a pointer to the table is returned. Otherwise NULL  is
1687         returned. The fourth argument should point to an unsigned char *  vari-         returned.  The fourth argument should point to an unsigned char * vari-
1688         able.         able.
1689    
1690           PCRE_INFO_HASCRORLF           PCRE_INFO_HASCRORLF
1691    
1692         Return  1  if  the  pattern  contains any explicit matches for CR or LF         Return 1 if the pattern contains any explicit  matches  for  CR  or  LF
1693         characters, otherwise 0. The fourth argument should  point  to  an  int         characters,  otherwise  0.  The  fourth argument should point to an int
1694         variable.  An explicit match is either a literal CR or LF character, or         variable. An explicit match is either a literal CR or LF character,  or
1695         \r or \n.         \r or \n.
1696    
1697           PCRE_INFO_JCHANGED           PCRE_INFO_JCHANGED
1698    
1699         Return 1 if the (?J) or (?-J) option setting is used  in  the  pattern,         Return  1  if  the (?J) or (?-J) option setting is used in the pattern,
1700         otherwise  0. The fourth argument should point to an int variable. (?J)         otherwise 0. The fourth argument should point to an int variable.  (?J)
1701         and (?-J) set and unset the local PCRE_DUPNAMES option, respectively.         and (?-J) set and unset the local PCRE_DUPNAMES option, respectively.
1702    
1703             PCRE_INFO_JIT
1704    
1705           Return  1  if  the  pattern was studied with the PCRE_STUDY_JIT_COMPILE
1706           option, and just-in-time compiling was successful. The fourth  argument
1707           should  point  to  an  int variable. A return value of 0 means that JIT
1708           support is not available in this version of PCRE, or that  the  pattern
1709           was not studied with the PCRE_STUDY_JIT_COMPILE option, or that the JIT
1710           compiler could not handle this particular pattern. See the pcrejit doc-
1711           umentation for details of what can and cannot be handled.
1712    
1713           PCRE_INFO_LASTLITERAL           PCRE_INFO_LASTLITERAL
1714    
1715         Return the value of the rightmost literal byte that must exist  in  any         Return  the  value of the rightmost literal byte that must exist in any
1716         matched  string,  other  than  at  its  start,  if such a byte has been         matched string, other than at its  start,  if  such  a  byte  has  been
1717         recorded. The fourth argument should point to an int variable. If there         recorded. The fourth argument should point to an int variable. If there
1718         is  no such byte, -1 is returned. For anchored patterns, a last literal         is no such byte, -1 is returned. For anchored patterns, a last  literal
1719         byte is recorded only if it follows something of variable  length.  For         byte  is  recorded only if it follows something of variable length. For
1720         example, for the pattern /^a\d+z\d+/ the returned value is "z", but for         example, for the pattern /^a\d+z\d+/ the returned value is "z", but for
1721         /^a\dz\d/ the returned value is -1.         /^a\dz\d/ the returned value is -1.
1722    
1723           PCRE_INFO_MINLENGTH           PCRE_INFO_MINLENGTH
1724    
1725         If the pattern was studied and a minimum length  for  matching  subject         If  the  pattern  was studied and a minimum length for matching subject
1726         strings  was  computed,  its  value is returned. Otherwise the returned         strings was computed, its value is  returned.  Otherwise  the  returned
1727         value is -1. The value is a number of characters, not bytes  (this  may         value  is  -1. The value is a number of characters, not bytes (this may
1728         be  relevant in UTF-8 mode). The fourth argument should point to an int         be relevant in UTF-8 mode). The fourth argument should point to an  int
1729         variable. A non-negative value is a lower bound to the  length  of  any         variable.  A  non-negative  value is a lower bound to the length of any
1730         matching  string.  There  may not be any strings of that length that do         matching string. There may not be any strings of that  length  that  do
1731         actually match, but every string that does match is at least that long.         actually match, but every string that does match is at least that long.
1732    
1733           PCRE_INFO_NAMECOUNT           PCRE_INFO_NAMECOUNT
1734           PCRE_INFO_NAMEENTRYSIZE           PCRE_INFO_NAMEENTRYSIZE
1735           PCRE_INFO_NAMETABLE           PCRE_INFO_NAMETABLE
1736    
1737         PCRE supports the use of named as well as numbered capturing  parenthe-         PCRE  supports the use of named as well as numbered capturing parenthe-
1738         ses.  The names are just an additional way of identifying the parenthe-         ses. The names are just an additional way of identifying the  parenthe-
1739         ses, which still acquire numbers. Several convenience functions such as         ses, which still acquire numbers. Several convenience functions such as
1740         pcre_get_named_substring()  are  provided  for extracting captured sub-         pcre_get_named_substring() are provided for  extracting  captured  sub-
1741         strings by name. It is also possible to extract the data  directly,  by         strings  by  name. It is also possible to extract the data directly, by
1742         first  converting  the  name to a number in order to access the correct         first converting the name to a number in order to  access  the  correct
1743         pointers in the output vector (described with pcre_exec() below). To do         pointers in the output vector (described with pcre_exec() below). To do
1744         the  conversion,  you  need  to  use  the  name-to-number map, which is         the conversion, you need  to  use  the  name-to-number  map,  which  is
1745         described by these three values.         described by these three values.
1746    
1747         The map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT         The map consists of a number of fixed-size entries. PCRE_INFO_NAMECOUNT
1748         gives the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size         gives the number of entries, and PCRE_INFO_NAMEENTRYSIZE gives the size
1749         of each entry; both of these  return  an  int  value.  The  entry  size         of  each  entry;  both  of  these  return  an int value. The entry size
1750         depends  on the length of the longest name. PCRE_INFO_NAMETABLE returns         depends on the length of the longest name. PCRE_INFO_NAMETABLE  returns
1751         a pointer to the first entry of the table  (a  pointer  to  char).  The         a  pointer  to  the  first  entry of the table (a pointer to char). The
1752         first two bytes of each entry are the number of the capturing parenthe-         first two bytes of each entry are the number of the capturing parenthe-
1753         sis, most significant byte first. The rest of the entry is  the  corre-         sis,  most  significant byte first. The rest of the entry is the corre-
1754         sponding name, zero terminated.         sponding name, zero terminated.
1755    
1756         The  names are in alphabetical order. Duplicate names may appear if (?|         The names are in alphabetical order. Duplicate names may appear if  (?|
1757         is used to create multiple groups with the same number, as described in         is used to create multiple groups with the same number, as described in
1758         the  section  on  duplicate subpattern numbers in the pcrepattern page.         the section on duplicate subpattern numbers in  the  pcrepattern  page.
1759         Duplicate names for subpatterns with different  numbers  are  permitted         Duplicate  names  for  subpatterns with different numbers are permitted
1760         only  if  PCRE_DUPNAMES  is  set. In all cases of duplicate names, they         only if PCRE_DUPNAMES is set. In all cases  of  duplicate  names,  they
1761         appear in the table in the order in which they were found in  the  pat-         appear  in  the table in the order in which they were found in the pat-
1762         tern.  In  the  absence  of (?| this is the order of increasing number;         tern. In the absence of (?| this is the  order  of  increasing  number;
1763         when (?| is used this is not necessarily the case because later subpat-         when (?| is used this is not necessarily the case because later subpat-
1764         terns may have lower numbers.         terns may have lower numbers.
1765    
1766         As  a  simple  example of the name/number table, consider the following         As a simple example of the name/number table,  consider  the  following
1767         pattern (assume PCRE_EXTENDED is set, so white space -  including  new-         pattern  (assume  PCRE_EXTENDED is set, so white space - including new-
1768         lines - is ignored):         lines - is ignored):
1769    
1770           (?<date> (?<year>(\d\d)?\d\d) -           (?<date> (?<year>(\d\d)?\d\d) -
1771           (?<month>\d\d) - (?<day>\d\d) )           (?<month>\d\d) - (?<day>\d\d) )
1772    
1773         There  are  four  named subpatterns, so the table has four entries, and         There are four named subpatterns, so the table has  four  entries,  and
1774         each entry in the table is eight bytes long. The table is  as  follows,         each  entry  in the table is eight bytes long. The table is as follows,
1775         with non-printing bytes shown in hexadecimal, and undefined bytes shown         with non-printing bytes shown in hexadecimal, and undefined bytes shown
1776         as ??:         as ??:
1777    
# Line 1687  INFORMATION ABOUT A PATTERN Line 1780  INFORMATION ABOUT A PATTERN
1780           00 04 m  o  n  t  h  00           00 04 m  o  n  t  h  00
1781           00 02 y  e  a  r  00 ??           00 02 y  e  a  r  00 ??
1782    
1783         When writing code to extract data  from  named  subpatterns  using  the         When  writing  code  to  extract  data from named subpatterns using the
1784         name-to-number  map,  remember that the length of the entries is likely         name-to-number map, remember that the length of the entries  is  likely
1785         to be different for each compiled pattern.         to be different for each compiled pattern.
1786    
1787           PCRE_INFO_OKPARTIAL           PCRE_INFO_OKPARTIAL
1788    
1789         Return 1  if  the  pattern  can  be  used  for  partial  matching  with         Return  1  if  the  pattern  can  be  used  for  partial  matching with
1790         pcre_exec(),  otherwise  0.  The fourth argument should point to an int         pcre_exec(), otherwise 0. The fourth argument should point  to  an  int
1791         variable. From  release  8.00,  this  always  returns  1,  because  the         variable.  From  release  8.00,  this  always  returns  1,  because the
1792         restrictions  that  previously  applied  to  partial matching have been         restrictions that previously applied  to  partial  matching  have  been
1793         lifted. The pcrepartial documentation gives details of  partial  match-         lifted.  The  pcrepartial documentation gives details of partial match-
1794         ing.         ing.
1795    
1796           PCRE_INFO_OPTIONS           PCRE_INFO_OPTIONS
1797    
1798         Return  a  copy of the options with which the pattern was compiled. The         Return a copy of the options with which the pattern was  compiled.  The
1799         fourth argument should point to an unsigned long  int  variable.  These         fourth  argument  should  point to an unsigned long int variable. These
1800         option bits are those specified in the call to pcre_compile(), modified         option bits are those specified in the call to pcre_compile(), modified
1801         by any top-level option settings at the start of the pattern itself. In         by any top-level option settings at the start of the pattern itself. In
1802         other  words,  they are the options that will be in force when matching         other words, they are the options that will be in force  when  matching
1803         starts. For example, if the pattern /(?im)abc(?-i)d/ is  compiled  with         starts.  For  example, if the pattern /(?im)abc(?-i)d/ is compiled with
1804         the  PCRE_EXTENDED option, the result is PCRE_CASELESS, PCRE_MULTILINE,         the PCRE_EXTENDED option, the result is PCRE_CASELESS,  PCRE_MULTILINE,
1805         and PCRE_EXTENDED.         and PCRE_EXTENDED.
1806    
1807         A pattern is automatically anchored by PCRE if  all  of  its  top-level         A  pattern  is  automatically  anchored by PCRE if all of its top-level
1808         alternatives begin with one of the following:         alternatives begin with one of the following:
1809    
1810           ^     unless PCRE_MULTILINE is set           ^     unless PCRE_MULTILINE is set
# Line 1725  INFORMATION ABOUT A PATTERN Line 1818  INFORMATION ABOUT A PATTERN
1818    
1819           PCRE_INFO_SIZE           PCRE_INFO_SIZE
1820    
1821         Return the size of the compiled pattern, that is, the  value  that  was         Return  the  size  of the compiled pattern, that is, the value that was
1822         passed as the argument to pcre_malloc() when PCRE was getting memory in         passed as the argument to pcre_malloc() when PCRE was getting memory in
1823         which to place the compiled data. The fourth argument should point to a         which to place the compiled data. The fourth argument should point to a
1824         size_t variable.         size_t variable.
# Line 1733  INFORMATION ABOUT A PATTERN Line 1826  INFORMATION ABOUT A PATTERN
1826           PCRE_INFO_STUDYSIZE           PCRE_INFO_STUDYSIZE
1827    
1828         Return the size of the data block pointed to by the study_data field in         Return the size of the data block pointed to by the study_data field in
1829         a pcre_extra block. If pcre_extra is NULL, or there is no  study  data,         a  pcre_extra  block. If pcre_extra is NULL, or there is no study data,
1830         zero  is  returned.  The fourth argument should point to a size_t vari-         zero is returned. The fourth argument should point to  a  size_t  vari-
1831         able.  The study_data field is set by pcre_study() to  record  informa-         able.   The  study_data field is set by pcre_study() to record informa-
1832         tion  that will speed up matching (see the section entitled "Studying a         tion that will speed up matching (see the section entitled "Studying  a
1833         pattern" above). The format of the study_data block is private, but its         pattern" above). The format of the study_data block is private, but its
1834         length  is  made  available via this option so that it can be saved and         length is made available via this option so that it can  be  saved  and
1835         restored (see the pcreprecompile documentation for details).         restored (see the pcreprecompile documentation for details).
1836    
1837    
# Line 1746  OBSOLETE INFO FUNCTION Line 1839  OBSOLETE INFO FUNCTION
1839    
1840         int pcre_info(const pcre *code, int *optptr, int *firstcharptr);         int pcre_info(const pcre *code, int *optptr, int *firstcharptr);
1841    
1842         The pcre_info() function is now obsolete because its interface  is  too         The  pcre_info()  function is now obsolete because its interface is too
1843         restrictive  to return all the available data about a compiled pattern.         restrictive to return all the available data about a compiled  pattern.
1844         New  programs  should  use  pcre_fullinfo()  instead.  The   yield   of         New   programs   should  use  pcre_fullinfo()  instead.  The  yield  of
1845         pcre_info()  is the number of capturing subpatterns, or one of the fol-         pcre_info() is the number of capturing subpatterns, or one of the  fol-
1846         lowing negative numbers:         lowing negative numbers:
1847    
1848           PCRE_ERROR_NULL       the argument code was NULL           PCRE_ERROR_NULL       the argument code was NULL
1849           PCRE_ERROR_BADMAGIC   the "magic number" was not found           PCRE_ERROR_BADMAGIC   the "magic number" was not found
1850    
1851         If the optptr argument is not NULL, a copy of the  options  with  which         If  the  optptr  argument is not NULL, a copy of the options with which
1852         the  pattern  was  compiled  is placed in the integer it points to (see         the pattern was compiled is placed in the integer  it  points  to  (see
1853         PCRE_INFO_OPTIONS above).         PCRE_INFO_OPTIONS above).
1854    
1855         If the pattern is not anchored and the  firstcharptr  argument  is  not         If  the  pattern  is  not anchored and the firstcharptr argument is not
1856         NULL,  it is used to pass back information about the first character of         NULL, it is used to pass back information about the first character  of
1857         any matched string (see PCRE_INFO_FIRSTBYTE above).         any matched string (see PCRE_INFO_FIRSTBYTE above).
1858    
1859    
# Line 1768  REFERENCE COUNTS Line 1861  REFERENCE COUNTS
1861    
1862         int pcre_refcount(pcre *code, int adjust);         int pcre_refcount(pcre *code, int adjust);
1863    
1864         The pcre_refcount() function is used to maintain a reference  count  in         The  pcre_refcount()  function is used to maintain a reference count in
1865         the data block that contains a compiled pattern. It is provided for the         the data block that contains a compiled pattern. It is provided for the
1866         benefit of applications that  operate  in  an  object-oriented  manner,         benefit  of  applications  that  operate  in an object-oriented manner,
1867         where different parts of the application may be using the same compiled         where different parts of the application may be using the same compiled
1868         pattern, but you want to free the block when they are all done.         pattern, but you want to free the block when they are all done.
1869    
1870         When a pattern is compiled, the reference count field is initialized to         When a pattern is compiled, the reference count field is initialized to
1871         zero.   It is changed only by calling this function, whose action is to         zero.  It is changed only by calling this function, whose action is  to
1872         add the adjust value (which may be positive or  negative)  to  it.  The         add  the  adjust  value  (which may be positive or negative) to it. The
1873         yield of the function is the new value. However, the value of the count         yield of the function is the new value. However, the value of the count
1874         is constrained to lie between 0 and 65535, inclusive. If the new  value         is  constrained to lie between 0 and 65535, inclusive. If the new value
1875         is outside these limits, it is forced to the appropriate limit value.         is outside these limits, it is forced to the appropriate limit value.
1876    
1877         Except  when it is zero, the reference count is not correctly preserved         Except when it is zero, the reference count is not correctly  preserved
1878         if a pattern is compiled on one host and then  transferred  to  a  host         if  a  pattern  is  compiled on one host and then transferred to a host
1879         whose byte-order is different. (This seems a highly unlikely scenario.)         whose byte-order is different. (This seems a highly unlikely scenario.)
1880    
1881    
# Line 1792  MATCHING A PATTERN: THE TRADITIONAL FUNC Line 1885  MATCHING A PATTERN: THE TRADITIONAL FUNC
1885              const char *subject, int length, int startoffset,              const char *subject, int length, int startoffset,
1886              int options, int *ovector, int ovecsize);              int options, int *ovector, int ovecsize);
1887    
1888         The  function pcre_exec() is called to match a subject string against a         The function pcre_exec() is called to match a subject string against  a
1889         compiled pattern, which is passed in the code argument. If the  pattern         compiled  pattern, which is passed in the code argument. If the pattern
1890         was  studied,  the  result  of  the study should be passed in the extra         was studied, the result of the study should  be  passed  in  the  extra
1891         argument. This function is the main matching facility of  the  library,         argument.  This  function is the main matching facility of the library,
1892         and it operates in a Perl-like manner. For specialist use there is also         and it operates in a Perl-like manner. For specialist use there is also
1893         an alternative matching function, which is described below in the  sec-         an  alternative matching function, which is described below in the sec-
1894         tion about the pcre_dfa_exec() function.         tion about the pcre_dfa_exec() function.
1895    
1896         In  most applications, the pattern will have been compiled (and option-         In most applications, the pattern will have been compiled (and  option-
1897         ally studied) in the same process that calls pcre_exec().  However,  it         ally  studied)  in the same process that calls pcre_exec(). However, it
1898         is possible to save compiled patterns and study data, and then use them         is possible to save compiled patterns and study data, and then use them
1899         later in different processes, possibly even on different hosts.  For  a         later  in  different processes, possibly even on different hosts. For a
1900         discussion about this, see the pcreprecompile documentation.         discussion about this, see the pcreprecompile documentation.
1901    
1902         Here is an example of a simple call to pcre_exec():         Here is an example of a simple call to pcre_exec():
# Line 1822  MATCHING A PATTERN: THE TRADITIONAL FUNC Line 1915  MATCHING A PATTERN: THE TRADITIONAL FUNC
1915    
1916     Extra data for pcre_exec()     Extra data for pcre_exec()
1917    
1918         If  the  extra argument is not NULL, it must point to a pcre_extra data         If the extra argument is not NULL, it must point to a  pcre_extra  data
1919         block. The pcre_study() function returns such a block (when it  doesn't         block.  The pcre_study() function returns such a block (when it doesn't
1920         return  NULL), but you can also create one for yourself, and pass addi-         return NULL), but you can also create one for yourself, and pass  addi-
1921         tional information in it. The pcre_extra block contains  the  following         tional  information  in it. The pcre_extra block contains the following
1922         fields (not necessarily in this order):         fields (not necessarily in this order):
1923    
1924           unsigned long int flags;           unsigned long int flags;
1925           void *study_data;           void *study_data;
1926             void *executable_jit;
1927           unsigned long int match_limit;           unsigned long int match_limit;
1928           unsigned long int match_limit_recursion;           unsigned long int match_limit_recursion;
1929           void *callout_data;           void *callout_data;
1930           const unsigned char *tables;           const unsigned char *tables;
1931           unsigned char **mark;           unsigned char **mark;
1932    
1933         The  flags  field  is a bitmap that specifies which of the other fields         The flags field is a bitmap that specifies which of  the  other  fields
1934         are set. The flag bits are:         are set. The flag bits are:
1935    
1936           PCRE_EXTRA_STUDY_DATA           PCRE_EXTRA_STUDY_DATA
1937             PCRE_EXTRA_EXECUTABLE_JIT
1938           PCRE_EXTRA_MATCH_LIMIT           PCRE_EXTRA_MATCH_LIMIT
1939           PCRE_EXTRA_MATCH_LIMIT_RECURSION           PCRE_EXTRA_MATCH_LIMIT_RECURSION
1940           PCRE_EXTRA_CALLOUT_DATA           PCRE_EXTRA_CALLOUT_DATA
1941           PCRE_EXTRA_TABLES           PCRE_EXTRA_TABLES
1942           PCRE_EXTRA_MARK           PCRE_EXTRA_MARK
1943    
1944         Other flag bits should be set to zero. The study_data field is  set  in         Other  flag  bits should be set to zero. The study_data field and some-
1945         the  pcre_extra  block  that is returned by pcre_study(), together with         times the executable_jit field are set in the pcre_extra block that  is
1946         the appropriate flag bit. You should not set this yourself, but you may         returned  by pcre_study(), together with the appropriate flag bits. You
1947         add  to  the  block by setting the other fields and their corresponding         should not set these yourself, but you may add to the block by  setting
1948         flag bits.         the other fields and their corresponding flag bits.
1949    
1950         The match_limit field provides a means of preventing PCRE from using up         The match_limit field provides a means of preventing PCRE from using up
1951         a  vast amount of resources when running patterns that are not going to         a vast amount of resources when running patterns that are not going  to
1952         match, but which have a very large number  of  possibilities  in  their         match,  but  which  have  a very large number of possibilities in their
1953         search  trees. The classic example is a pattern that uses nested unlim-         search trees. The classic example is a pattern that uses nested  unlim-
1954         ited repeats.         ited repeats.
1955    
1956         Internally, PCRE uses a function called match() which it calls  repeat-         Internally,  pcre_exec() uses a function called match(), which it calls
1957         edly  (sometimes  recursively). The limit set by match_limit is imposed         repeatedly (sometimes recursively). The limit  set  by  match_limit  is
1958         on the number of times this function is called during  a  match,  which         imposed  on the number of times this function is called during a match,
1959         has  the  effect  of  limiting the amount of backtracking that can take         which has the effect of limiting the amount of  backtracking  that  can
1960         place. For patterns that are not anchored, the count restarts from zero         take place. For patterns that are not anchored, the count restarts from
1961         for each position in the subject string.         zero for each position in the subject string.
1962    
1963           When pcre_exec() is called with a pattern that was successfully studied
1964           with  the  PCRE_STUDY_JIT_COMPILE  option, the way that the matching is
1965           executed is entirely different. However, there is still the possibility
1966           of  runaway  matching  that  goes  on  for a very long time, and so the
1967           match_limit value is also used in this case (but in a different way) to
1968           limit how long the matching can continue.
1969    
1970         The  default  value  for  the  limit can be set when PCRE is built; the         The  default  value  for  the  limit can be set when PCRE is built; the
1971         default default is 10 million, which handles all but the  most  extreme         default default is 10 million, which handles all but the  most  extreme
# Line 1878  MATCHING A PATTERN: THE TRADITIONAL FUNC Line 1980  MATCHING A PATTERN: THE TRADITIONAL FUNC
1980         the  total number of calls, because not all calls to match() are recur-         the  total number of calls, because not all calls to match() are recur-
1981         sive.  This limit is of use only if it is set smaller than match_limit.         sive.  This limit is of use only if it is set smaller than match_limit.
1982    
1983         Limiting the recursion depth limits the amount of  stack  that  can  be         Limiting the recursion depth limits the amount of  machine  stack  that
1984         used, or, when PCRE has been compiled to use memory on the heap instead         can  be used, or, when PCRE has been compiled to use memory on the heap
1985         of the stack, the amount of heap memory that can be used.         instead of the stack, the amount of heap memory that can be used.  This
1986           limit  is not relevant, and is ignored, if the pattern was successfully
1987           studied with PCRE_STUDY_JIT_COMPILE.
1988    
1989         The default value for match_limit_recursion can be  set  when  PCRE  is         The default value for match_limit_recursion can be  set  when  PCRE  is
1990         built;  the  default  default  is  the  same  value  as the default for         built;  the  default  default  is  the  same  value  as the default for
# Line 1923  MATCHING A PATTERN: THE TRADITIONAL FUNC Line 2027  MATCHING A PATTERN: THE TRADITIONAL FUNC
2027         PCRE_NO_START_OPTIMIZE,  PCRE_NO_UTF8_CHECK,   PCRE_PARTIAL_SOFT,   and         PCRE_NO_START_OPTIMIZE,  PCRE_NO_UTF8_CHECK,   PCRE_PARTIAL_SOFT,   and
2028         PCRE_PARTIAL_HARD.         PCRE_PARTIAL_HARD.
2029    
2030           If the pattern was successfully studied with the PCRE_STUDY_JIT_COMPILE
2031           option,  the   only   supported   options   for   JIT   execution   are
2032           PCRE_NO_UTF8_CHECK,   PCRE_NOTBOL,   PCRE_NOTEOL,   PCRE_NOTEMPTY,  and
2033           PCRE_NOTEMPTY_ATSTART. Note in particular that partial matching is  not
2034           supported.  If an unsupported option is used, JIT execution is disabled
2035           and the normal interpretive code in pcre_exec() is run.
2036    
2037           PCRE_ANCHORED           PCRE_ANCHORED
2038    
2039         The  PCRE_ANCHORED  option  limits pcre_exec() to matching at the first         The PCRE_ANCHORED option limits pcre_exec() to matching  at  the  first
2040         matching position. If a pattern was  compiled  with  PCRE_ANCHORED,  or         matching  position.  If  a  pattern was compiled with PCRE_ANCHORED, or
2041         turned  out to be anchored by virtue of its contents, it cannot be made         turned out to be anchored by virtue of its contents, it cannot be  made
2042         unanchored at matching time.         unanchored at matching time.
2043    
2044           PCRE_BSR_ANYCRLF           PCRE_BSR_ANYCRLF
2045           PCRE_BSR_UNICODE           PCRE_BSR_UNICODE
2046    
2047         These options (which are mutually exclusive) control what the \R escape         These options (which are mutually exclusive) control what the \R escape
2048         sequence  matches.  The choice is either to match only CR, LF, or CRLF,         sequence matches. The choice is either to match only CR, LF,  or  CRLF,
2049         or to match any Unicode newline sequence. These  options  override  the         or  to  match  any Unicode newline sequence. These options override the
2050         choice that was made or defaulted when the pattern was compiled.         choice that was made or defaulted when the pattern was compiled.
2051    
2052           PCRE_NEWLINE_CR           PCRE_NEWLINE_CR
# Line 1944  MATCHING A PATTERN: THE TRADITIONAL FUNC Line 2055  MATCHING A PATTERN: THE TRADITIONAL FUNC
2055           PCRE_NEWLINE_ANYCRLF           PCRE_NEWLINE_ANYCRLF
2056           PCRE_NEWLINE_ANY           PCRE_NEWLINE_ANY
2057    
2058         These  options  override  the  newline  definition  that  was chosen or         These options override  the  newline  definition  that  was  chosen  or
2059         defaulted when the pattern was compiled. For details, see the  descrip-         defaulted  when the pattern was compiled. For details, see the descrip-
2060         tion  of  pcre_compile()  above.  During  matching,  the newline choice         tion of pcre_compile()  above.  During  matching,  the  newline  choice
2061         affects the behaviour of the dot, circumflex,  and  dollar  metacharac-         affects  the  behaviour  of the dot, circumflex, and dollar metacharac-
2062         ters.  It may also alter the way the match position is advanced after a         ters. It may also alter the way the match position is advanced after  a
2063         match failure for an unanchored pattern.         match failure for an unanchored pattern.
2064    
2065         When PCRE_NEWLINE_CRLF, PCRE_NEWLINE_ANYCRLF,  or  PCRE_NEWLINE_ANY  is         When  PCRE_NEWLINE_CRLF,  PCRE_NEWLINE_ANYCRLF,  or PCRE_NEWLINE_ANY is
2066         set,  and a match attempt for an unanchored pattern fails when the cur-         set, and a match attempt for an unanchored pattern fails when the  cur-
2067         rent position is at a  CRLF  sequence,  and  the  pattern  contains  no         rent  position  is  at  a  CRLF  sequence,  and the pattern contains no
2068         explicit  matches  for  CR  or  LF  characters,  the  match position is         explicit matches for  CR  or  LF  characters,  the  match  position  is
2069         advanced by two characters instead of one, in other words, to after the         advanced by two characters instead of one, in other words, to after the
2070         CRLF.         CRLF.
2071    
2072         The above rule is a compromise that makes the most common cases work as         The above rule is a compromise that makes the most common cases work as
2073         expected. For example, if the  pattern  is  .+A  (and  the  PCRE_DOTALL         expected.  For  example,  if  the  pattern  is .+A (and the PCRE_DOTALL
2074         option is not set), it does not match the string "\r\nA" because, after         option is not set), it does not match the string "\r\nA" because, after
2075         failing at the start, it skips both the CR and the LF before  retrying.         failing  at the start, it skips both the CR and the LF before retrying.
2076         However,  the  pattern  [\r\n]A does match that string, because it con-         However, the pattern [\r\n]A does match that string,  because  it  con-
2077         tains an explicit CR or LF reference, and so advances only by one char-         tains an explicit CR or LF reference, and so advances only by one char-
2078         acter after the first failure.         acter after the first failure.
2079    
2080         An explicit match for CR or LF is either a literal appearance of one of         An explicit match for CR or LF is either a literal appearance of one of
2081         those characters, or one of the \r or  \n  escape  sequences.  Implicit         those  characters,  or  one  of the \r or \n escape sequences. Implicit
2082         matches  such  as [^X] do not count, nor does \s (which includes CR and         matches such as [^X] do not count, nor does \s (which includes  CR  and
2083         LF in the characters that it matches).         LF in the characters that it matches).
2084    
2085         Notwithstanding the above, anomalous effects may still occur when  CRLF         Notwithstanding  the above, anomalous effects may still occur when CRLF
2086         is a valid newline sequence and explicit \r or \n escapes appear in the         is a valid newline sequence and explicit \r or \n escapes appear in the
2087         pattern.         pattern.
2088    
2089           PCRE_NOTBOL           PCRE_NOTBOL
2090    
2091         This option specifies that the first character of the subject string is not         This option specifies that the first character of the subject string is not
2092         the  beginning  of  a  line, so the circumflex metacharacter should not         the beginning of a line, so the  circumflex  metacharacter  should  not
2093         match before it. Setting this without PCRE_MULTILINE (at compile  time)         match  before it. Setting this without PCRE_MULTILINE (at compile time)
2094         causes  circumflex  never to match. This option affects only the behav-         causes circumflex never to match. This option affects only  the  behav-
2095         iour of the circumflex metacharacter. It does not affect \A.         iour of the circumflex metacharacter. It does not affect \A.
2096    
2097           PCRE_NOTEOL           PCRE_NOTEOL
2098    
2099         This option specifies that the end of the subject string is not the end         This option specifies that the end of the subject string is not the end
2100         of  a line, so the dollar metacharacter should not match it nor (except         of a line, so the dollar metacharacter should not match it nor  (except
2101         in multiline mode) a newline immediately before it. Setting this  with-         in  multiline mode) a newline immediately before it. Setting this with-
2102         out PCRE_MULTILINE (at compile time) causes dollar never to match. This         out PCRE_MULTILINE (at compile time) causes dollar never to match. This
2103         option affects only the behaviour of the dollar metacharacter. It  does         option  affects only the behaviour of the dollar metacharacter. It does
2104         not affect \Z or \z.         not affect \Z or \z.
2105    
2106           PCRE_NOTEMPTY           PCRE_NOTEMPTY
2107    
2108         An empty string is not considered to be a valid match if this option is         An empty string is not considered to be a valid match if this option is
2109         set. If there are alternatives in the pattern, they are tried.  If  all         set.  If  there are alternatives in the pattern, they are tried. If all
2110         the  alternatives  match  the empty string, the entire match fails. For         the alternatives match the empty string, the entire  match  fails.  For
2111         example, if the pattern         example, if the pattern
2112    
2113           a?b?           a?b?
2114    
2115         is applied to a string not beginning with "a" or  "b",  it  matches  an         is  applied  to  a  string not beginning with "a" or "b", it matches an
2116         empty  string at the start of the subject. With PCRE_NOTEMPTY set, this         empty string at the start of the subject. With PCRE_NOTEMPTY set,  this
2117         match is not valid, so PCRE searches further into the string for occur-         match is not valid, so PCRE searches further into the string for occur-
2118         rences of "a" or "b".         rences of "a" or "b".
2119    
2120           PCRE_NOTEMPTY_ATSTART           PCRE_NOTEMPTY_ATSTART
2121    
2122         This  is  like PCRE_NOTEMPTY, except that an empty string match that is         This is like PCRE_NOTEMPTY, except that an empty string match  that  is
2123         not at the start of  the  subject  is  permitted.  If  the  pattern  is         not  at  the  start  of  the  subject  is  permitted. If the pattern is
2124         anchored, such a match can occur only if the pattern contains \K.         anchored, such a match can occur only if the pattern contains \K.
2125    
2126         Perl     has    no    direct    equivalent    of    PCRE_NOTEMPTY    or         Perl    has    no    direct    equivalent    of    PCRE_NOTEMPTY     or
2127         PCRE_NOTEMPTY_ATSTART, but it does make a special  case  of  a  pattern         PCRE_NOTEMPTY_ATSTART,  but  it  does  make a special case of a pattern
2128         match  of  the empty string within its split() function, and when using         match of the empty string within its split() function, and  when  using
2129         the /g modifier. It is  possible  to  emulate  Perl's  behaviour  after         the  /g  modifier.  It  is  possible  to emulate Perl's behaviour after
2130         matching a null string by first trying the match again at the same off-         matching a null string by first trying the match again at the same off-
2131         set with PCRE_NOTEMPTY_ATSTART and  PCRE_ANCHORED,  and  then  if  that         set  with  PCRE_NOTEMPTY_ATSTART  and  PCRE_ANCHORED,  and then if that
2132         fails, by advancing the starting offset (see below) and trying an ordi-         fails, by advancing the starting offset (see below) and trying an ordi-
2133         nary match again. There is some code that demonstrates how to  do  this         nary  match  again. There is some code that demonstrates how to do this
2134         in  the  pcredemo sample program. In the most general case, you have to         in the pcredemo sample program. In the most general case, you  have  to
2135         check to see if the newline convention recognizes CRLF  as  a  newline,         check  to  see  if the newline convention recognizes CRLF as a newline,
2136         and  if so, and the current character is CR followed by LF, advance the         and if so, and the current character is CR followed by LF, advance  the
2137         starting offset by two characters instead of one.         starting offset by two characters instead of one.
2138    
2139           PCRE_NO_START_OPTIMIZE           PCRE_NO_START_OPTIMIZE
2140    
2141         There are a number of optimizations that pcre_exec() uses at the  start         There  are a number of optimizations that pcre_exec() uses at the start
2142         of  a  match,  in  order to speed up the process. For example, if it is         of a match, in order to speed up the process. For  example,  if  it  is
2143         known that an unanchored match must start with a specific character, it         known that an unanchored match must start with a specific character, it
2144         searches  the  subject  for that character, and fails immediately if it         searches the subject for that character, and fails  immediately  if  it
2145         cannot find it, without actually running the  main  matching  function.         cannot  find  it,  without actually running the main matching function.
2146         This means that a special item such as (*COMMIT) at the start of a pat-         This means that a special item such as (*COMMIT) at the start of a pat-
2147         tern is not considered until after a suitable starting  point  for  the         tern  is  not  considered until after a suitable starting point for the
2148         match  has been found. When callouts or (*MARK) items are in use, these         match has been found. When callouts or (*MARK) items are in use,  these
2149         "start-up" optimizations can cause them to be skipped if the pattern is         "start-up" optimizations can cause them to be skipped if the pattern is
2150         never  actually  used.  The start-up optimizations are in effect a pre-         never actually used. The start-up optimizations are in  effect  a  pre-
2151         scan of the subject that takes place before the pattern is run.         scan of the subject that takes place before the pattern is run.
2152    
2153         The PCRE_NO_START_OPTIMIZE option disables the start-up  optimizations,         The  PCRE_NO_START_OPTIMIZE option disables the start-up optimizations,
2154         possibly  causing  performance  to  suffer,  but ensuring that in cases         possibly causing performance to suffer,  but  ensuring  that  in  cases
2155         where the result is "no match", the callouts do occur, and  that  items         where  the  result is "no match", the callouts do occur, and that items
2156         such as (*COMMIT) and (*MARK) are considered at every possible starting         such as (*COMMIT) and (*MARK) are considered at every possible starting
2157         position in the subject string. If  PCRE_NO_START_OPTIMIZE  is  set  at         position  in  the  subject  string. If PCRE_NO_START_OPTIMIZE is set at
2158         compile time, it cannot be unset at matching time.         compile time, it cannot be unset at matching time.
2159    
2160         Setting  PCRE_NO_START_OPTIMIZE  can  change  the outcome of a matching         Setting PCRE_NO_START_OPTIMIZE can change the  outcome  of  a  matching
2161         operation.  Consider the pattern         operation.  Consider the pattern
2162    
2163           (*COMMIT)ABC           (*COMMIT)ABC
2164    
2165         When this is compiled, PCRE records the fact that a  match  must  start         When  this  is  compiled, PCRE records the fact that a match must start
2166         with  the  character  "A".  Suppose the subject string is "DEFABC". The         with the character "A". Suppose the subject  string  is  "DEFABC".  The
2167         start-up optimization scans along the subject, finds "A" and  runs  the         start-up  optimization  scans along the subject, finds "A" and runs the
2168         first  match attempt from there. The (*COMMIT) item means that the pat-         first match attempt from there. The (*COMMIT) item means that the  pat-
2169         tern must match the current starting position, which in this  case,  it         tern  must  match the current starting position, which in this case, it
2170         does.  However,  if  the  same match is run with PCRE_NO_START_OPTIMIZE         does. However, if the same match  is  run  with  PCRE_NO_START_OPTIMIZE
2171         set, the initial scan along the subject string  does  not  happen.  The         set,  the  initial  scan  along the subject string does not happen. The
2172         first  match  attempt  is  run  starting  from "D" and when this fails,         first match attempt is run starting  from  "D"  and  when  this  fails,
2173         (*COMMIT) prevents any further matches  being  tried,  so  the  overall         (*COMMIT)  prevents  any  further  matches  being tried, so the overall
2174         result  is  "no  match". If the pattern is studied, more start-up opti-         result is "no match". If the pattern is studied,  more  start-up  opti-
2175         mizations may be used. For example, a minimum length  for  the  subject         mizations  may  be  used. For example, a minimum length for the subject
2176         may be recorded. Consider the pattern         may be recorded. Consider the pattern
2177    
2178           (*MARK:A)(X|Y)           (*MARK:A)(X|Y)
2179    
2180         The  minimum  length  for  a  match is one character. If the subject is         The minimum length for a match is one  character.  If  the  subject  is
2181         "ABC", there will be attempts to  match  "ABC",  "BC",  "C",  and  then         "ABC",  there  will  be  attempts  to  match "ABC", "BC", "C", and then
2182         finally  an empty string.  If the pattern is studied, the final attempt         finally an empty string.  If the pattern is studied, the final  attempt
2183         does not take place, because PCRE knows that the subject is too  short,         does  not take place, because PCRE knows that the subject is too short,
2184         and  so  the  (*MARK) is never encountered.  In this case, studying the         and so the (*MARK) is never encountered.  In this  case,  studying  the
2185         pattern does not affect the overall match result, which  is  still  "no         pattern  does  not  affect the overall match result, which is still "no
2186         match", but it does affect the auxiliary information that is returned.         match", but it does affect the auxiliary information that is returned.
2187    
2188           PCRE_NO_UTF8_CHECK           PCRE_NO_UTF8_CHECK
2189    
2190         When PCRE_UTF8 is set at compile time, the validity of the subject as a         When PCRE_UTF8 is set at compile time, the validity of the subject as a
2191         UTF-8 string is automatically checked when pcre_exec() is  subsequently         UTF-8  string is automatically checked when pcre_exec() is subsequently
2192         called.   The  value  of  startoffset is also checked to ensure that it         called.  The value of startoffset is also checked  to  ensure  that  it
2193         points to the start of a UTF-8 character. There is a  discussion  about         points  to  the start of a UTF-8 character. There is a discussion about
2194         the  validity  of  UTF-8 strings in the section on UTF-8 support in the         the validity of UTF-8 strings in the section on UTF-8  support  in  the
2195         main pcre page. If  an  invalid  UTF-8  sequence  of  bytes  is  found,         main  pcre  page.  If  an  invalid  UTF-8  sequence  of bytes is found,
2196         pcre_exec()  returns  the  error  PCRE_ERROR_BADUTF8  or,  if PCRE_PAR-         pcre_exec() returns  the  error  PCRE_ERROR_BADUTF8  or,  if  PCRE_PAR-
2197         TIAL_HARD is set and the problem is a truncated UTF-8 character at  the         TIAL_HARD  is set and the problem is a truncated UTF-8 character at the
2198         end  of  the  subject, PCRE_ERROR_SHORTUTF8. In both cases, information         end of the subject, PCRE_ERROR_SHORTUTF8. In  both  cases,  information
2199         about the precise nature of the error may also  be  returned  (see  the         about  the  precise  nature  of the error may also be returned (see the
2200         descriptions  of these errors in the section entitled Error return val-         descriptions of these errors in the section entitled Error return  val-
2201         ues from pcre_exec() below).  If startoffset contains a value that does         ues from pcre_exec() below).  If startoffset contains a value that does
2202         not  point to the start of a UTF-8 character (or to the end of the sub-         not point to the start of a UTF-8 character (or to the end of the  sub-
2203         ject), PCRE_ERROR_BADUTF8_OFFSET is returned.         ject), PCRE_ERROR_BADUTF8_OFFSET is returned.
2204    
2205         If you already know that your subject is valid, and you  want  to  skip         If  you  already  know that your subject is valid, and you want to skip
2206         these    checks    for   performance   reasons,   you   can   set   the         these   checks   for   performance   reasons,   you   can    set    the
2207         PCRE_NO_UTF8_CHECK option when calling pcre_exec(). You might  want  to         PCRE_NO_UTF8_CHECK  option  when calling pcre_exec(). You might want to
2208         do  this  for the second and subsequent calls to pcre_exec() if you are         do this for the second and subsequent calls to pcre_exec() if  you  are
2209         making repeated calls to find all  the  matches  in  a  single  subject         making  repeated  calls  to  find  all  the matches in a single subject
2210         string.  However,  you  should  be  sure  that the value of startoffset         string. However, you should be  sure  that  the  value  of  startoffset
2211         points to the start of a UTF-8 character (or the end of  the  subject).         points  to  the start of a UTF-8 character (or the end of the subject).
2212         When  PCRE_NO_UTF8_CHECK is set, the effect of passing an invalid UTF-8         When PCRE_NO_UTF8_CHECK is set, the effect of passing an invalid  UTF-8
2213         string as a subject or an invalid value of  startoffset  is  undefined.         string  as  a  subject or an invalid value of startoffset is undefined.
2214         Your program may crash.         Your program may crash.
2215    
2216           PCRE_PARTIAL_HARD           PCRE_PARTIAL_HARD
2217           PCRE_PARTIAL_SOFT           PCRE_PARTIAL_SOFT
2218    
2219         These  options turn on the partial matching feature. For backwards com-         These options turn on the partial matching feature. For backwards  com-
2220         patibility, PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A  partial         patibility,  PCRE_PARTIAL is a synonym for PCRE_PARTIAL_SOFT. A partial
2221         match  occurs if the end of the subject string is reached successfully,         match occurs if the end of the subject string is reached  successfully,
2222         but there are not enough subject characters to complete the  match.  If         but  there  are not enough subject characters to complete the match. If
2223         this happens when PCRE_PARTIAL_SOFT (but not PCRE_PARTIAL_HARD) is set,         this happens when PCRE_PARTIAL_SOFT (but not PCRE_PARTIAL_HARD) is set,
2224         matching continues by testing any remaining alternatives.  Only  if  no         matching  continues  by  testing any remaining alternatives. Only if no
2225         complete  match  can be found is PCRE_ERROR_PARTIAL returned instead of         complete match can be found is PCRE_ERROR_PARTIAL returned  instead  of
2226         PCRE_ERROR_NOMATCH. In other words,  PCRE_PARTIAL_SOFT  says  that  the         PCRE_ERROR_NOMATCH.  In  other  words,  PCRE_PARTIAL_SOFT says that the
2227         caller  is  prepared to handle a partial match, but only if no complete         caller is prepared to handle a partial match, but only if  no  complete
2228         match can be found.         match can be found.
2229    
2230         If PCRE_PARTIAL_HARD is set, it overrides  PCRE_PARTIAL_SOFT.  In  this         If  PCRE_PARTIAL_HARD  is  set, it overrides PCRE_PARTIAL_SOFT. In this
2231         case,  if  a  partial  match  is found, pcre_exec() immediately returns         case, if a partial match  is  found,  pcre_exec()  immediately  returns
2232         PCRE_ERROR_PARTIAL, without  considering  any  other  alternatives.  In         PCRE_ERROR_PARTIAL,  without  considering  any  other  alternatives. In
2233         other  words, when PCRE_PARTIAL_HARD is set, a partial match is consid-         other words, when PCRE_PARTIAL_HARD is set, a partial match is  consid-
2234         ered to be more important than an alternative complete match.         ered to be more important than an alternative complete match.
2235    
2236         In both cases, the portion of the string that was  inspected  when  the         In  both  cases,  the portion of the string that was inspected when the
2237         partial match was found is set as the first matching string. There is a         partial match was found is set as the first matching string. There is a
2238         more detailed discussion of partial and  multi-segment  matching,  with         more  detailed  discussion  of partial and multi-segment matching, with
2239         examples, in the pcrepartial documentation.         examples, in the pcrepartial documentation.
2240    
2241     The string to be matched by pcre_exec()     The string to be matched by pcre_exec()
2242    
2243         The  subject string is passed to pcre_exec() as a pointer in subject, a         The subject string is passed to pcre_exec() as a pointer in subject,  a
2244         length (in bytes) in length, and a starting byte offset in startoffset.         length (in bytes) in length, and a starting byte offset in startoffset.
2245         If  this  is  negative  or  greater  than  the  length  of the subject,         If this is  negative  or  greater  than  the  length  of  the  subject,
2246         pcre_exec() returns PCRE_ERROR_BADOFFSET. When the starting  offset  is         pcre_exec()  returns  PCRE_ERROR_BADOFFSET. When the starting offset is
2247         zero,  the  search  for a match starts at the beginning of the subject,         zero, the search for a match starts at the beginning  of  the  subject,
2248         and this is by far the most common case. In UTF-8 mode, the byte offset         and this is by far the most common case. In UTF-8 mode, the byte offset
2249         must  point  to  the start of a UTF-8 character (or the end of the sub-         must point to the start of a UTF-8 character (or the end  of  the  sub-
2250         ject). Unlike the pattern string, the subject may contain  binary  zero         ject).  Unlike  the pattern string, the subject may contain binary zero
2251         bytes.         bytes.
2252    
2253         A  non-zero  starting offset is useful when searching for another match         A non-zero starting offset is useful when searching for  another  match
2254         in the same subject by calling pcre_exec() again after a previous  suc-         in  the same subject by calling pcre_exec() again after a previous suc-
2255         cess.   Setting  startoffset differs from just passing over a shortened         cess.  Setting startoffset differs from just passing over  a  shortened
2256         string and setting PCRE_NOTBOL in the case of  a  pattern  that  begins         string  and  setting  PCRE_NOTBOL  in the case of a pattern that begins
2257         with any kind of lookbehind. For example, consider the pattern         with any kind of lookbehind. For example, consider the pattern
2258    
2259           \Biss\B           \Biss\B
2260    
2261         which  finds  occurrences  of "iss" in the middle of words. (\B matches         which finds occurrences of "iss" in the middle of  words.  (\B  matches
2262         only if the current position in the subject is not  a  word  boundary.)         only  if  the  current position in the subject is not a word boundary.)
2263         When  applied to the string "Mississippi" the first call to pcre_exec()         When applied to the string "Mississippi" the first call to  pcre_exec()
2264         finds the first occurrence. If pcre_exec() is called  again  with  just         finds  the  first  occurrence. If pcre_exec() is called again with just
2265         the  remainder  of  the  subject, namely  "issippi", it does not match,         the remainder of the subject, namely  "issippi",  it  does  not  match,
2266         because \B is always false at the start of the subject, which is deemed         because \B is always false at the start of the subject, which is deemed
2267         to  be  a  word  boundary. However, if pcre_exec() is passed the entire         to be a word boundary. However, if pcre_exec()  is  passed  the  entire
2268         string again, but with startoffset set to 4, it finds the second occur-         string again, but with startoffset set to 4, it finds the second occur-
2269         rence  of "iss" because it is able to look behind the starting point to         rence of "iss" because it is able to look behind the starting point  to
2270         discover that it is preceded by a letter.         discover that it is preceded by a letter.
2271    
2272         Finding all the matches in a subject is tricky  when  the  pattern  can         Finding  all  the  matches  in a subject is tricky when the pattern can
2273         match an empty string. It is possible to emulate Perl's /g behaviour by         match an empty string. It is possible to emulate Perl's /g behaviour by
2274         first  trying  the  match  again  at  the   same   offset,   with   the         first   trying   the   match   again  at  the  same  offset,  with  the
2275         PCRE_NOTEMPTY_ATSTART  and  PCRE_ANCHORED  options,  and  then  if that         PCRE_NOTEMPTY_ATSTART and  PCRE_ANCHORED  options,  and  then  if  that
2276         fails, advancing the starting  offset  and  trying  an  ordinary  match         fails,  advancing  the  starting  offset  and  trying an ordinary match
2277         again. There is some code that demonstrates how to do this in the pcre-         again. There is some code that demonstrates how to do this in the pcre-
2278         demo sample program. In the most general case, you have to check to see         demo sample program. In the most general case, you have to check to see
2279         if  the newline convention recognizes CRLF as a newline, and if so, and         if the newline convention recognizes CRLF as a newline, and if so,  and
2280         the current character is CR followed by LF, advance the starting offset         the current character is CR followed by LF, advance the starting offset
2281         by two characters instead of one.         by two characters instead of one.
2282    
2283         If  a  non-zero starting offset is passed when the pattern is anchored,         If a non-zero starting offset is passed when the pattern  is  anchored,
2284         one attempt to match at the given offset is made. This can only succeed         one attempt to match at the given offset is made. This can only succeed
2285         if  the  pattern  does  not require the match to be at the start of the         if the pattern does not require the match to be at  the  start  of  the
2286         subject.         subject.
2287    
2288     How pcre_exec() returns captured substrings     How pcre_exec() returns captured substrings
2289    
2290         In general, a pattern matches a certain portion of the subject, and  in         In  general, a pattern matches a certain portion of the subject, and in
2291         addition,  further  substrings  from  the  subject may be picked out by         addition, further substrings from the subject  may  be  picked  out  by
2292         parts of the pattern. Following the usage  in  Jeffrey  Friedl's  book,         parts  of  the  pattern.  Following the usage in Jeffrey Friedl's book,
2293         this  is  called "capturing" in what follows, and the phrase "capturing         this is called "capturing" in what follows, and the  phrase  "capturing
2294         subpattern" is used for a fragment of a pattern that picks out  a  sub-         subpattern"  is  used for a fragment of a pattern that picks out a sub-
2295         string.  PCRE  supports several other kinds of parenthesized subpattern         string. PCRE supports several other kinds of  parenthesized  subpattern
2296         that do not cause substrings to be captured.         that do not cause substrings to be captured.
2297    
2298         Captured substrings are returned to the caller via a vector of integers         Captured substrings are returned to the caller via a vector of integers
2299         whose  address is passed in ovector. The number of elements in the vec-         whose address is passed in ovector. The number of elements in the  vec-
2300         tor is passed in ovecsize, which must be a non-negative  number.  Note:         tor  is  passed in ovecsize, which must be a non-negative number. Note:
2301         this argument is NOT the size of ovector in bytes.         this argument is NOT the size of ovector in bytes.
2302    
2303         The  first  two-thirds of the vector is used to pass back captured sub-         The first two-thirds of the vector is used to pass back  captured  sub-
2304         strings, each substring using a pair of integers. The  remaining  third         strings,  each  substring using a pair of integers. The remaining third
2305         of  the  vector is used as workspace by pcre_exec() while matching cap-         of the vector is used as workspace by pcre_exec() while  matching  cap-
2306         turing subpatterns, and is not available for passing back  information.         turing  subpatterns, and is not available for passing back information.
2307         The  number passed in ovecsize should always be a multiple of three. If         The number passed in ovecsize should always be a multiple of three.  If
2308         it is not, it is rounded down.         it is not, it is rounded down.
2309    
2310         When a match is successful, information about  captured  substrings  is         When  a  match  is successful, information about captured substrings is
2311         returned  in  pairs  of integers, starting at the beginning of ovector,         returned in pairs of integers, starting at the  beginning  of  ovector,
2312         and continuing up to two-thirds of its length at the  most.  The  first         and  continuing  up  to two-thirds of its length at the most. The first
2313         element  of  each pair is set to the byte offset of the first character         element of each pair is set to the byte offset of the  first  character
2314         in a substring, and the second is set to the byte offset of  the  first         in  a  substring, and the second is set to the byte offset of the first
2315         character  after  the end of a substring. Note: these values are always         character after the end of a substring. Note: these values  are  always
2316         byte offsets, even in UTF-8 mode. They are not character counts.         byte offsets, even in UTF-8 mode. They are not character counts.
2317    
2318         The first pair of integers, ovector[0]  and  ovector[1],  identify  the         The  first  pair  of  integers, ovector[0] and ovector[1], identify the
2319         portion  of  the subject string matched by the entire pattern. The next         portion of the subject string matched by the entire pattern.  The  next
2320         pair is used for the first capturing subpattern, and so on.  The  value         pair  is  used for the first capturing subpattern, and so on. The value
2321         returned by pcre_exec() is one more than the highest numbered pair that         returned by pcre_exec() is one more than the highest numbered pair that
2322         has been set.  For example, if two substrings have been  captured,  the         has  been  set.  For example, if two substrings have been captured, the
2323         returned  value is 3. If there are no capturing subpatterns, the return         returned value is 3. If there are no capturing subpatterns, the  return
2324         value from a successful match is 1, indicating that just the first pair         value from a successful match is 1, indicating that just the first pair
2325         of offsets has been set.         of offsets has been set.
2326    
2327         If a capturing subpattern is matched repeatedly, it is the last portion         If a capturing subpattern is matched repeatedly, it is the last portion
2328         of the string that it matched that is returned.         of the string that it matched that is returned.
2329    
2330         If the vector is too small to hold all the captured substring  offsets,         If  the vector is too small to hold all the captured substring offsets,
2331         it is used as far as possible (up to two-thirds of its length), and the         it is used as far as possible (up to two-thirds of its length), and the
2332         function returns a value of zero. If the substring offsets are  not  of         function  returns a value of zero. If neither the actual string matched
2333         interest,  pcre_exec()  may  be  called with ovector passed as NULL and         nor any captured substrings are of interest, pcre_exec() may be  called
2334         ovecsize as zero. However, if the pattern contains back references  and         with  ovector passed as NULL and ovecsize as zero. However, if the pat-
2335         the  ovector is not big enough to remember the related substrings, PCRE         tern contains back references and the ovector  is  not  big  enough  to
2336         has to get additional memory for use during matching. Thus it  is  usu-         remember  the related substrings, PCRE has to get additional memory for
2337         ally advisable to supply an ovector.         use during matching. Thus it is usually advisable to supply an  ovector
2338           of reasonable size.
2339    
2340           There  are  some  cases where zero is returned (indicating vector over-
2341           flow) when in fact the vector is exactly the right size for  the  final
2342           match. For example, consider the pattern
2343    
2344             (a)(?:(b)c|bd)
2345    
2346           If  a  vector of 6 elements (allowing for only 1 captured substring) is
2347           given with subject string "abd", pcre_exec() will try to set the second
2348           captured string, thereby recording a vector overflow, before failing to
2349           match "c" and backing up  to  try  the  second  alternative.  The  zero
2350           return,  however,  does  correctly  indicate that the maximum number of
2351           slots (namely 2) have been filled. In similar cases where there is tem-
2352           porary  overflow,  but  the final number of used slots is actually less
2353           than the maximum, a non-zero value is returned.
2354    
2355         The pcre_fullinfo() function can be used to find out how many capturing         The pcre_fullinfo() function can be used to find out how many capturing
2356         subpatterns there are in a compiled  pattern.  The  smallest  size  for         subpatterns  there  are  in  a  compiled pattern. The smallest size for
2357         ovector  that  will allow for n captured substrings, in addition to the         ovector that will allow for n captured substrings, in addition  to  the
2358         offsets of the substring matched by the whole pattern, is (n+1)*3.         offsets of the substring matched by the whole pattern, is (n+1)*3.
2359    
2360         It is possible for capturing subpattern number n+1 to match  some  part         It  is  possible for capturing subpattern number n+1 to match some part
2361         of the subject when subpattern n has not been used at all. For example,         of the subject when subpattern n has not been used at all. For example,
2362         if the string "abc" is matched  against  the  pattern  (a|(z))(bc)  the         if  the  string  "abc"  is  matched against the pattern (a|(z))(bc) the
2363         return from the function is 4, and subpatterns 1 and 3 are matched, but         return from the function is 4, and subpatterns 1 and 3 are matched, but
2364         2 is not. When this happens, both values in  the  offset  pairs  corre-         2  is  not.  When  this happens, both values in the offset pairs corre-
2365         sponding to unused subpatterns are set to -1.         sponding to unused subpatterns are set to -1.
2366    
2367         Offset  values  that correspond to unused subpatterns at the end of the         Offset values that correspond to unused subpatterns at the end  of  the
2368         expression are also set to -1. For example,  if  the  string  "abc"  is         expression  are  also  set  to  -1. For example, if the string "abc" is
2369         matched  against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are not         matched against the pattern (abc)(x(yz)?)? subpatterns 2 and 3 are  not
2370         matched. The return from the function is 2, because  the  highest  used         matched.  The  return  from the function is 2, because the highest used
2371         capturing  subpattern  number  is  1,  and  the  offsets for the second         capturing  subpattern  number  is  1,  and the offsets for  the  second
2372         and third capturing subpatterns (assuming the vector is  large  enough,         and  third  capturing subpatterns (assuming the vector is large enough,
2373         of course) are set to -1.         of course) are set to -1.
2374    
2375         Note: Elements of ovector that do not correspond to capturing parenthe-         Note: Elements in the first two-thirds of ovector that  do  not  corre-
2376         ses in the pattern are never changed. That is, if a pattern contains  n         spond  to  capturing parentheses in the pattern are never changed. That
2377         capturing parentheses, no more than ovector[0] to ovector[2n+1] are set         is, if a pattern contains n capturing parentheses, no more  than  ovec-
2378         by pcre_exec(). The other elements retain whatever values  they  previ-         tor[0]  to ovector[2n+1] are set by pcre_exec(). The other elements (in
2379         ously had.         the first two-thirds) retain whatever values they previously had.
2380    
2381         Some  convenience  functions  are  provided for extracting the captured         Some convenience functions are provided  for  extracting  the  captured
2382         substrings as separate strings. These are described below.         substrings as separate strings. These are described below.
2383    
2384     Error return values from pcre_exec()     Error return values from pcre_exec()
2385    
2386         If pcre_exec() fails, it returns a negative number. The  following  are         If  pcre_exec()  fails, it returns a negative number. The following are
2387         defined in the header file:         defined in the header file:
2388    
2389           PCRE_ERROR_NOMATCH        (-1)           PCRE_ERROR_NOMATCH        (-1)
# Line 2265  MATCHING A PATTERN: THE TRADITIONAL FUNC Line 2392  MATCHING A PATTERN: THE TRADITIONAL FUNC
2392    
2393           PCRE_ERROR_NULL           (-2)           PCRE_ERROR_NULL           (-2)
2394    
2395         Either  code  or  subject  was  passed as NULL, or ovector was NULL and         Either code or subject was passed as NULL,  or  ovector  was  NULL  and
2396         ovecsize was not zero.         ovecsize was not zero.
2397    
2398           PCRE_ERROR_BADOPTION      (-3)           PCRE_ERROR_BADOPTION      (-3)
# Line 2274  MATCHING A PATTERN: THE TRADITIONAL FUNC Line 2401  MATCHING A PATTERN: THE TRADITIONAL FUNC
2401    
2402           PCRE_ERROR_BADMAGIC       (-4)           PCRE_ERROR_BADMAGIC       (-4)
2403    
2404         PCRE stores a 4-byte "magic number" at the start of the compiled  code,         PCRE  stores a 4-byte "magic number" at the start of the compiled code,
2405         to catch the case when it is passed a junk pointer and to detect when a         to catch the case when it is passed a junk pointer and to detect when a
2406         pattern that was compiled in an environment of one endianness is run in         pattern that was compiled in an environment of one endianness is run in
2407         an  environment  with the other endianness. This is the error that PCRE         an environment with the other endianness. This is the error  that  PCRE
2408         gives when the magic number is not present.         gives when the magic number is not present.
2409    
2410           PCRE_ERROR_UNKNOWN_OPCODE (-5)           PCRE_ERROR_UNKNOWN_OPCODE (-5)
2411    
2412         While running the pattern match, an unknown item was encountered in the         While running the pattern match, an unknown item was encountered in the
2413         compiled  pattern.  This  error  could be caused by a bug in PCRE or by         compiled pattern. This error could be caused by a bug  in  PCRE  or  by
2414         overwriting of the compiled pattern.         overwriting of the compiled pattern.
2415    
2416           PCRE_ERROR_NOMEMORY       (-6)           PCRE_ERROR_NOMEMORY       (-6)
2417    
2418         If a pattern contains back references, but the ovector that  is  passed         If  a  pattern contains back references, but the ovector that is passed
2419         to pcre_exec() is not big enough to remember the referenced substrings,         to pcre_exec() is not big enough to remember the referenced substrings,
2420         PCRE gets a block of memory at the start of matching to  use  for  this         PCRE  gets  a  block of memory at the start of matching to use for this
2421         purpose.  If the call via pcre_malloc() fails, this error is given. The         purpose. If the call via pcre_malloc() fails, this error is given.  The
2422         memory is automatically freed at the end of matching.         memory is automatically freed at the end of matching.
2423    
2424         This error is also given if pcre_stack_malloc() fails  in  pcre_exec().         This  error  is also given if pcre_stack_malloc() fails in pcre_exec().
2425         This  can happen only when PCRE has been compiled with --disable-stack-         This can happen only when PCRE has been compiled with  --disable-stack-
2426         for-recursion.         for-recursion.
2427    
2428           PCRE_ERROR_NOSUBSTRING    (-7)           PCRE_ERROR_NOSUBSTRING    (-7)
2429    
2430         This error is used by the pcre_copy_substring(),  pcre_get_substring(),         This  error is used by the pcre_copy_substring(), pcre_get_substring(),
2431         and  pcre_get_substring_list()  functions  (see  below).  It  is  never         and  pcre_get_substring_list()  functions  (see  below).  It  is  never
2432         returned by pcre_exec().         returned by pcre_exec().
2433    
2434           PCRE_ERROR_MATCHLIMIT     (-8)           PCRE_ERROR_MATCHLIMIT     (-8)
2435    
2436         The backtracking limit, as specified by  the  match_limit  field  in  a         The  backtracking  limit,  as  specified  by the match_limit field in a
2437         pcre_extra  structure  (or  defaulted) was reached. See the description         pcre_extra structure (or defaulted) was reached.  See  the  description
2438         above.         above.
2439    
2440           PCRE_ERROR_CALLOUT        (-9)           PCRE_ERROR_CALLOUT        (-9)
2441    
2442         This error is never generated by pcre_exec() itself. It is provided for         This error is never generated by pcre_exec() itself. It is provided for
2443         use  by  callout functions that want to yield a distinctive error code.         use by callout functions that want to yield a distinctive  error  code.
2444         See the pcrecallout documentation for details.         See the pcrecallout documentation for details.
2445    
2446           PCRE_ERROR_BADUTF8        (-10)           PCRE_ERROR_BADUTF8        (-10)
2447    
2448         A string that contains an invalid UTF-8 byte sequence was passed  as  a         A  string  that contains an invalid UTF-8 byte sequence was passed as a
2449         subject,  and the PCRE_NO_UTF8_CHECK option was not set. If the size of         subject, and the PCRE_NO_UTF8_CHECK option was not set. If the size  of
2450         the output vector (ovecsize) is at least 2,  the  byte  offset  to  the         the  output  vector  (ovecsize)  is  at least 2, the byte offset to the
2451         start  of  the  invalid  UTF-8  character  is  placed in the first ele-         start  of  the  invalid  UTF-8 character is placed in  the  first  ele-
2452         ment, and a reason code is placed in the  second  element.  The  reason         ment,  and  a  reason  code is placed in the second element. The reason
2453         codes are listed in the following section.  For backward compatibility,         codes are listed in the following section.  For backward compatibility,
2454         if PCRE_PARTIAL_HARD is set and the problem is a truncated UTF-8  char-         if  PCRE_PARTIAL_HARD is set and the problem is a truncated UTF-8 char-
2455         acter   at   the   end   of   the   subject  (reason  codes  1  to  5),         acter  at  the  end  of  the   subject   (reason   codes   1   to   5),
2456         PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8.         PCRE_ERROR_SHORTUTF8 is returned instead of PCRE_ERROR_BADUTF8.
2457    
2458           PCRE_ERROR_BADUTF8_OFFSET (-11)           PCRE_ERROR_BADUTF8_OFFSET (-11)
2459    
2460         The UTF-8 byte sequence that was passed as a subject  was  checked  and         The  UTF-8  byte  sequence that was passed as a subject was checked and
2461         found  to be valid (the PCRE_NO_UTF8_CHECK option was not set), but the         found to be valid (the PCRE_NO_UTF8_CHECK option was not set), but  the
2462         value of startoffset did not point to the beginning of a UTF-8  charac-         value  of startoffset did not point to the beginning of a UTF-8 charac-
2463         ter or the end of the subject.         ter or the end of the subject.
2464    
2465           PCRE_ERROR_PARTIAL        (-12)           PCRE_ERROR_PARTIAL        (-12)
2466    
2467         The  subject  string did not match, but it did match partially. See the         The subject string did not match, but it did match partially.  See  the
2468         pcrepartial documentation for details of partial matching.         pcrepartial documentation for details of partial matching.
2469    
2470           PCRE_ERROR_BADPARTIAL     (-13)           PCRE_ERROR_BADPARTIAL     (-13)
2471    
2472         This code is no longer in  use.  It  was  formerly  returned  when  the         This  code  is  no  longer  in  use.  It was formerly returned when the
2473         PCRE_PARTIAL  option  was used with a compiled pattern containing items         PCRE_PARTIAL option was used with a compiled pattern  containing  items
2474         that were  not  supported  for  partial  matching.  From  release  8.00         that  were  not  supported  for  partial  matching.  From  release 8.00
2475         onwards, there are no restrictions on partial matching.         onwards, there are no restrictions on partial matching.
2476    
2477           PCRE_ERROR_INTERNAL       (-14)           PCRE_ERROR_INTERNAL       (-14)
2478    
2479         An  unexpected  internal error has occurred. This error could be caused         An unexpected internal error has occurred. This error could  be  caused
2480         by a bug in PCRE or by overwriting of the compiled pattern.         by a bug in PCRE or by overwriting of the compiled pattern.
2481    
2482           PCRE_ERROR_BADCOUNT       (-15)           PCRE_ERROR_BADCOUNT       (-15)
# Line 2359  MATCHING A PATTERN: THE TRADITIONAL FUNC Line 2486  MATCHING A PATTERN: THE TRADITIONAL FUNC
2486           PCRE_ERROR_RECURSIONLIMIT (-21)           PCRE_ERROR_RECURSIONLIMIT (-21)
2487    
2488         The internal recursion limit, as specified by the match_limit_recursion         The internal recursion limit, as specified by the match_limit_recursion
2489         field  in  a  pcre_extra  structure (or defaulted) was reached. See the         field in a pcre_extra structure (or defaulted)  was  reached.  See  the
2490         description above.         description above.
2491    
2492           PCRE_ERROR_BADNEWLINE     (-23)           PCRE_ERROR_BADNEWLINE     (-23)
# Line 2373  MATCHING A PATTERN: THE TRADITIONAL FUNC Line 2500  MATCHING A PATTERN: THE TRADITIONAL FUNC
2500    
2501           PCRE_ERROR_SHORTUTF8      (-25)           PCRE_ERROR_SHORTUTF8      (-25)
2502    
2503         This  error  is returned instead of PCRE_ERROR_BADUTF8 when the subject         This error is returned instead of PCRE_ERROR_BADUTF8 when  the  subject
2504         string ends with a truncated UTF-8 character and the  PCRE_PARTIAL_HARD         string  ends with a truncated UTF-8 character and the PCRE_PARTIAL_HARD
2505         option  is  set.   Information  about  the  failure  is returned as for         option is set.  Information  about  the  failure  is  returned  as  for
2506         PCRE_ERROR_BADUTF8. It is in fact sufficient to detect this  case,  but         PCRE_ERROR_BADUTF8.  It  is in fact sufficient to detect this case, but
2507         this  special error code for PCRE_PARTIAL_HARD precedes the implementa-         this special error code for PCRE_PARTIAL_HARD precedes the  implementa-
2508         tion of returned information; it is retained for backwards  compatibil-         tion  of returned information; it is retained for backwards compatibil-
2509         ity.         ity.
2510    
2511           PCRE_ERROR_RECURSELOOP    (-26)           PCRE_ERROR_RECURSELOOP    (-26)
2512    
2513         This error is returned when pcre_exec() detects a recursion loop within         This error is returned when pcre_exec() detects a recursion loop within
2514         the pattern. Specifically, it means that either the whole pattern or  a         the  pattern. Specifically, it means that either the whole pattern or a
2515         subpattern  has been called recursively for the second time at the same         subpattern has been called recursively for the second time at the  same
2516         position in the subject string. Some simple patterns that might do this         position in the subject string. Some simple patterns that might do this
2517         are  detected  and faulted at compile time, but more complicated cases,         are detected and faulted at compile time, but more  complicated  cases,
2518         in particular mutual recursions between two different subpatterns, can-         in particular mutual recursions between two different subpatterns, can-
2519         not be detected until run time.         not be detected until run time.
2520    
2521             PCRE_ERROR_JIT_STACKLIMIT (-27)
2522    
2523           This error is returned when a pattern  that  was  successfully  studied
2524           using  the PCRE_STUDY_JIT_COMPILE option is being matched, but the mem-
2525           ory available for  the  just-in-time  processing  stack  is  not  large
2526           enough. See the pcrejit documentation for more details.
2527    
2528         Error numbers -16 to -20 and -22 are not used by pcre_exec().         Error numbers -16 to -20 and -22 are not used by pcre_exec().
2529    
2530     Reason codes for invalid UTF-8 strings     Reason codes for invalid UTF-8 strings
# Line 2785  MATCHING A PATTERN: THE ALTERNATIVE FUNC Line 2919  MATCHING A PATTERN: THE ALTERNATIVE FUNC
2919         The strings are returned in reverse order of length; that is, the long-         The strings are returned in reverse order of length; that is, the long-
2920         est  matching  string is given first. If there were too many matches to         est  matching  string is given first. If there were too many matches to
2921         fit into ovector, the yield of the function is zero, and the vector  is         fit into ovector, the yield of the function is zero, and the vector  is
2922         filled with the longest matches.         filled  with  the  longest matches. Unlike pcre_exec(), pcre_dfa_exec()
2923           can use the entire ovector for returning matched strings.
2924    
2925     Error returns from pcre_dfa_exec()     Error returns from pcre_dfa_exec()
2926    
2927         The  pcre_dfa_exec()  function returns a negative number when it fails.         The pcre_dfa_exec() function returns a negative number when  it  fails.
2928         Many of the errors are the same  as  for  pcre_exec(),  and  these  are         Many  of  the  errors  are  the  same as for pcre_exec(), and these are
2929         described  above.   There are in addition the following errors that are         described above.  There are in addition the following errors  that  are
2930         specific to pcre_dfa_exec():         specific to pcre_dfa_exec():
2931    
2932           PCRE_ERROR_DFA_UITEM      (-16)           PCRE_ERROR_DFA_UITEM      (-16)
2933    
2934         This return is given if pcre_dfa_exec() encounters an item in the  pat-         This  return is given if pcre_dfa_exec() encounters an item in the pat-
2935         tern  that  it  does not support, for instance, the use of \C or a back         tern that it does not support, for instance, the use of \C  or  a  back
2936         reference.         reference.
2937    
2938           PCRE_ERROR_DFA_UCOND      (-17)           PCRE_ERROR_DFA_UCOND      (-17)
2939    
2940         This return is given if pcre_dfa_exec()  encounters  a  condition  item         This  return  is  given  if pcre_dfa_exec() encounters a condition item
2941         that  uses  a back reference for the condition, or a test for recursion         that uses a back reference for the condition, or a test  for  recursion
2942         in a specific group. These are not supported.         in a specific group. These are not supported.
2943    
2944           PCRE_ERROR_DFA_UMLIMIT    (-18)           PCRE_ERROR_DFA_UMLIMIT    (-18)
2945    
2946         This return is given if pcre_dfa_exec() is called with an  extra  block         This  return  is given if pcre_dfa_exec() is called with an extra block
2947         that contains a setting of the match_limit field. This is not supported         that contains a setting of  the  match_limit  or  match_limit_recursion
2948         (it is meaningless).         fields.  This  is  not  supported (these fields are meaningless for DFA
2949           matching).
2950    
2951           PCRE_ERROR_DFA_WSSIZE     (-19)           PCRE_ERROR_DFA_WSSIZE     (-19)
2952    
# Line 2840  AUTHOR Line 2976  AUTHOR
2976    
2977  REVISION  REVISION
2978    
2979         Last updated: 13 August 2011         Last updated: 06 September 2011
2980         Copyright (c) 1997-2011 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
2981  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
2982    
2983    
2984  PCRECALLOUT(3)                                                  PCRECALLOUT(3)  PCRECALLOUT(3)                                                  PCRECALLOUT(3)
2985    
2986    
# Line 2888  PCRE CALLOUTS Line 3024  PCRE CALLOUTS
3024         pattern is matched. This is useful information when you are  trying  to         pattern is matched. This is useful information when you are  trying  to
3025         optimize the performance of a particular pattern.         optimize the performance of a particular pattern.
3026    
3027           The  use  of callouts in a pattern makes it ineligible for optimization
3028           by  the  just-in-time  compiler.  Studying  such  a  pattern  with  the
3029           PCRE_STUDY_JIT_COMPILE option always fails.
3030    
3031    
3032  MISSING CALLOUTS  MISSING CALLOUTS
3033    
# Line 3029  AUTHOR Line 3169  AUTHOR
3169    
3170  REVISION  REVISION
3171    
3172         Last updated: 31 July 2011         Last updated: 26 August 2011
3173         Copyright (c) 1997-2011 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
3174  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
3175    
3176    
3177  PCRECOMPAT(3)                                                    PCRECOMPAT(3)  PCRECOMPAT(3)                                                    PCRECOMPAT(3)
3178    
3179    
# Line 3198  REVISION Line 3338  REVISION
3338         Last updated: 24 August 2011         Last updated: 24 August 2011
3339         Copyright (c) 1997-2011 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
3340  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
3341    
3342    
3343  PCREPATTERN(3)                                                  PCREPATTERN(3)  PCREPATTERN(3)                                                  PCREPATTERN(3)
3344    
3345    
# Line 5707  REVISION Line 5847  REVISION
5847         Last updated: 24 August 2011         Last updated: 24 August 2011
5848         Copyright (c) 1997-2011 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
5849  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
5850    
5851    
5852  PCRESYNTAX(3)                                                    PCRESYNTAX(3)  PCRESYNTAX(3)                                                    PCRESYNTAX(3)
5853    
5854    
# Line 6077  REVISION Line 6217  REVISION
6217         Last updated: 21 November 2010         Last updated: 21 November 2010
6218         Copyright (c) 1997-2010 University of Cambridge.         Copyright (c) 1997-2010 University of Cambridge.
6219  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
6220    
6221    
6222  PCREUNICODE(3)                                                  PCREUNICODE(3)  PCREUNICODE(3)                                                  PCREUNICODE(3)
6223    
6224    
# Line 6150  UTF-8 AND UNICODE PROPERTY SUPPORT Line 6290  UTF-8 AND UNICODE PROPERTY SUPPORT
6290         If you pass an invalid UTF-8 string  when  PCRE_NO_UTF8_CHECK  is  set,         If you pass an invalid UTF-8 string  when  PCRE_NO_UTF8_CHECK  is  set,
6291         what  happens  depends on why the string is invalid. If the string con-         what  happens  depends on why the string is invalid. If the string con-
6292         forms to the "old" definition of UTF-8 (RFC 2279), it is processed as a         forms to the "old" definition of UTF-8 (RFC 2279), it is processed as a
6293         string  of  characters  in  the  range 0 to 0x7FFFFFFF. In other words,         string  of  characters  in the range 0 to 0x7FFFFFFF by pcre_dfa_exec()
6294         apart from the initial validity test, PCRE (when in UTF-8 mode) handles         and the interpreted version of pcre_exec(). In other words, apart  from
6295         strings  according  to  the more liberal rules of RFC 2279. However, if         the  initial validity test, these functions (when in UTF-8 mode) handle
6296         the string does not even conform to RFC 2279, the result is  undefined.         strings according to the more liberal rules of RFC 2279.  However,  the
6297         Your program may crash.         just-in-time (JIT) optimization for pcre_exec() supports only RFC 3629.
6298           If you are using JIT optimization, or if the string does not even  con-
6299           form to RFC 2279, the result is undefined. Your program may crash.
6300    
6301         If  you  want  to  process  strings  of  values  in the full range 0 to         If  you  want  to  process  strings  of  values  in the full range 0 to
6302         0x7FFFFFFF, encoded in a UTF-8-like manner as per the old RFC, you  can         0x7FFFFFFF, encoded in a UTF-8-like manner as per the old RFC, you  can
6303         set PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in         set PCRE_NO_UTF8_CHECK to bypass the more restrictive test. However, in
6304         this situation, you will have to apply your own validity check.         this situation, you will have to apply your  own  validity  check,  and
6305           avoid the use of JIT optimization.
6306    
6307     General comments about UTF-8 mode     General comments about UTF-8 mode
6308    
6309         1. An unbraced hexadecimal escape sequence (such  as  \xb3)  matches  a         1.  An  unbraced  hexadecimal  escape sequence (such as \xb3) matches a
6310         two-byte UTF-8 character if the value is greater than 127.         two-byte UTF-8 character if the value is greater than 127.
6311    
6312         2.  Octal  numbers  up to \777 are recognized, and match two-byte UTF-8         2. Octal numbers up to \777 are recognized, and  match  two-byte  UTF-8
6313         characters for values greater than \177.         characters for values greater than \177.
6314    
6315         3. Repeat quantifiers apply to complete UTF-8 characters, not to  indi-         3.  Repeat quantifiers apply to complete UTF-8 characters, not to indi-
6316         vidual bytes, for example: \x{100}{3}.         vidual bytes, for example: \x{100}{3}.
6317    
6318         4.  The dot metacharacter matches one UTF-8 character instead of a sin-         4. The dot metacharacter matches one UTF-8 character instead of a  sin-
6319         gle byte.         gle byte.
6320    
6321         5. The escape sequence \C can be used to match a single byte  in  UTF-8         5.  The  escape sequence \C can be used to match a single byte in UTF-8
6322         mode,  but  its  use can lead to some strange effects. This facility is         mode, but its use can lead to some strange effects.  This  facility  is
6323         not available in the alternative matching function, pcre_dfa_exec().         not  available  in  the alternative matching function, pcre_dfa_exec(),
6324           nor is it supported by the JIT  optimization  of  pcre_exec().  If  JIT
6325           optimization  is  requested for a pattern that contains \C, it will not
6326           succeed, and so the matching will be carried out by the  normal  inter-
6327           pretive function.
6328    
6329         6. The character escapes \b, \B, \d, \D, \s, \S, \w, and  \W  correctly         6.  The  character escapes \b, \B, \d, \D, \s, \S, \w, and \W correctly
6330         test characters of any code value, but, by default, the characters that         test characters of any code value, but, by default, the characters that
6331         PCRE recognizes as digits, spaces, or word characters remain  the  same         PCRE  recognizes  as digits, spaces, or word characters remain the same
6332         set  as  before,  all with values less than 256. This remains true even         set as before, all with values less than 256. This  remains  true  even
6333         when PCRE is built to include Unicode property support, because  to  do         when  PCRE  is built to include Unicode property support, because to do
6334         otherwise would slow down PCRE in many common cases. Note in particular         otherwise would slow down PCRE in many common cases. Note in particular
6335         that this applies to \b and \B, because they are defined in terms of \w         that this applies to \b and \B, because they are defined in terms of \w
6336         and  \W. If you really want to test for a wider sense of, say, "digit",         and \W. If you really want to test for a wider sense of, say,  "digit",
6337         you can use explicit Unicode property tests such  as  \p{Nd}.  Alterna-         you  can  use  explicit Unicode property tests such as \p{Nd}. Alterna-
6338         tively,  if  you  set  the  PCRE_UCP option, the way that the character         tively, if you set the PCRE_UCP option,  the  way  that  the  character
6339         escapes work is changed so that Unicode properties are used  to  deter-         escapes  work  is changed so that Unicode properties are used to deter-
6340         mine  which  characters match. There are more details in the section on         mine which characters match. There are more details in the  section  on
6341         generic character types in the pcrepattern documentation.         generic character types in the pcrepattern documentation.
6342    
6343         7. Similarly, characters that match the POSIX named  character  classes         7.  Similarly,  characters that match the POSIX named character classes
6344         are all low-valued characters, unless the PCRE_UCP option is set.         are all low-valued characters, unless the PCRE_UCP option is set.
6345    
6346         8.  However,  the  horizontal  and vertical whitespace matching escapes         8. However, the horizontal and  vertical  whitespace  matching  escapes
6347         (\h, \H, \v, and \V) do match all the appropriate  Unicode  characters,         (\h,  \H,  \v, and \V) do match all the appropriate Unicode characters,
6348         whether or not PCRE_UCP is set.         whether or not PCRE_UCP is set.
6349    
6350         9.  Case-insensitive  matching  applies only to characters whose values         9. Case-insensitive matching applies only to  characters  whose  values
6351         are less than 128, unless PCRE is built with Unicode property  support.         are  less than 128, unless PCRE is built with Unicode property support.
6352         Even  when  Unicode  property support is available, PCRE still uses its         Even when Unicode property support is available, PCRE  still  uses  its
6353         own character tables when checking the case of  low-valued  characters,         own  character  tables when checking the case of low-valued characters,
6354         so  as not to degrade performance.  The Unicode property information is         so as not to degrade performance.  The Unicode property information  is
6355         used only for characters with higher values. Furthermore, PCRE supports         used only for characters with higher values. Furthermore, PCRE supports
6356         case-insensitive  matching  only  when  there  is  a one-to-one mapping         case-insensitive matching only  when  there  is  a  one-to-one  mapping
6357         between a letter's cases. There are a small number of many-to-one  map-         between  a letter's cases. There are a small number of many-to-one map-
6358         pings in Unicode; these are not supported by PCRE.         pings in Unicode; these are not supported by PCRE.
6359    
6360    
# Line 6220  AUTHOR Line 6367  AUTHOR
6367    
6368  REVISION  REVISION
6369    
6370         Last updated: 24 August 2011         Last updated: 06 September 2011
6371         Copyright (c) 1997-2011 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
6372  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
6373    
6374    
6375    PCREJIT(3)                                                          PCREJIT(3)
6376    
6377    
6378    NAME
6379           PCRE - Perl-compatible regular expressions
6380    
6381    
6382    PCRE JUST-IN-TIME COMPILER SUPPORT
6383    
6384           Just-in-time  compiling  is a heavyweight optimization that can greatly
6385           speed up pattern matching. However, it comes at the cost of extra  pro-
6386           cessing before the match is performed. Therefore, it is of most benefit
6387           when the same pattern is going to be matched many times. This does  not
6388           necessarily  mean  many  calls  of  pcre_exec();  if the pattern is not
6389           anchored, matching attempts may take place many times at various  posi-
6390           tions  in  the  subject,  even for a single call to pcre_exec(). If the
6391           subject string is very long, it may still pay to use  JIT  for  one-off
6392           matches.
6393    
6394           JIT   support  applies  only  to  the  traditional  matching  function,
6395           pcre_exec(). It does not apply when pcre_dfa_exec() is being used.  The
6396           code for this support was written by Zoltan Herczeg.
6397    
6398    
6399    AVAILABILITY OF JIT SUPPORT
6400    
6401           JIT  support  is  an  optional  feature of PCRE. The "configure" option
6402           --enable-jit (or equivalent CMake option) must  be  set  when  PCRE  is
6403           built  if  you want to use JIT. The support is limited to the following
6404           hardware platforms:
6405    
6406             ARM v5, v7, and Thumb2
6407             Intel x86 32-bit and 64-bit
6408             MIPS 32-bit
6409             Power PC 32-bit and 64-bit
6410    
6411           If --enable-jit is set on an unsupported platform, compilation fails.
6412    
6413           A program can tell if JIT support is available by calling pcre_config()
6414           with the PCRE_CONFIG_JIT option. The result is 1 when JIT is available,
6415           and 0 otherwise. However, a simple program does not need to check  this
6416           in order to use JIT. The API is implemented in a way that falls back to
6417           the ordinary PCRE code if JIT is not available.
6418    
6419    
6420    SIMPLE USE OF JIT
6421    
6422           You have to do two things to make use of the JIT support  in  the  sim-
6423           plest way:
6424    
6425             (1) Call pcre_study() with the PCRE_STUDY_JIT_COMPILE option for
6426                 each compiled pattern, and pass the resulting pcre_extra block to
6427                 pcre_exec().
6428    
6429             (2) Use pcre_free_study() to free the pcre_extra block when it is
6430                 no longer needed instead of just freeing it yourself. This
6431                 ensures that any JIT data is also freed.
6432    
6433           In  some circumstances you may need to call additional functions. These
6434           are described in the  section  entitled  "Controlling  the  JIT  stack"
6435           below.
6436    
6437           If JIT support is not available, PCRE_STUDY_JIT_COMPILE is ignored, and
6438           no JIT data is set up. Otherwise, the compiled pattern is passed to the
6439           JIT  compiler,  which  turns  it  into  machine code that executes much
6440           faster than the normal interpretive code. When pcre_exec() is passed  a
6441           pcre_extra  block  containing  a  pointer  to  JIT  code, it obeys that
6442           instead of the normal code. The result is identical, but the code  runs
6443           much faster.
6444    
6445           There  are some pcre_exec() options that are not supported for JIT exe-
6446           cution. There are also some  pattern  items  that  JIT  cannot  handle.
6447           Details  are  given below. In both cases, execution automatically falls
6448           back to the interpretive code.
6449    
6450           If the JIT compiler finds an unsupported item, no JIT  data  is  gener-
6451           ated.  You  can find out if JIT execution is available after studying a
6452           pattern by calling pcre_fullinfo() with  the  PCRE_INFO_JIT  option.  A
6453           result  of  1  means that JIT compilation was successful. A result of 0
6454           means that JIT support is not available, or the pattern was not studied
6455           with PCRE_STUDY_JIT_COMPILE, or the JIT compiler was not able to handle
6456           the pattern.
6457    
6458    
6459    UNSUPPORTED OPTIONS AND PATTERN ITEMS
6460    
6461           The only pcre_exec() options that are supported for JIT  execution  are
6462           PCRE_NO_UTF8_CHECK,   PCRE_NOTBOL,   PCRE_NOTEOL,   PCRE_NOTEMPTY,  and
6463           PCRE_NOTEMPTY_ATSTART. Note in particular that partial matching is  not
6464           supported.
6465    
6466           The unsupported pattern items are:
6467    
6468             \C            match a single byte, even in UTF-8 mode
6469             (?Cn)          callouts
6470             (?(<name>)...  conditional test on setting of a named subpattern
6471             (?(R)...       conditional test on whole pattern recursion
6472             (?(Rn)...      conditional test on recursion, by number
6473             (?(R&name)...  conditional test on recursion, by name
6474             (*COMMIT)      )
6475             (*MARK)        )
6476             (*PRUNE)       ) the backtracking control verbs
6477             (*SKIP)        )
6478             (*THEN)        )
6479    
6480           Support for some of these may be added in future.
6481    
6482    
6483    RETURN VALUES FROM JIT EXECUTION
6484    
6485           When  a  pattern  is matched using JIT execution, the return values are
6486           the same as those given by the interpretive pcre_exec() code, with  the
6487           addition  of  one new error code: PCRE_ERROR_JIT_STACKLIMIT. This means
6488           that the memory used for the JIT stack was insufficient. See  "Control-
6489           ling the JIT stack" below for a discussion of JIT stack usage. For com-
6490           patibility with the interpretive pcre_exec() code, no  more  than  two-
6491           thirds  of  the ovector argument is used for passing back captured sub-
6492           strings.
6493    
6494           The error code PCRE_ERROR_MATCHLIMIT is returned by  the  JIT  code  if
6495           searching  a  very large pattern tree goes on for too long, as it is in
6496           the same circumstance when JIT is not used, but the details of  exactly
6497           what  is  counted are not the same. The PCRE_ERROR_RECURSIONLIMIT error
6498           code is never returned by JIT execution.
6499    
6500    
6501    SAVING AND RESTORING COMPILED PATTERNS
6502    
6503           The code that is generated by the  JIT  compiler  is  architecture-spe-
6504           cific,  and  is also position dependent. For those reasons it cannot be
6505           saved and restored like the bytecode and other data of a compiled  pat-
6506           tern. You should be able to run pcre_study() on a saved and restored pat-
6507           tern, and thereby recreate the JIT data, but  because  JIT  compilation
6508           uses significant resources, it is probably not worth doing this.
6509    
6510    
6511    CONTROLLING THE JIT STACK
6512    
6513           When the compiled JIT code runs, it needs a block of memory to use as a
6514           stack.  By default, it uses 32K on the  machine  stack.  However,  some
6515           large   or   complicated  patterns  need  more  than  this.  The  error
6516           PCRE_ERROR_JIT_STACKLIMIT is given when  there  is  not  enough  stack.
6517           Three  functions  are provided for managing blocks of memory for use as
6518           JIT stacks.
6519    
6520           The pcre_jit_stack_alloc() function creates a JIT stack. Its  arguments
6521           are  a starting size and a maximum size, and it returns a pointer to an
6522           opaque structure of type pcre_jit_stack, or NULL if there is an  error.
6523           The  pcre_jit_stack_free() function can be used to free a stack that is
6524           no longer needed. (For the technically minded:  the  address  space  is
6525           allocated by mmap or VirtualAlloc.)
6526    
6527           JIT  uses far less memory for recursion than the interpretive code, and
6528           a maximum stack size of 512K to 1M should be more than enough  for  any
6529           pattern.
6530    
6531           The  pcre_assign_jit_stack()  function  specifies  which stack JIT code
6532           should use. Its arguments are as follows:
6533    
6534             pcre_extra         *extra
6535             pcre_jit_callback  callback
6536             void               *data
6537    
6538           The extra argument must be  the  result  of  studying  a  pattern  with
6539           PCRE_STUDY_JIT_COMPILE.  There  are  three  cases for the values of the
6540           other two arguments:
6541    
6542             (1) If callback is NULL and data is NULL, an internal 32K block
6543                 on the machine stack is used.
6544    
6545             (2) If callback is NULL and data is not NULL, data must be
6546                 a valid JIT stack, the result of calling pcre_jit_stack_alloc().
6547    
6548             (3) If callback is not NULL, it must point to a function that is called
6549                 with data as an argument at the start of matching, in order to
6550                 set up a JIT stack. If the result is NULL, the internal 32K stack
6551                 is used; otherwise the return value must be a valid JIT stack,
6552                 the result of calling pcre_jit_stack_alloc().
6553    
6554           You may safely assign the same JIT stack to more than one  pattern,  as
6555           long as they are all matched sequentially in the same thread. In a mul-
6556           tithread application, each thread must use its own JIT stack.
6557    
6558           Strictly speaking, even more is allowed. You can assign the same  stack
6559           to  any number of patterns as long as they are not used for matching by
6560           multiple threads at the same time. For example, you can assign the same
6561           stack  to all compiled patterns, and use a global mutex in the callback
6562           to wait until the stack is available for use. However, this is an inef-
6563           ficient solution, and not recommended.
6564    
6565           This  is  a  suggestion  for  how a typical multithreaded program might
6566           operate:
6567    
6568             During thread initialization
6569               thread_local_var = pcre_jit_stack_alloc(...)
6570    
6571             During thread exit
6572               pcre_jit_stack_free(thread_local_var)
6573    
6574             Use a one-line callback function
6575               return thread_local_var
6576    
6577           All the functions described in this section do nothing if  JIT  is  not
6578           available,  and  pcre_assign_jit_stack()  does nothing unless the extra
6579           argument is non-NULL and points to  a  pcre_extra  block  that  is  the
6580           result of a successful study with PCRE_STUDY_JIT_COMPILE.
6581    
6582    
6583    EXAMPLE CODE
6584    
6585           This  is  a  single-threaded example that specifies a JIT stack without
6586           using a callback.
6587    
6588             int rc;
6589             int ovector[30];
6590             pcre *re;
6591             pcre_extra *extra;
6592             pcre_jit_stack *jit_stack;
6593    
6594             re = pcre_compile(pattern, 0, &error, &erroffset, NULL);
6595             /* Check for errors */
6596             extra = pcre_study(re, PCRE_STUDY_JIT_COMPILE, &error);
6597             jit_stack = pcre_jit_stack_alloc(32*1024, 512*1024);
6598             /* Check for error (NULL) */
6599             pcre_assign_jit_stack(extra, NULL, jit_stack);
6600             rc = pcre_exec(re, extra, subject, length, 0, 0, ovector, 30);
6601             /* Check results */
6602             pcre_free(re);
6603             pcre_free_study(extra);
6604             pcre_jit_stack_free(jit_stack);
6605    
6606    
6607    SEE ALSO
6608    
6609           pcreapi(3)
6610    
6611    
6612    AUTHOR
6613    
6614           Philip Hazel
6615           University Computing Service
6616           Cambridge CB2 3QH, England.
6617    
6618    
6619    REVISION
6620    
6621           Last updated: 06 September 2011
6622           Copyright (c) 1997-2011 University of Cambridge.
6623  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
6624    
6625    
6626  PCREPARTIAL(3)                                                  PCREPARTIAL(3)  PCREPARTIAL(3)                                                  PCREPARTIAL(3)
6627    
6628    
# Line 6266  PARTIAL MATCHING IN PCRE Line 6661  PARTIAL MATCHING IN PCRE
6661         plete  match,  though the details differ between the two matching func-         plete  match,  though the details differ between the two matching func-
6662         tions. If both options are set, PCRE_PARTIAL_HARD takes precedence.         tions. If both options are set, PCRE_PARTIAL_HARD takes precedence.
6663    
6664         Setting a partial matching option disables two of PCRE's optimizations.         Setting a partial matching option for pcre_exec() disables the  use  of
6665         PCRE  remembers the last literal byte in a pattern, and abandons match-         any  just-in-time code that was set up by calling pcre_study() with the
6666         ing immediately if such a byte is not present in  the  subject  string.         PCRE_STUDY_JIT_COMPILE option. It also disables two of PCRE's  standard
6667         This  optimization cannot be used for a subject string that might match         optimizations.  PCRE  remembers the last literal byte in a pattern, and
6668         only partially. If the pattern was  studied,  PCRE  knows  the  minimum         abandons matching immediately if such a byte is not present in the sub-
6669         length  of  a  matching string, and does not bother to run the matching         ject string. This optimization cannot be used for a subject string that
6670         function on shorter strings. This optimization  is  also  disabled  for         might match only partially. If the pattern was studied, PCRE knows  the
6671         partial matching.         minimum  length  of  a  matching string, and does not bother to run the
6672           matching function on shorter strings. This optimization  is  also  dis-
6673           abled for partial matching.
6674    
6675    
6676  PARTIAL MATCHING USING pcre_exec()  PARTIAL MATCHING USING pcre_exec()
# Line 6643  AUTHOR Line 7040  AUTHOR
7040    
7041  REVISION  REVISION
7042    
7043         Last updated: 07 November 2010         Last updated: 26 August 2011
7044         Copyright (c) 1997-2010 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
7045  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
7046    
7047    
7048  PCREPRECOMPILE(3)                                            PCREPRECOMPILE(3)  PCREPRECOMPILE(3)                                            PCREPRECOMPILE(3)
7049    
7050    
# Line 6662  SAVING AND RE-USING PRECOMPILED PCRE PAT Line 7059  SAVING AND RE-USING PRECOMPILED PCRE PAT
7059         form  instead  of  having to compile them every time the application is         form  instead  of  having to compile them every time the application is
7060         run.  If you are not  using  any  private  character  tables  (see  the         run.  If you are not  using  any  private  character  tables  (see  the
7061         pcre_maketables()  documentation),  this is relatively straightforward.         pcre_maketables()  documentation),  this is relatively straightforward.
7062         If you are using private tables, it is a little bit more complicated.         If you are using private tables, it is a little bit  more  complicated.
7063           However,  if  you  are  using  the just-in-time optimization feature of
7064           pcre_study(), it is not possible to save and reload the JIT data.
7065    
7066         If you save compiled patterns to a file, you can copy them to a differ-         If you save compiled patterns to a file, you can copy them to a differ-
7067         ent  host  and  run them there. This works even if the new host has the         ent  host  and  run them there. This works even if the new host has the
# Line 6670  SAVING AND RE-USING PRECOMPILED PCRE PAT Line 7069  SAVING AND RE-USING PRECOMPILED PCRE PAT
7069         There  may  be a small performance penalty, but it should be insignifi-         There  may  be a small performance penalty, but it should be insignifi-
7070         cant. However, compiling regular expressions with one version  of  PCRE         cant. However, compiling regular expressions with one version  of  PCRE
7071         for  use  with  a  different  version is not guaranteed to work and may         for  use  with  a  different  version is not guaranteed to work and may
7072         cause crashes.         cause crashes, and saving and restoring a compiled  pattern  loses  any
7073           JIT optimization data.
7074    
7075    
7076  SAVING A COMPILED PATTERN  SAVING A COMPILED PATTERN
7077    
7078         The value returned by pcre_compile() points to a single block of memory         The value returned by pcre_compile() points to a single block of memory
7079         that  holds  the compiled pattern and associated data. You can find the         that holds the compiled pattern and associated data. You can  find  the
7080         length of this block in bytes by calling pcre_fullinfo() with an  argu-         length  of this block in bytes by calling pcre_fullinfo() with an argu-
7081         ment  of  PCRE_INFO_SIZE. You can then save the data in any appropriate         ment of PCRE_INFO_SIZE. You can then save the data in  any  appropriate
7082         manner. Here is sample code that compiles a pattern and writes it to  a         manner.  Here is sample code that compiles a pattern and writes it to a
7083         file. It assumes that the variable fd refers to a file that is open for         file. It assumes that the variable fd refers to a file that is open for
7084         output:         output:
7085    
# Line 6694  SAVING A COMPILED PATTERN Line 7094  SAVING A COMPILED PATTERN
7094           rc = fwrite(re, 1, size, fd);           rc = fwrite(re, 1, size, fd);
7095           if (rc != size) { ... handle errors ... }           if (rc != size) { ... handle errors ... }
7096    
7097         In this example, the bytes  that  comprise  the  compiled  pattern  are         In  this  example,  the  bytes  that  comprise the compiled pattern are
7098         copied  exactly.  Note that this is binary data that may contain any of         copied exactly. Note that this is binary data that may contain  any  of
7099         the 256 possible byte  values.  On  systems  that  make  a  distinction         the  256  possible  byte  values.  On  systems  that make a distinction
7100         between binary and non-binary data, be sure that the file is opened for         between binary and non-binary data, be sure that the file is opened for
7101         binary output.         binary output.
7102    
7103         If you want to write more than one pattern to a file, you will have  to         If  you want to write more than one pattern to a file, you will have to
7104         devise  a  way of separating them. For binary data, preceding each pat-         devise a way of separating them. For binary data, preceding  each  pat-
7105         tern with its length is probably  the  most  straightforward  approach.         tern  with  its  length  is probably the most straightforward approach.
7106         Another  possibility is to write out the data in hexadecimal instead of         Another possibility is to write out the data in hexadecimal instead  of
7107         binary, one pattern to a line.         binary, one pattern to a line.
7108    
7109         Saving compiled patterns in a file is only one possible way of  storing         Saving  compiled patterns in a file is only one possible way of storing
7110         them  for later use. They could equally well be saved in a database, or         them for later use. They could equally well be saved in a database,  or
7111         in the memory of some daemon process that passes them  via  sockets  to         in  the  memory  of some daemon process that passes them via sockets to
7112         the processes that want them.         the processes that want them.
7113    
7114         If  the pattern has been studied, it is also possible to save the study         If the pattern has been studied, it is also possible to save the normal
7115         data in a similar way to the compiled  pattern  itself.  When  studying         study data in a similar way to the compiled pattern itself. However, if
7116         generates  additional  information, pcre_study() returns a pointer to a         the PCRE_STUDY_JIT_COMPILE option was used, the just-in-time data that is cre-
7117         pcre_extra data block. Its format is defined in the section on matching         ated  cannot  be saved because it is too dependent on the current envi-
7118         a  pattern in the pcreapi documentation. The study_data field points to         ronment. When studying generates additional  information,  pcre_study()
7119         the binary study data,  and  this  is  what  you  must  save  (not  the         returns  a pointer to a pcre_extra data block. Its format is defined in
7120         pcre_extra  block itself). The length of the study data can be obtained         the section on matching a pattern in  the  pcreapi  documentation.  The
7121         by calling pcre_fullinfo() with  an  argument  of  PCRE_INFO_STUDYSIZE.         study_data  field points to the binary study data, and this is what you
7122         Remember  to check that pcre_study() did return a non-NULL value before         must save (not the pcre_extra block itself). The length  of  the  study
7123         trying to save the study data.         data  can  be  obtained  by calling pcre_fullinfo() with an argument of
7124           PCRE_INFO_STUDYSIZE. Remember to check that pcre_study() did  return  a
7125           non-NULL value before trying to save the study data.
7126    
7127    
7128  RE-USING A PRECOMPILED PATTERN  RE-USING A PRECOMPILED PATTERN
7129    
7130         Re-using a precompiled pattern is straightforward. Having  reloaded  it         Re-using  a  precompiled pattern is straightforward. Having reloaded it
7131         into   main   memory,   you   pass   its   pointer  to  pcre_exec()  or         into  main  memory,  you   pass   its   pointer   to   pcre_exec()   or
7132         pcre_dfa_exec() in the usual way. This  should  work  even  on  another         pcre_dfa_exec()  in  the  usual  way.  This should work even on another
7133         host,  and  even  if  that  host has the opposite endianness to the one         host, and even if that host has the  opposite  endianness  to  the  one
7134         where the pattern was compiled.         where the pattern was compiled.
7135    
7136         However, if you passed a pointer to custom character  tables  when  the         However,  if  you  passed a pointer to custom character tables when the
7137         pattern  was  compiled  (the  tableptr argument of pcre_compile()), you         pattern was compiled (the tableptr  argument  of  pcre_compile()),  you
7138         must now pass a similar  pointer  to  pcre_exec()  or  pcre_dfa_exec(),         must  now  pass  a  similar  pointer to pcre_exec() or pcre_dfa_exec(),
7139         because  the  value  saved  with the compiled pattern will obviously be         because the value saved with the compiled  pattern  will  obviously  be
7140         nonsense. A field in a pcre_extra block is used to pass this data, as         nonsense. A field in a pcre_extra block is used to pass this data, as
7141         described  in the section on matching a pattern in the pcreapi documen-         described in the section on matching a pattern in the pcreapi  documen-
7142         tation.         tation.
7143    
7144         If you did not provide custom character tables  when  the  pattern  was         If  you  did  not  provide custom character tables when the pattern was
7145         compiled,  the  pointer  in  the compiled pattern is NULL, which causes         compiled, the pointer in the compiled pattern  is  NULL,  which  causes
7146         pcre_exec() to use PCRE's internal tables. Thus, you  do  not  need  to         pcre_exec()  to  use  PCRE's  internal tables. Thus, you do not need to
7147         take any special action at run time in this case.         take any special action at run time in this case.
7148    
7149         If  you  saved study data with the compiled pattern, you need to create         If you saved study data with the compiled pattern, you need  to  create
7150         your own pcre_extra data block and set the study_data field to point to         your own pcre_extra data block and set the study_data field to point to
7151         the  reloaded  study  data. You must also set the PCRE_EXTRA_STUDY_DATA         the reloaded study data. You must also  set  the  PCRE_EXTRA_STUDY_DATA
7152         bit in the flags field to indicate that study  data  is  present.  Then         bit  in  the  flags  field to indicate that study data is present. Then
7153         pass  the  pcre_extra  block  to  pcre_exec() or pcre_dfa_exec() in the         pass the pcre_extra block to  pcre_exec()  or  pcre_dfa_exec()  in  the
7154         usual way.         usual  way.  If  the pattern was studied for just-in-time optimization,
7155           that data cannot be saved, and so is lost by a save/restore cycle.
7156    
7157    
7158  COMPATIBILITY WITH DIFFERENT PCRE RELEASES  COMPATIBILITY WITH DIFFERENT PCRE RELEASES
# Line 6768  AUTHOR Line 7171  AUTHOR
7171    
7172  REVISION  REVISION
7173    
7174         Last updated: 17 November 2010         Last updated: 26 August 2011
7175         Copyright (c) 1997-2010 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
7176  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
7177    
7178    
7179  PCREPERFORM(3)                                                  PCREPERFORM(3)  PCREPERFORM(3)                                                  PCREPERFORM(3)
7180    
7181    
# Line 6939  REVISION Line 7342  REVISION
7342         Last updated: 16 May 2010         Last updated: 16 May 2010
7343         Copyright (c) 1997-2010 University of Cambridge.         Copyright (c) 1997-2010 University of Cambridge.
7344  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
7345    
7346    
7347  PCREPOSIX(3)                                                      PCREPOSIX(3)  PCREPOSIX(3)                                                      PCREPOSIX(3)
7348    
7349    
# Line 7202  REVISION Line 7605  REVISION
7605         Last updated: 16 May 2010         Last updated: 16 May 2010
7606         Copyright (c) 1997-2010 University of Cambridge.         Copyright (c) 1997-2010 University of Cambridge.
7607  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
7608    
7609    
7610  PCRECPP(3)                                                          PCRECPP(3)  PCRECPP(3)                                                          PCRECPP(3)
7611    
7612    
# Line 7544  REVISION Line 7947  REVISION
7947         Last updated: 17 March 2009         Last updated: 17 March 2009
7948         Minor typo fixed: 25 July 2011         Minor typo fixed: 25 July 2011
7949  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
7950    
7951    
7952  PCRESAMPLE(3)                                                    PCRESAMPLE(3)  PCRESAMPLE(3)                                                    PCRESAMPLE(3)
7953    
7954    
# Line 7679  REVISION Line 8082  REVISION
8082         Last updated: 24 August 2011         Last updated: 24 August 2011
8083         Copyright (c) 1997-2011 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
8084  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
8085    
8086    
8087  PCRESTACK(3)                                                      PCRESTACK(3)  PCRESTACK(3)                                                      PCRESTACK(3)
8088    
8089    
# Line 7706  PCRE DISCUSSION OF STACK USAGE Line 8109  PCRE DISCUSSION OF STACK USAGE
8109         result of the current call (a "tail recursion"), the function  is  just         result of the current call (a "tail recursion"), the function  is  just
8110         restarted instead.         restarted instead.
8111    
8112           The  above  comments apply when pcre_exec() is run in its normal inter-
8113           pretive manner. If the pattern was studied with the PCRE_STUDY_JIT_COM-
8114           PILE option, and just-in-time compiling was successful, and the options
8115           passed to pcre_exec() were not incompatible, the matching process  uses
8116           the  JIT-compiled  code  instead of the match() function. In this case,
8117           the memory requirements are handled entirely differently. See the pcre-
8118           jit documentation for details.
8119    
8120         The pcre_dfa_exec() function operates in an entirely different way, and         The pcre_dfa_exec() function operates in an entirely different way, and
8121         uses recursion only when there is a  regular  expression  recursion  or         uses recursion only when there is a  regular  expression  recursion  or
8122         subroutine  call in the pattern. This includes the processing of asser-         subroutine  call in the pattern. This includes the processing of asser-
# Line 7717  PCRE DISCUSSION OF STACK USAGE Line 8128  PCRE DISCUSSION OF STACK USAGE
8128         stack. At present, there is no protection against this.         stack. At present, there is no protection against this.
8129    
8130         The comments that follow do NOT apply to pcre_dfa_exec(); they are rel-         The comments that follow do NOT apply to pcre_dfa_exec(); they are rel-
8131         evant only for pcre_exec().         evant only for pcre_exec() without the JIT optimization.
8132    
8133     Reducing pcre_exec()'s stack usage     Reducing pcre_exec()'s stack usage
8134    
# Line 7829  AUTHOR Line 8240  AUTHOR
8240    
8241  REVISION  REVISION
8242    
8243         Last updated: 22 July 2011         Last updated: 26 August 2011
8244         Copyright (c) 1997-2011 University of Cambridge.         Copyright (c) 1997-2011 University of Cambridge.
8245  ------------------------------------------------------------------------------  ------------------------------------------------------------------------------
8246    
8247    

Legend:
Removed from v.678  
changed lines
  Added in v.691

  ViewVC Help
Powered by ViewVC 1.1.5