/[pcre]/code/trunk/doc/pcresyntax.3
ViewVC logotype

Diff of /code/trunk/doc/pcresyntax.3

Parent Directory | Revision Log | View Patch

revision 345 by ph10, Mon Apr 28 15:10:02 2008 UTC revision 412 by ph10, Sat Apr 11 10:34:37 2009 UTC
# Line 120  Braille, Line 120  Braille,
120  Buginese,  Buginese,
121  Buhid,  Buhid,
122  Canadian_Aboriginal,  Canadian_Aboriginal,
123    Carian,
124    Cham,
125  Cherokee,  Cherokee,
126  Common,  Common,
127  Coptic,  Coptic,
# Line 143  Hiragana, Line 145  Hiragana,
145  Inherited,  Inherited,
146  Kannada,  Kannada,
147  Katakana,  Katakana,
148    Kayah_Li,
149  Kharoshthi,  Kharoshthi,
150  Khmer,  Khmer,
151  Lao,  Lao,
152  Latin,  Latin,
153    Lepcha,
154  Limbu,  Limbu,
155  Linear_B,  Linear_B,
156    Lycian,
157    Lydian,
158  Malayalam,  Malayalam,
159  Mongolian,  Mongolian,
160  Myanmar,  Myanmar,
# Line 157  Nko, Line 163  Nko,
163  Ogham,  Ogham,
164  Old_Italic,  Old_Italic,
165  Old_Persian,  Old_Persian,
166    Ol_Chiki,
167  Oriya,  Oriya,
168  Osmanya,  Osmanya,
169  Phags_Pa,  Phags_Pa,
170  Phoenician,  Phoenician,
171    Rejang,
172  Runic,  Runic,
173    Saurashtra,
174  Shavian,  Shavian,
175  Sinhala,  Sinhala,
176    Sundanese,
177  Syloti_Nagri,  Syloti_Nagri,
178  Syriac,  Syriac,
179  Tagalog,  Tagalog,
# Line 176  Thai, Line 186  Thai,
186  Tibetan,  Tibetan,
187  Tifinagh,  Tifinagh,
188  Ugaritic,  Ugaritic,
189    Vai,
190  Yi.  Yi.
191  .  .
192  .  .
# Line 231  In PCRE, POSIX character set names recog Line 242  In PCRE, POSIX character set names recog
242  .SH "ANCHORS AND SIMPLE ASSERTIONS"  .SH "ANCHORS AND SIMPLE ASSERTIONS"
243  .rs  .rs
244  .sp  .sp
245    \eb          word boundary    \eb          word boundary (only ASCII letters recognized)
246    \eB          not a word boundary    \eB          not a word boundary
247    ^           start of subject    ^           start of subject
248                 also after internal newline in multiline mode                 also after internal newline in multiline mode
# Line 260  In PCRE, POSIX character set names recog Line 271  In PCRE, POSIX character set names recog
271  .SH "CAPTURING"  .SH "CAPTURING"
272  .rs  .rs
273  .sp  .sp
274    (...)          capturing group    (...)           capturing group
275    (?<name>...)   named capturing group (Perl)    (?<name>...)    named capturing group (Perl)
276    (?'name'...)   named capturing group (Perl)    (?'name'...)    named capturing group (Perl)
277    (?P<name>...)  named capturing group (Python)    (?P<name>...)   named capturing group (Python)
278    (?:...)        non-capturing group    (?:...)         non-capturing group
279    (?|...)        non-capturing group; reset group numbers for    (?|...)         non-capturing group; reset group numbers for
280                    capturing groups in each alternative                     capturing groups in each alternative
281  .  .
282  .  .
283  .SH "ATOMIC GROUPS"  .SH "ATOMIC GROUPS"
284  .rs  .rs
285  .sp  .sp
286    (?>...)        atomic, non-capturing group    (?>...)         atomic, non-capturing group
287  .  .
288  .  .
289  .  .
# Line 280  In PCRE, POSIX character set names recog Line 291  In PCRE, POSIX character set names recog
291  .SH "COMMENT"  .SH "COMMENT"
292  .rs  .rs
293  .sp  .sp
294    (?#....)       comment (not nestable)    (?#....)        comment (not nestable)
295  .  .
296  .  .
297  .SH "OPTION SETTING"  .SH "OPTION SETTING"
298  .rs  .rs
299  .sp  .sp
300    (?i)           caseless    (?i)            caseless
301    (?J)           allow duplicate names    (?J)            allow duplicate names
302    (?m)           multiline    (?m)            multiline
303    (?s)           single line (dotall)    (?s)            single line (dotall)
304    (?U)           default ungreedy (lazy)    (?U)            default ungreedy (lazy)
305    (?x)           extended (ignore white space)    (?x)            extended (ignore white space)
306    (?-...)        unset option(s)    (?-...)         unset option(s)
307    .sp
308    The following is recognized only at the start of a pattern or after one of the
309    newline-setting options with similar syntax:
310    .sp
311      (*UTF8)         set UTF-8 mode
312  .  .
313  .  .
314  .SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS"  .SH "LOOKAHEAD AND LOOKBEHIND ASSERTIONS"
315  .rs  .rs
316  .sp  .sp
317    (?=...)        positive look ahead    (?=...)         positive look ahead
318    (?!...)        negative look ahead    (?!...)         negative look ahead
319    (?<=...)       positive look behind    (?<=...)        positive look behind
320    (?<!...)       negative look behind    (?<!...)        negative look behind
321  .sp  .sp
322  Each top-level branch of a look behind must be of a fixed length.  Each top-level branch of a look behind must be of a fixed length.
323  .  .
# Line 309  Each top-level branch of a look behind m Line 325  Each top-level branch of a look behind m
325  .SH "BACKREFERENCES"  .SH "BACKREFERENCES"
326  .rs  .rs
327  .sp  .sp
328    \en             reference by number (can be ambiguous)    \en              reference by number (can be ambiguous)
329    \egn            reference by number    \egn             reference by number
330    \eg{n}          reference by number    \eg{n}           reference by number
331    \eg{-n}         relative reference by number    \eg{-n}          relative reference by number
332    \ek<name>       reference by name (Perl)    \ek<name>        reference by name (Perl)
333    \ek'name'       reference by name (Perl)    \ek'name'        reference by name (Perl)
334    \eg{name}       reference by name (Perl)    \eg{name}        reference by name (Perl)
335    \ek{name}       reference by name (.NET)    \ek{name}        reference by name (.NET)
336    (?P=name)      reference by name (Python)    (?P=name)       reference by name (Python)
337  .  .
338  .  .
339  .SH "SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)"  .SH "SUBROUTINE REFERENCES (POSSIBLY RECURSIVE)"
340  .rs  .rs
341  .sp  .sp
342    (?R)           recurse whole pattern    (?R)            recurse whole pattern
343    (?n)           call subpattern by absolute number    (?n)            call subpattern by absolute number
344    (?+n)          call subpattern by relative number    (?+n)           call subpattern by relative number
345    (?-n)          call subpattern by relative number    (?-n)           call subpattern by relative number
346    (?&name)       call subpattern by name (Perl)    (?&name)        call subpattern by name (Perl)
347    (?P>name)      call subpattern by name (Python)    (?P>name)       call subpattern by name (Python)
348    \eg<name>       call subpattern by name (Oniguruma)    \eg<name>        call subpattern by name (Oniguruma)
349    \eg'name'       call subpattern by name (Oniguruma)    \eg'name'        call subpattern by name (Oniguruma)
350    \eg<n>          call subpattern by absolute number (Oniguruma)    \eg<n>           call subpattern by absolute number (Oniguruma)
351    \eg'n'          call subpattern by absolute number (Oniguruma)    \eg'n'           call subpattern by absolute number (Oniguruma)
352    \eg<+n>         call subpattern by relative number (PCRE extension)    \eg<+n>          call subpattern by relative number (PCRE extension)
353    \eg'+n'         call subpattern by relative number (PCRE extension)    \eg'+n'          call subpattern by relative number (PCRE extension)
354    \eg<-n>         call subpattern by relative number (PCRE extension)    \eg<-n>          call subpattern by relative number (PCRE extension)
355    \eg'-n'         call subpattern by relative number (PCRE extension)    \eg'-n'          call subpattern by relative number (PCRE extension)
356  .  .
357  .  .
358  .SH "CONDITIONAL PATTERNS"  .SH "CONDITIONAL PATTERNS"
# Line 345  Each top-level branch of a look behind m Line 361  Each top-level branch of a look behind m
361    (?(condition)yes-pattern)    (?(condition)yes-pattern)
362    (?(condition)yes-pattern|no-pattern)    (?(condition)yes-pattern|no-pattern)
363  .sp  .sp
364    (?(n)...       absolute reference condition    (?(n)...        absolute reference condition
365    (?(+n)...      relative reference condition    (?(+n)...       relative reference condition
366    (?(-n)...      relative reference condition    (?(-n)...       relative reference condition
367    (?(<name>)...  named reference condition (Perl)    (?(<name>)...   named reference condition (Perl)
368    (?('name')...  named reference condition (Perl)    (?('name')...   named reference condition (Perl)
369    (?(name)...    named reference condition (PCRE)    (?(name)...     named reference condition (PCRE)
370    (?(R)...       overall recursion condition    (?(R)...        overall recursion condition
371    (?(Rn)...      specific group recursion condition    (?(Rn)...       specific group recursion condition
372    (?(R&name)...  specific recursion condition    (?(R&name)...   specific recursion condition
373    (?(DEFINE)...  define subpattern for reference    (?(DEFINE)...   define subpattern for reference
374    (?(assert)...  assertion condition    (?(assert)...   assertion condition
375  .  .
376  .  .
377  .SH "BACKTRACKING CONTROL"  .SH "BACKTRACKING CONTROL"
# Line 363  Each top-level branch of a look behind m Line 379  Each top-level branch of a look behind m
379  .sp  .sp
380  The following act immediately when they are reached:  The following act immediately when they are reached:
381  .sp  .sp
382    (*ACCEPT)      force successful match    (*ACCEPT)       force successful match
383    (*FAIL)        force backtrack; synonym (*F)    (*FAIL)         force backtrack; synonym (*F)
384  .sp  .sp
385  The following act only when a subsequent match failure causes a backtrack to  The following act only when a subsequent match failure causes a backtrack to
386  reach them. They all force a match failure, but they differ in what happens  reach them. They all force a match failure, but they differ in what happens
387  afterwards. Those that advance the start-of-match point do so only if the  afterwards. Those that advance the start-of-match point do so only if the
388  pattern is not anchored.  pattern is not anchored.
389  .sp  .sp
390    (*COMMIT)      overall failure, no advance of starting point    (*COMMIT)       overall failure, no advance of starting point
391    (*PRUNE)       advance to next starting character    (*PRUNE)        advance to next starting character
392    (*SKIP)        advance start to current matching position    (*SKIP)         advance start to current matching position
393    (*THEN)        local failure, backtrack to next alternation    (*THEN)         local failure, backtrack to next alternation
394  .  .
395  .  .
396  .SH "NEWLINE CONVENTIONS"  .SH "NEWLINE CONVENTIONS"
397  .rs  .rs
398  .sp  .sp
399  These are recognized only at the very start of the pattern or after a  These are recognized only at the very start of the pattern or after a
400  (*BSR_...) option.  (*BSR_...) or (*UTF8) option.
401  .sp  .sp
402    (*CR)    (*CR)           carriage return only
403    (*LF)    (*LF)           linefeed only
404    (*CRLF)    (*CRLF)         carriage return followed by linefeed
405    (*ANYCRLF)    (*ANYCRLF)      all three of the above
406    (*ANY)    (*ANY)          any Unicode newline sequence
407  .  .
408  .  .
409  .SH "WHAT \eR MATCHES"  .SH "WHAT \eR MATCHES"
410  .rs  .rs
411  .sp  .sp
412  These are recognized only at the very start of the pattern or after a  These are recognized only at the very start of the pattern or after a
413  (*...) option that sets the newline convention.  (*...) option that sets the newline convention or UTF-8 mode.
414  .sp  .sp
415    (*BSR_ANYCRLF)    (*BSR_ANYCRLF)  CR, LF, or CRLF
416    (*BSR_UNICODE)    (*BSR_UNICODE)  any Unicode newline sequence
417  .  .
418  .  .
419  .SH "CALLOUTS"  .SH "CALLOUTS"
# Line 428  Cambridge CB2 3QH, England. Line 444  Cambridge CB2 3QH, England.
444  .rs  .rs
445  .sp  .sp
446  .nf  .nf
447  Last updated: 09 April 2008  Last updated: 11 April 2009
448  Copyright (c) 1997-2008 University of Cambridge.  Copyright (c) 1997-2009 University of Cambridge.
449  .fi  .fi

Legend:
Removed from v.345  
changed lines
  Added in v.412

  ViewVC Help
Powered by ViewVC 1.1.5