/[pcre]/code/trunk/pcre_newline.c
ViewVC logotype

Diff of /code/trunk/pcre_newline.c

Parent Directory | Revision Log | View Patch

revision 117 by ph10, Fri Mar 9 15:59:06 2007 UTC | revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 42  POSSIBILITY OF SUCH DAMAGE. Line 42  POSSIBILITY OF SUCH DAMAGE.
42  one kind of newline is to be recognized. When a newline is found, its length is  one kind of newline is to be recognized. When a newline is found, its length is
43  returned. In principle, we could implement several newline "types", each  returned. In principle, we could implement several newline "types", each
44  referring to a different set of newline characters. At present, PCRE supports  referring to a different set of newline characters. At present, PCRE supports
45  only NLTYPE_FIXED, which gets handled without these functions, and NLTYPE_ALL,  only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
46  so for now the type isn't passed into the functions. It can easily be added  and NLTYPE_ANY. The full list of Unicode newline characters is taken from
 later if required. The full list of Unicode newline characters is taken from  
47  http://unicode.org/unicode/reports/tr18/. */  http://unicode.org/unicode/reports/tr18/. */
48    
49    
50    #ifdef HAVE_CONFIG_H
51    #include "config.h"
52    #endif
53    
54  #include "pcre_internal.h"  #include "pcre_internal.h"
55    
56    
# Line 57  http://unicode.org/unicode/reports/tr18/ Line 60  http://unicode.org/unicode/reports/tr18/
60  *************************************************/  *************************************************/
61    
62  /* It is guaranteed that the initial value of ptr is less than the end of the  /* It is guaranteed that the initial value of ptr is less than the end of the
63  string that is being processed.  string that is being processed.
64    
65  Arguments:  Arguments:
66    ptr          pointer to possible newline    ptr          pointer to possible newline
67      type         the newline type
68    endptr       pointer to the end of the string    endptr       pointer to the end of the string
69    lenptr       where to return the length    lenptr       where to return the length
70    utf8         TRUE if in utf8 mode    utf          TRUE if in utf mode
71    
72  Returns:       TRUE or FALSE  Returns:       TRUE or FALSE
73  */  */
74    
75  BOOL  BOOL
76  _pcre_is_newline(const uschar *ptr, const uschar *endptr, int *lenptr,  PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
77    BOOL utf8)    BOOL utf)
78  {  {
79  int c;  int c;
80  if (utf8) { GETCHAR(c, ptr); } else c = *ptr;  (void)utf;
81  switch(c)  #ifdef SUPPORT_UTF
82    if (utf)
83      {
84      GETCHAR(c, ptr);
85      }
86    else
87    #endif  /* SUPPORT_UTF */
88      c = *ptr;
89    
90    /* Note that this function is called only for ANY or ANYCRLF. */
91    
92    if (type == NLTYPE_ANYCRLF) switch(c)
93      {
94      case CHAR_LF: *lenptr = 1; return TRUE;
95      case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
96                   return TRUE;
97      default: return FALSE;
98      }
99    
100    /* NLTYPE_ANY */
101    
102    else switch(c)
103    {    {
104    case 0x000a:                                       /* LF */  #ifdef EBCDIC
105    case 0x000b:                                       /* VT */    case CHAR_NEL:
106    case 0x000c: *lenptr = 1; return TRUE;             /* FF */  #endif
107    case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;    case CHAR_LF:
108                 return TRUE;                          /* CR */    case CHAR_VT:
109    case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;   /* NEL */    case CHAR_FF: *lenptr = 1; return TRUE;
110    
111      case CHAR_CR:
112      *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
113      return TRUE;
114    
115    #ifndef EBCDIC
116    #ifdef COMPILE_PCRE8
117      case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
118    case 0x2028:                                       /* LS */    case 0x2028:                                       /* LS */
119    case 0x2029: *lenptr = 3; return TRUE;             /* PS */    case 0x2029: *lenptr = 3; return TRUE;             /* PS */
120    #else   /* 16-bit (can't be EBCDIC) */
121      case CHAR_NEL:
122      case 0x2028:                                       /* LS */
123      case 0x2029: *lenptr = 1; return TRUE;             /* PS */
124    #endif  /* COMPILE_PCRE8 */
125    #endif  /* Not EBCDIC */
126    
127    default: return FALSE;    default: return FALSE;
128    }    }
129  }  }
# Line 99  the string that is being processed. Line 139  the string that is being processed.
139    
140  Arguments:  Arguments:
141    ptr          pointer to possible newline    ptr          pointer to possible newline
142      type         the newline type
143    startptr     pointer to the start of the string    startptr     pointer to the start of the string
144    lenptr       where to return the length    lenptr       where to return the length
145    utf8         TRUE if in utf8 mode    utf          TRUE if in utf mode
146    
147  Returns:       TRUE or FALSE  Returns:       TRUE or FALSE
148  */  */
149    
150  BOOL  BOOL
151  _pcre_was_newline(const uschar *ptr, const uschar *startptr, int *lenptr,  PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
152    BOOL utf8)    BOOL utf)
153  {  {
154  int c;  int c;
155    (void)utf;
156  ptr--;  ptr--;
157  if (utf8)  #ifdef SUPPORT_UTF
158    if (utf)
159    {    {
160    BACKCHAR(ptr);    BACKCHAR(ptr);
161    GETCHAR(c, ptr);    GETCHAR(c, ptr);
162    }    }
163  else c = *ptr;  else
164  switch(c)  #endif  /* SUPPORT_UTF */
165      c = *ptr;
166    
167    /* Note that this function is called only for ANY or ANYCRLF. */
168    
169    if (type == NLTYPE_ANYCRLF) switch(c)
170    {    {
171    case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;    case CHAR_LF:
172                 return TRUE;                         /* LF */    *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
173    case 0x000b:                                      /* VT */    return TRUE;
174    case 0x000c:                                      /* FF */  
175    case 0x000d: *lenptr = 1; return TRUE;            /* CR */    case CHAR_CR: *lenptr = 1; return TRUE;
176    case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;  /* NEL */    default: return FALSE;
177    case 0x2028:                                      /* LS */    }
178    case 0x2029: *lenptr = 3; return TRUE;            /* PS */  
179    /* NLTYPE_ANY */
180    
181    else switch(c)
182      {
183      case CHAR_LF:
184      *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
185      return TRUE;
186    
187    #ifdef EBCDIC
188      case CHAR_NEL:
189    #endif
190      case CHAR_VT:
191      case CHAR_FF:
192      case CHAR_CR: *lenptr = 1; return TRUE;
193    
194    #ifndef EBCDIC
195    #ifdef COMPILE_PCRE8
196      case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
197      case 0x2028:                                       /* LS */
198      case 0x2029: *lenptr = 3; return TRUE;             /* PS */
199    #else
200      case CHAR_NEL:
201      case 0x2028:                                       /* LS */
202      case 0x2029: *lenptr = 1; return TRUE;             /* PS */
203    #endif  /* COMPILE_PCRE8 */
204    #endif  /* Not EBCDIC */
205    
206    default: return FALSE;    default: return FALSE;
207    }    }
208  }  }

Legend:
Removed from v.117  
changed lines
  Added in v.1033

  ViewVC Help
Powered by ViewVC 1.1.5