/[pcre]/code/trunk/pcre_newline.c
ViewVC logotype

Diff of /code/trunk/pcre_newline.c

Parent Directory | Revision Log | View Patch

revision 149 by ph10, Mon Apr 16 15:28:08 2007 UTC | revision 1033 by ph10, Mon Sep 10 11:02:48 2012 UTC
# Line 6  Line 6 
6  and semantics are as close as possible to those of the Perl 5 language.  and semantics are as close as possible to those of the Perl 5 language.
7    
8                         Written by Philip Hazel                         Written by Philip Hazel
9             Copyright (c) 1997-2007 University of Cambridge             Copyright (c) 1997-2012 University of Cambridge
10    
11  -----------------------------------------------------------------------------  -----------------------------------------------------------------------------
12  Redistribution and use in source and binary forms, with or without  Redistribution and use in source and binary forms, with or without
# Line 38  POSSIBILITY OF SUCH DAMAGE. Line 38  POSSIBILITY OF SUCH DAMAGE.
38  */  */
39    
40    
41  /* This module contains internal functions for testing newlines when more than  /* This module contains internal functions for testing newlines when more than
42  one kind of newline is to be recognized. When a newline is found, its length is  one kind of newline is to be recognized. When a newline is found, its length is
43  returned. In principle, we could implement several newline "types", each  returned. In principle, we could implement several newline "types", each
44  referring to a different set of newline characters. At present, PCRE supports  referring to a different set of newline characters. At present, PCRE supports
45  only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,  only NLTYPE_FIXED, which gets handled without these functions, NLTYPE_ANYCRLF,
46  and NLTYPE_ANY. The full list of Unicode newline characters is taken from  and NLTYPE_ANY. The full list of Unicode newline characters is taken from
47  http://unicode.org/unicode/reports/tr18/. */  http://unicode.org/unicode/reports/tr18/. */
48    
49    
50    #ifdef HAVE_CONFIG_H
51    #include "config.h"
52    #endif
53    
54  #include "pcre_internal.h"  #include "pcre_internal.h"
55    
56    
# Line 56  http://unicode.org/unicode/reports/tr18/ Line 60  http://unicode.org/unicode/reports/tr18/
60  *************************************************/  *************************************************/
61    
62  /* It is guaranteed that the initial value of ptr is less than the end of the  /* It is guaranteed that the initial value of ptr is less than the end of the
63  string that is being processed.  string that is being processed.
64    
65  Arguments:  Arguments:
66    ptr          pointer to possible newline    ptr          pointer to possible newline
67    type         the newline type    type         the newline type
68    endptr       pointer to the end of the string    endptr       pointer to the end of the string
69    lenptr       where to return the length    lenptr       where to return the length
70    utf8         TRUE if in utf8 mode    utf          TRUE if in utf mode
71    
72  Returns:       TRUE or FALSE  Returns:       TRUE or FALSE
73  */  */
74    
75  BOOL  BOOL
76  _pcre_is_newline(const uschar *ptr, int type, const uschar *endptr,  PRIV(is_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR endptr, int *lenptr,
77    int *lenptr, BOOL utf8)    BOOL utf)
78  {  {
79  int c;  int c;
80  if (utf8) { GETCHAR(c, ptr); } else c = *ptr;  (void)utf;
81    #ifdef SUPPORT_UTF
82    if (utf)
83      {
84      GETCHAR(c, ptr);
85      }
86    else
87    #endif  /* SUPPORT_UTF */
88      c = *ptr;
89    
90    /* Note that this function is called only for ANY or ANYCRLF. */
91    
92  if (type == NLTYPE_ANYCRLF) switch(c)  if (type == NLTYPE_ANYCRLF) switch(c)
93    {    {
94    case 0x000a: *lenptr = 1; return TRUE;             /* LF */    case CHAR_LF: *lenptr = 1; return TRUE;
95    case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;    case CHAR_CR: *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
96                 return TRUE;                          /* CR */                 return TRUE;
97    default: return FALSE;    default: return FALSE;
98    }    }
99    
# Line 87  if (type == NLTYPE_ANYCRLF) switch(c) Line 101  if (type == NLTYPE_ANYCRLF) switch(c)
101    
102  else switch(c)  else switch(c)
103    {    {
104    case 0x000a:                                       /* LF */  #ifdef EBCDIC
105    case 0x000b:                                       /* VT */    case CHAR_NEL:
106    case 0x000c: *lenptr = 1; return TRUE;             /* FF */  #endif
107    case 0x000d: *lenptr = (ptr < endptr - 1 && ptr[1] == 0x0a)? 2 : 1;    case CHAR_LF:
108                 return TRUE;                          /* CR */    case CHAR_VT:
109    case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;   /* NEL */    case CHAR_FF: *lenptr = 1; return TRUE;
110    
111      case CHAR_CR:
112      *lenptr = (ptr < endptr - 1 && ptr[1] == CHAR_LF)? 2 : 1;
113      return TRUE;
114    
115    #ifndef EBCDIC
116    #ifdef COMPILE_PCRE8
117      case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
118    case 0x2028:                                       /* LS */    case 0x2028:                                       /* LS */
119    case 0x2029: *lenptr = 3; return TRUE;             /* PS */    case 0x2029: *lenptr = 3; return TRUE;             /* PS */
120    #else   /* 16-bit (can't be EBCDIC) */
121      case CHAR_NEL:
122      case 0x2028:                                       /* LS */
123      case 0x2029: *lenptr = 1; return TRUE;             /* PS */
124    #endif  /* COMPILE_PCRE8 */
125    #endif  /* Not EBCDIC */
126    
127    default: return FALSE;    default: return FALSE;
128    }    }
129  }  }
# Line 113  Arguments: Line 142  Arguments:
142    type         the newline type    type         the newline type
143    startptr     pointer to the start of the string    startptr     pointer to the start of the string
144    lenptr       where to return the length    lenptr       where to return the length
145    utf8         TRUE if in utf8 mode    utf          TRUE if in utf mode
146    
147  Returns:       TRUE or FALSE  Returns:       TRUE or FALSE
148  */  */
149    
150  BOOL  BOOL
151  _pcre_was_newline(const uschar *ptr, int type, const uschar *startptr,  PRIV(was_newline)(PCRE_PUCHAR ptr, int type, PCRE_PUCHAR startptr, int *lenptr,
152    int *lenptr, BOOL utf8)    BOOL utf)
153  {  {
154  int c;  int c;
155    (void)utf;
156  ptr--;  ptr--;
157  if (utf8)  #ifdef SUPPORT_UTF
158    if (utf)
159    {    {
160    BACKCHAR(ptr);    BACKCHAR(ptr);
161    GETCHAR(c, ptr);    GETCHAR(c, ptr);
162    }    }
163  else c = *ptr;  else
164    #endif  /* SUPPORT_UTF */
165      c = *ptr;
166    
167    /* Note that this function is called only for ANY or ANYCRLF. */
168    
169  if (type == NLTYPE_ANYCRLF) switch(c)  if (type == NLTYPE_ANYCRLF) switch(c)
170    {    {
171    case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;    case CHAR_LF:
172                 return TRUE;                         /* LF */    *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
173    case 0x000d: *lenptr = 1; return TRUE;            /* CR */    return TRUE;
174    
175      case CHAR_CR: *lenptr = 1; return TRUE;
176    default: return FALSE;    default: return FALSE;
177    }    }
178    
179    /* NLTYPE_ANY */
180    
181  else switch(c)  else switch(c)
182    {    {
183    case 0x000a: *lenptr = (ptr > startptr && ptr[-1] == 0x0d)? 2 : 1;    case CHAR_LF:
184                 return TRUE;                         /* LF */    *lenptr = (ptr > startptr && ptr[-1] == CHAR_CR)? 2 : 1;
185    case 0x000b:                                      /* VT */    return TRUE;
186    case 0x000c:                                      /* FF */  
187    case 0x000d: *lenptr = 1; return TRUE;            /* CR */  #ifdef EBCDIC
188    case 0x0085: *lenptr = utf8? 2 : 1; return TRUE;  /* NEL */    case CHAR_NEL:
189    case 0x2028:                                      /* LS */  #endif
190    case 0x2029: *lenptr = 3; return TRUE;            /* PS */    case CHAR_VT:
191      case CHAR_FF:
192      case CHAR_CR: *lenptr = 1; return TRUE;
193    
194    #ifndef EBCDIC
195    #ifdef COMPILE_PCRE8
196      case CHAR_NEL: *lenptr = utf? 2 : 1; return TRUE;
197      case 0x2028:                                       /* LS */
198      case 0x2029: *lenptr = 3; return TRUE;             /* PS */
199    #else
200      case CHAR_NEL:
201      case 0x2028:                                       /* LS */
202      case 0x2029: *lenptr = 1; return TRUE;             /* PS */
203    #endif  /* COMPILE_PCRE8 */
204    #endif  /* NotEBCDIC */
205    
206    default: return FALSE;    default: return FALSE;
207    }    }
208  }  }

Legend:
Removed from v.149  
changed lines
  Added in v.1033

  ViewVC Help
Powered by ViewVC 1.1.5