1 |
// Copyright (c) 2005, Google Inc.
|
2 |
// All rights reserved.
|
3 |
//
|
4 |
// Redistribution and use in source and binary forms, with or without
|
5 |
// modification, are permitted provided that the following conditions are
|
6 |
// met:
|
7 |
//
|
8 |
// * Redistributions of source code must retain the above copyright
|
9 |
// notice, this list of conditions and the following disclaimer.
|
10 |
// * Redistributions in binary form must reproduce the above
|
11 |
// copyright notice, this list of conditions and the following disclaimer
|
12 |
// in the documentation and/or other materials provided with the
|
13 |
// distribution.
|
14 |
// * Neither the name of Google Inc. nor the names of its
|
15 |
// contributors may be used to endorse or promote products derived from
|
16 |
// this software without specific prior written permission.
|
17 |
//
|
18 |
// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
19 |
// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
20 |
// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
21 |
// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
22 |
// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
23 |
// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
24 |
// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
25 |
// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
26 |
// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
27 |
// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
28 |
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
29 |
//
|
30 |
// Author: Sanjay Ghemawat
|
31 |
|
32 |
#ifdef HAVE_CONFIG_H
|
33 |
#include "config.h"
|
34 |
#endif
|
35 |
|
36 |
#include <vector>
|
37 |
#include <assert.h>
|
38 |
|
39 |
#include "pcrecpp_internal.h"
|
40 |
#include "pcre_scanner.h"
|
41 |
|
42 |
using std::vector;
|
43 |
|
44 |
namespace pcrecpp {
|
45 |
|
46 |
// Default constructor: the scanner owns an empty data_ string and input_ is
// initialised from it.  No skip pattern is installed, skipping is off,
// comment saving is off, and no comment storage has been allocated yet.
Scanner::Scanner()
    : data_(),
      input_(data_),
      skip_(NULL),            // no skip pattern yet
      should_skip_(false),
      skip_repeat_(false),
      save_comments_(false),
      comments_(NULL),        // created on demand by ConsumeSkip()
      comments_offset_(0) {
}
|
56 |
|
57 |
// Constructs a scanner over a private copy of `in`: the text is copied into
// data_ and input_ is initialised from that copy.  As with the default
// constructor, no skip pattern is installed, skipping and comment saving are
// off, and no comment storage has been allocated yet.
Scanner::Scanner(const string& in)
    : data_(in),
      input_(data_),
      skip_(NULL),            // no skip pattern yet
      should_skip_(false),
      skip_repeat_(false),
      save_comments_(false),
      comments_(NULL),        // created on demand by ConsumeSkip()
      comments_offset_(0) {
}
|
67 |
|
68 |
// Releases the heap objects this scanner owns: the skip pattern and the
// saved-comments vector.  Either pointer may still be NULL, which `delete`
// accepts.
Scanner::~Scanner() {
  delete skip_;
  delete comments_;
}
|
72 |
|
73 |
void Scanner::SetSkipExpression(const char* re) {
|
74 |
delete skip_;
|
75 |
if (re != NULL) {
|
76 |
skip_ = new RE(re);
|
77 |
should_skip_ = true;
|
78 |
skip_repeat_ = true;
|
79 |
ConsumeSkip();
|
80 |
} else {
|
81 |
skip_ = NULL;
|
82 |
should_skip_ = false;
|
83 |
skip_repeat_ = false;
|
84 |
}
|
85 |
}
|
86 |
|
87 |
void Scanner::Skip(const char* re) {
|
88 |
delete skip_;
|
89 |
if (re != NULL) {
|
90 |
skip_ = new RE(re);
|
91 |
should_skip_ = true;
|
92 |
skip_repeat_ = false;
|
93 |
ConsumeSkip();
|
94 |
} else {
|
95 |
skip_ = NULL;
|
96 |
should_skip_ = false;
|
97 |
skip_repeat_ = false;
|
98 |
}
|
99 |
}
|
100 |
|
101 |
// Turns skipping off without discarding the installed skip pattern, so that
// EnableSkip() can turn it back on later.
// Precondition (checked only when assertions are enabled): a skip pattern
// has been installed.
void Scanner::DisableSkip() {
  assert(skip_ != NULL);

  should_skip_ = false;
}
|
105 |
|
106 |
// Turns skipping back on using the installed skip pattern and immediately
// consumes any skippable text at the current position.
// Precondition (checked only when assertions are enabled): a skip pattern
// has been installed.
void Scanner::EnableSkip() {
  assert(skip_ != NULL);

  should_skip_ = true;
  ConsumeSkip();
}
|
111 |
|
112 |
int Scanner::LineNumber() const {
|
113 |
// TODO: Make it more efficient by keeping track of the last point
|
114 |
// where we computed line numbers and counting newlines since then.
|
115 |
// We could use std:count, but not all systems have it. :-(
|
116 |
int count = 1;
|
117 |
for (const char* p = data_.data(); p < input_.data(); ++p)
|
118 |
if (*p == '\n')
|
119 |
++count;
|
120 |
return count;
|
121 |
}
|
122 |
|
123 |
int Scanner::Offset() const {
|
124 |
return (int)(input_.data() - data_.c_str());
|
125 |
}
|
126 |
|
127 |
bool Scanner::LookingAt(const RE& re) const {
|
128 |
int consumed;
|
129 |
return re.DoMatch(input_, RE::ANCHOR_START, &consumed, 0, 0);
|
130 |
}
|
131 |
|
132 |
|
133 |
bool Scanner::Consume(const RE& re,
|
134 |
const Arg& arg0,
|
135 |
const Arg& arg1,
|
136 |
const Arg& arg2) {
|
137 |
const bool result = re.Consume(&input_, arg0, arg1, arg2);
|
138 |
if (result && should_skip_) ConsumeSkip();
|
139 |
return result;
|
140 |
}
|
141 |
|
142 |
// helper function to consume *skip_ and honour save_comments_
|
143 |
void Scanner::ConsumeSkip() {
|
144 |
const char* start_data = input_.data();
|
145 |
while (skip_->Consume(&input_)) {
|
146 |
if (!skip_repeat_) {
|
147 |
// Only one skip allowed.
|
148 |
break;
|
149 |
}
|
150 |
}
|
151 |
if (save_comments_) {
|
152 |
if (comments_ == NULL) {
|
153 |
comments_ = new vector<StringPiece>;
|
154 |
}
|
155 |
// already pointing one past end, so no need to +1
|
156 |
int length = (int)(input_.data() - start_data);
|
157 |
if (length > 0) {
|
158 |
comments_->push_back(StringPiece(start_data, length));
|
159 |
}
|
160 |
}
|
161 |
}
|
162 |
|
163 |
|
164 |
void Scanner::GetComments(int start, int end, vector<StringPiece> *ranges) {
|
165 |
// short circuit out if we've not yet initialized comments_
|
166 |
// (e.g., when save_comments is false)
|
167 |
if (!comments_) {
|
168 |
return;
|
169 |
}
|
170 |
// TODO: if we guarantee that comments_ will contain StringPieces
|
171 |
// that are ordered by their start, then we can do a binary search
|
172 |
// for the first StringPiece at or past start and then scan for the
|
173 |
// ones contained in the range, quit early (use equal_range or
|
174 |
// lower_bound)
|
175 |
for (vector<StringPiece>::const_iterator it = comments_->begin();
|
176 |
it != comments_->end(); ++it) {
|
177 |
if ((it->data() >= data_.c_str() + start &&
|
178 |
it->data() + it->size() <= data_.c_str() + end)) {
|
179 |
ranges->push_back(*it);
|
180 |
}
|
181 |
}
|
182 |
}
|
183 |
|
184 |
|
185 |
// Appends to *ranges every saved comment not yet handed out by an earlier
// call, and advances comments_offset_ past them so the next call starts
// after the last one returned here.
// Leaves *ranges untouched when no comments vector has been created yet
// (e.g., when save_comments is false).
void Scanner::GetNextComments(vector<StringPiece> *ranges) {
  if (comments_ == NULL) {
    return;
  }
  vector<StringPiece>::const_iterator next =
      comments_->begin() + comments_offset_;
  while (next != comments_->end()) {
    ranges->push_back(*next);
    ++comments_offset_;
    ++next;
  }
}
|
198 |
|
199 |
} // namespace pcrecpp
|