1 : /** \file
2 : * Parser for APT records
3 : */
4 :
5 : /*
6 : * Copyright (C) 2007 Enrico Zini <enrico@enricozini.org>
7 : *
8 : * This library is free software; you can redistribute it and/or
9 : * modify it under the terms of the GNU Lesser General Public
10 : * License as published by the Free Software Foundation; either
11 : * version 2.1 of the License, or (at your option) any later version.
12 : *
13 : * This library is distributed in the hope that it will be useful,
14 : * but WITHOUT ANY WARRANTY; without even the implied warranty of
15 : * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
16 : * Lesser General Public License for more details.
17 : *
18 : * You should have received a copy of the GNU Lesser General Public
19 : * License along with this library; if not, write to the Free Software
20 : * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA
21 : */
22 :
23 : #include <ept/core/apt/recordparser.h>
24 :
25 : #include <algorithm>
26 : #include <cctype>
27 :
28 : //#include <iostream>
29 :
30 : using namespace std;
31 :
32 : namespace ept {
33 : namespace core {
34 : namespace record {
35 :
36 : struct rpcompare
37 : {
38 : const RecordParser& rp;
39 1310 : rpcompare(const RecordParser& rp) : rp(rp) {}
40 89600 : bool operator()(size_t a, size_t b)
41 : {
42 89600 : return rp.name(a) < rp.name(b);
43 : }
44 : };
45 :
46 1310 : void RecordParser::scan(const std::string& str)
47 : {
48 1310 : buffer = str;
49 1310 : ends.clear();
50 1310 : sorted.clear();
51 :
52 : //cerr << "PARSE " << endl << buffer << "*****" << endl;
53 :
54 : // Scan the buffer, taking note of all ending offsets of the various fields
55 1310 : size_t pos = 0;
56 1310 : size_t idx = 0;
57 29815 : while (pos < buffer.size() - 1)
58 : {
59 : //cerr << "PREPOS " << pos << " left: " << buffer.substr(pos, 10) << endl;
60 28505 : pos = buffer.find("\n", pos);
61 : //cerr << "POSTPOS " << pos << " left: " << (pos == string::npos ? "NONE" : buffer.substr(pos, 10)) << endl;
62 :
63 : // The buffer does not end with a newline
64 28505 : if (pos == string::npos)
65 : {
66 : //cerr << "ENDNOTEOL" << endl;
67 1 : pos = buffer.size();
68 1 : ends.push_back(pos);
69 1 : sorted.push_back(idx++);
70 1 : break;
71 : }
72 :
73 28504 : ++pos;
74 : //cerr << "POSTPOSINC " << pos << " left: " << buffer.substr(pos, 10) << endl;
75 :
76 : // The buffer ends with a newline
77 28504 : if (pos == buffer.size())
78 : {
79 : //cerr << "ENDEOL" << endl;
80 1309 : ends.push_back(pos);
81 1309 : sorted.push_back(idx++);
82 1309 : break;
83 : }
84 :
85 : // Terminate parsing on double newlines
86 27195 : if (buffer[pos] == '\n')
87 : {
88 : //cerr << "ENDDOUBLENL" << endl;
89 0 : ends.push_back(pos);
90 0 : sorted.push_back(idx++);
91 0 : break;
92 : }
93 :
94 : // Mark the end of the field if it's not a continuation line
95 27195 : if (!isspace(buffer[pos]))
96 : {
97 : //cerr << "INNERFIELD" << endl;
98 17224 : ends.push_back(pos);
99 17224 : sorted.push_back(idx++);
100 : } //else
101 : //cerr << "CONTLINE" << endl;
102 : }
103 :
104 : // Sort the sorted array
105 1310 : sort(sorted.begin(), sorted.end(), rpcompare(*this));
106 :
107 : //for (size_t i = 0; i < ends.size(); ++i)
108 : // cerr << ends[i] << "\t" << name(i) << "\t" << sorted[i] << "\t" << name(sorted[i]) << endl;
109 1310 : }
110 :
111 187124 : std::string RecordParser::field(size_t idx) const
112 : {
113 187124 : if (idx >= ends.size())
114 1 : return string();
115 187123 : if (idx == 0)
116 16577 : return buffer.substr(0, ends[0]);
117 : else
118 170546 : return buffer.substr(ends[idx-1], ends[idx]-ends[idx-1]);
119 : }
120 :
121 185814 : std::string RecordParser::name(size_t idx) const
122 : {
123 185814 : string res = field(idx);
124 185814 : size_t pos = res.find(":");
125 185814 : if (pos == string::npos)
126 2 : return res;
127 185812 : return res.substr(0, pos);
128 : }
129 :
130 1310 : std::string RecordParser::lookup(size_t idx) const
131 : {
132 1310 : string res = field(idx);
133 1310 : size_t pos = res.find(":");
134 1310 : if (pos == string::npos)
135 1 : return res;
136 : // Skip initial whitespace after the :
137 1309 : for (++pos; pos < res.size() && isspace(res[pos]); ++pos)
138 : ;
139 1309 : res = res.substr(pos);
140 : // Trim spaces at the end
141 3927 : while (!res.empty() && isspace(res[res.size() - 1]))
142 1309 : res.resize(res.size() - 1);
143 1309 : return res;
144 : }
145 :
146 1310 : size_t RecordParser::index(const std::string& str) const
147 : {
148 : int begin, end;
149 :
150 : /* Binary search */
151 1310 : begin = -1, end = size();
152 7924 : while (end - begin > 1)
153 : {
154 5304 : int cur = (end + begin) / 2;
155 : //cerr << "Test " << cur << " " << str << " < " << name(cur) << endl;
156 5304 : if (name(sorted[cur]) > str)
157 1724 : end = cur;
158 : else
159 3580 : begin = cur;
160 : }
161 :
162 1310 : if (begin == -1 || name(sorted[begin]) != str)
163 1 : return size();
164 : else
165 1309 : return sorted[begin];
166 : }
167 :
168 : }
169 : }
170 : }
171 :
172 : // vim:set ts=4 sw=4:
|