Line data Source code
1 : #ifndef BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
2 : #define BOOST_DATE_TIME_STRING_PARSE_TREE___HPP__
3 :
4 : /* Copyright (c) 2004-2005 CrystalClear Software, Inc.
5 : * Use, modification and distribution is subject to the
6 : * Boost Software License, Version 1.0. (See accompanying
7 : * file LICENSE_1_0.txt or http://www.boost.org/LICENSE_1_0.txt)
8 : * Author: Jeff Garland, Bart Garst
9 : * $Date$
10 : */
11 :
12 :
13 : #include <boost/algorithm/string/case_conv.hpp>
14 : #include <cctype>
15 : #include <map>
16 : #include <string>
17 : #include <vector>
18 : #include <ostream>
19 : #include <iterator>
20 : #include <algorithm>
21 :
22 : namespace boost { namespace date_time {
23 :
24 :
25 : template<typename charT>
26 : struct parse_match_result
27 : {
28 30012 : parse_match_result() :
29 15006 : match_depth(0),
30 15006 : current_match(PARSE_ERROR)
31 30012 : {}
32 : typedef std::basic_string<charT> string_type;
33 : string_type remaining() const
34 : {
35 : if (match_depth == cache.size()) {
36 : return string_type();
37 : }
38 : if (current_match == PARSE_ERROR) {
39 : return cache;
40 : }
41 : //some of the cache was used return the rest
42 : return string_type(cache, match_depth);
43 : }
44 : charT last_char() const
45 : {
46 : return cache[cache.size()-1];
47 : }
48 : //! Returns true if more characters were parsed than was necessary
49 : /*! Should be used in conjunction with last_char()
50 : * to get the remaining character.
51 : */
52 2500 : bool has_remaining() const
53 : {
54 2500 : return (cache.size() > match_depth);
55 : }
56 :
57 : // cache will hold characters that have been read from the stream
58 : string_type cache;
59 : unsigned short match_depth;
60 : short current_match;
61 : enum PARSE_STATE { PARSE_ERROR = -1 };
62 : };
63 :
64 : //for debug -- really only char streams...
65 : template<typename charT>
66 : std::basic_ostream<charT>&
67 : operator<<(std::basic_ostream<charT>& os, parse_match_result<charT>& mr)
68 : {
69 : os << "cm: " << mr.current_match
70 : << " C: '" << mr.cache
71 : << "' md: " << mr.match_depth
72 : << " R: " << mr.remaining();
73 : return os;
74 : }
75 :
76 :
77 :
78 : //! Recursive data structure to allow efficient parsing of various strings
79 : /*! This class provides a quick lookup by building what amounts to a
80 : * tree data structure. It also features a match function which can
81 : * can handle nasty input interators by caching values as it recurses
82 : * the tree so that it can backtrack as needed.
83 : */
84 : template<typename charT>
85 : struct string_parse_tree
86 : {
87 : #if BOOST_WORKAROUND( BOOST_BORLANDC, BOOST_TESTED_AT(0x581) )
88 : typedef std::multimap<charT, string_parse_tree< charT> > ptree_coll;
89 : #else
90 : typedef std::multimap<charT, string_parse_tree > ptree_coll;
91 : #endif
92 : typedef typename ptree_coll::value_type value_type;
93 : typedef typename ptree_coll::iterator iterator;
94 : typedef typename ptree_coll::const_iterator const_iterator;
95 : typedef std::basic_string<charT> string_type;
96 : typedef std::vector<std::basic_string<charT> > collection_type;
97 : typedef parse_match_result<charT> parse_match_result_type;
98 :
99 : /*! Parameter "starting_point" designates where the numbering begins.
100 : * A starting_point of zero will start the numbering at zero
101 : * (Sun=0, Mon=1, ...) were a starting_point of one starts the
102 : * numbering at one (Jan=1, Feb=2, ...). The default is zero,
103 : * negative vaules are not allowed */
104 168 : string_parse_tree(collection_type names, unsigned int starting_point=0) :
105 84 : m_value(parse_match_result_type::PARSE_ERROR)
106 84 : {
107 : // iterate thru all the elements and build the tree
108 84 : unsigned short index = 0;
109 812 : while (index != names.size() ) {
110 728 : string_type s = boost::algorithm::to_lower_copy(names[index]);
111 728 : insert(s, static_cast<unsigned short>(index + starting_point));
112 728 : index++;
113 728 : }
114 : //set the last tree node = index+1 indicating a value
115 84 : index++;
116 168 : }
117 :
118 :
119 8232 : string_parse_tree(short value = parse_match_result_type::PARSE_ERROR) :
120 4116 : m_value(value)
121 8232 : {}
122 : ptree_coll m_next_chars;
123 : short m_value;
124 :
125 728 : void insert(const string_type& s, unsigned short value)
126 : {
127 728 : unsigned int i = 0;
128 728 : iterator ti;
129 4816 : while(i < s.size()) {
130 4088 : if (i==0) {
131 728 : if (i == (s.size()-1)) {
132 0 : ti = m_next_chars.insert(value_type(s[i],
133 0 : string_parse_tree<charT>(value)));
134 0 : }
135 : else {
136 1456 : ti = m_next_chars.insert(value_type(s[i],
137 728 : string_parse_tree<charT>()));
138 : }
139 728 : }
140 : else {
141 3360 : if (i == (s.size()-1)) {
142 1456 : ti = ti->second.m_next_chars.insert(value_type(s[i],
143 728 : string_parse_tree<charT>(value)));
144 728 : }
145 :
146 : else {
147 5264 : ti = ti->second.m_next_chars.insert(value_type(s[i],
148 2632 : string_parse_tree<charT>()));
149 : }
150 :
151 : }
152 4088 : i++;
153 : }
154 728 : }
155 :
156 :
157 : //! Recursive function that finds a matching string in the tree.
158 : /*! Must check match_results::has_remaining() after match() is
159 : * called. This is required so the user can determine if
160 : * stream iterator is already pointing to the expected
161 : * character or not (match() might advance sitr to next char in stream).
162 : *
163 : * A parse_match_result that has been returned from a failed match
164 : * attempt can be sent in to the match function of a different
165 : * string_parse_tree to attempt a match there. Use the iterators
166 : * for the partially consumed stream, the parse_match_result object,
167 : * and '0' for the level parameter. */
168 : short
169 6 : match(std::istreambuf_iterator<charT>& sitr,
170 : std::istreambuf_iterator<charT>& stream_end,
171 : parse_match_result_type& result,
172 : unsigned int& level) const
173 : {
174 :
175 6 : level++;
176 : charT c;
177 : // if we conditionally advance sitr, we won't have
178 : // to consume the next character past the input
179 6 : bool adv_itr = true;
180 6 : if (level > result.cache.size()) {
181 0 : if (sitr == stream_end) return 0; //bail - input exhausted
182 0 : c = static_cast<charT>(std::tolower(*sitr));
183 : //result.cache += c;
184 : //sitr++;
185 0 : }
186 : else {
187 : // if we're looking for characters from the cache,
188 : // we don't want to increment sitr
189 6 : adv_itr = false;
190 6 : c = static_cast<charT>(std::tolower(result.cache[level-1]));
191 : }
192 6 : const_iterator litr = m_next_chars.lower_bound(c);
193 6 : const_iterator uitr = m_next_chars.upper_bound(c);
194 6 : while (litr != uitr) { // equal if not found
195 0 : if(adv_itr) {
196 0 : sitr++;
197 0 : result.cache += c;
198 0 : }
199 0 : if (litr->second.m_value != -1) { // -1 is default value
200 0 : if (result.match_depth < level) {
201 0 : result.current_match = litr->second.m_value;
202 0 : result.match_depth = static_cast<unsigned short>(level);
203 0 : }
204 0 : litr->second.match(sitr, stream_end,
205 0 : result, level);
206 0 : level--;
207 0 : }
208 : else {
209 0 : litr->second.match(sitr, stream_end,
210 0 : result, level);
211 0 : level--;
212 : }
213 :
214 0 : if(level <= result.cache.size()) {
215 0 : adv_itr = false;
216 0 : }
217 :
218 0 : litr++;
219 : }
220 6 : return result.current_match;
221 :
222 6 : }
223 :
224 : /*! Must check match_results::has_remaining() after match() is
225 : * called. This is required so the user can determine if
226 : * stream iterator is already pointing to the expected
227 : * character or not (match() might advance sitr to next char in stream).
228 : */
229 : parse_match_result_type
230 0 : match(std::istreambuf_iterator<charT>& sitr,
231 : std::istreambuf_iterator<charT>& stream_end) const
232 : {
233 : // lookup to_lower of char in tree.
234 0 : unsigned int level = 0;
235 : // string_type cache;
236 0 : parse_match_result_type result;
237 0 : match(sitr, stream_end, result, level);
238 0 : return result;
239 0 : }
240 :
241 : void printme(std::ostream& os, int& level)
242 : {
243 : level++;
244 : iterator itr = m_next_chars.begin();
245 : iterator end = m_next_chars.end();
246 : // os << "starting level: " << level << std::endl;
247 : while (itr != end) {
248 : os << "level: " << level
249 : << " node: " << itr->first
250 : << " value: " << itr->second.m_value
251 : << std::endl;
252 : itr->second.printme(os, level);
253 : itr++;
254 : }
255 : level--;
256 : }
257 :
258 : void print(std::ostream& os)
259 : {
260 : int level = 0;
261 : printme(os, level);
262 : }
263 :
264 : void printmatch(std::ostream& os, charT c)
265 : {
266 : iterator litr = m_next_chars.lower_bound(c);
267 : iterator uitr = m_next_chars.upper_bound(c);
268 : os << "matches for: " << c << std::endl;
269 : while (litr != uitr) {
270 : os << " node: " << litr->first
271 : << " value: " << litr->second.m_value
272 : << std::endl;
273 : litr++;
274 : }
275 : }
276 :
277 : };
278 :
279 :
280 : } } //namespace
281 : #endif
|