Open Chinese Convert  1.0.3
A project for conversion between Traditional and Simplified Chinese
Segments.hpp
1 /*
2  * Open Chinese Convert
3  *
4  * Copyright 2010-2014 BYVoid <byvoid@byvoid.com>
5  *
6  * Licensed under the Apache License, Version 2.0 (the "License");
7  * you may not use this file except in compliance with the License.
8  * You may obtain a copy of the License at
9  *
10  * http://www.apache.org/licenses/LICENSE-2.0
11  *
12  * Unless required by applicable law or agreed to in writing, software
13  * distributed under the License is distributed on an "AS IS" BASIS,
14  * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
15  * See the License for the specific language governing permissions and
16  * limitations under the License.
17  */
18 
19 #pragma once
20 
21 #include "Common.hpp"
22 
23 namespace opencc {
28 class OPENCC_EXPORT Segments {
29 public:
30  Segments() {}
31 
32  Segments(std::initializer_list<const char*> initList) {
33  for (const string& item : initList) {
34  AddSegment(item);
35  }
36  }
37 
38  Segments(std::initializer_list<string> initList) {
39  for (const string& item : initList) {
40  AddSegment(item);
41  }
42  }
43 
44  void AddSegment(const char* unmanagedString) {
45  indexes.push_back(std::make_pair(unmanaged.size(), false));
46  unmanaged.push_back(unmanagedString);
47  }
48 
49  void AddSegment(const string& str) {
50  indexes.push_back(std::make_pair(managed.size(), true));
51  managed.push_back(str);
52  }
53 
54  class iterator : public std::iterator<std::input_iterator_tag, const char*> {
55  public:
56  iterator(const Segments* const _segments, size_t _cursor)
57  : segments(_segments), cursor(_cursor) {}
58 
59  iterator& operator++() {
60  cursor++;
61  return *this;
62  }
63 
64  bool operator==(const iterator& that) const {
65  return cursor == that.cursor && segments == that.segments;
66  }
67 
68  bool operator!=(const iterator& that) const {
69  return !this->operator==(that);
70  }
71 
72  const char* operator*() const { return segments->At(cursor); }
73 
74  private:
75  const Segments* const segments;
76  size_t cursor;
77  };
78 
79  const char* At(size_t cursor) const {
80  const auto& index = indexes[cursor];
81  if (index.second) {
82  return managed[index.first].c_str();
83  } else {
84  return unmanaged[index.first];
85  }
86  }
87 
88  size_t Length() const { return indexes.size(); }
89 
90  iterator begin() const { return iterator(this, 0); }
91 
92  iterator end() const { return iterator(this, indexes.size()); }
93 
94  string ToString() const {
95  // TODO implement a nested structure to reduce concatenation,
96  // like a purely functional differential list
97  std::ostringstream buffer;
98  for (const char* segment : *this) {
99  buffer << segment;
100  }
101  return buffer.str();
102  }
103 
104 private:
105  Segments(const Segments&) {}
106 
107  vector<const char*> unmanaged;
108  vector<string> managed;
109  // index, managed
110  vector<std::pair<size_t, bool>> indexes;
111 };
112 }
Segmented text.
Definition: Segments.hpp:28
Definition: BinaryDict.hpp:24
Definition: Segments.hpp:54