tesseract  5.0.0
serialis.cpp
Go to the documentation of this file.
1 /**********************************************************************
2  * File: serialis.cpp (Formerly serialmac.h)
3  * Description: Inline routines and macros for serialisation functions
4  * Author: Phil Cheatle
5  *
6  * (C) Copyright 1990, Hewlett-Packard Ltd.
7  ** Licensed under the Apache License, Version 2.0 (the "License");
8  ** you may not use this file except in compliance with the License.
9  ** You may obtain a copy of the License at
10  ** http://www.apache.org/licenses/LICENSE-2.0
11  ** Unless required by applicable law or agreed to in writing, software
12  ** distributed under the License is distributed on an "AS IS" BASIS,
13  ** WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14  ** See the License for the specific language governing permissions and
15  ** limitations under the License.
16  *
17  **********************************************************************/
18 
19 #include "serialis.h"
20 
21 #include "errcode.h"
22 
23 #include "helpers.h" // for ReverseN
24 
25 #include <climits> // for INT_MAX
26 #include <cstdio>
27 
28 namespace tesseract {
29 
// Default FileReader: reads the complete contents of filename into *data.
// Returns true only when the file could be opened, reported a size that is
// strictly positive and below LONG_MAX, and was read in full.
bool LoadDataFromFile(const char *filename, std::vector<char> *data) {
  FILE *stream = fopen(filename, "rb");
  if (stream == nullptr) {
    return false;
  }
  // Measure the file by seeking to its end, then go back to the start.
  fseek(stream, 0, SEEK_END);
  const long num_bytes = std::ftell(stream);
  fseek(stream, 0, SEEK_SET);
  bool loaded = false;
  // Opening a directory on Linux yields LONG_MAX as the size; reject that
  // together with empty files and ftell failures.
  const bool plausible_size = num_bytes > 0 && num_bytes < LONG_MAX;
  if (plausible_size) {
    // Keep one spare byte of capacity so a caller can append a '\0'.
    data->reserve(num_bytes + 1);
    data->resize(num_bytes); // TODO: optimize no init
    const size_t num_read = fread(&(*data)[0], 1, num_bytes, stream);
    loaded = static_cast<long>(num_read) == num_bytes;
  }
  fclose(stream);
  return loaded;
}
50 
// The default FileWriter writes the vector of char to the filename file,
// returning false on error.
// An empty vector produces an empty file and counts as success.
bool SaveDataToFile(const std::vector<char> &data, const char *filename) {
  FILE *fp = fopen(filename, "wb");
  if (fp == nullptr) {
    return false;
  }
  // Skip fwrite entirely for an empty vector: indexing element 0 of an empty
  // vector is undefined, and there is nothing to write anyway.
  const bool written =
      data.empty() || fwrite(data.data(), 1, data.size(), fp) == data.size();
  // Buffered output may only reach the file when the stream is closed, so a
  // failing fclose means the data was not saved.
  const bool closed = fclose(fp) == 0;
  return written && closed;
}
62 
64 }
65 
67  if (data_is_owned_) {
68  delete data_;
69  }
70 }
71 
72 bool TFile::DeSerializeSize(int32_t *pSize) {
73  uint32_t size;
74  if (FReadEndian(&size, sizeof(size), 1) != 1) {
75  return false;
76  }
77  if (size > data_->size() / 4) {
78  // Reverse endianness.
79  swap_ = !swap_;
80  ReverseN(&size, 4);
81  }
82  *pSize = size;
83  return true;
84 }
85 
86 bool TFile::DeSerializeSkip(size_t size) {
87  uint32_t len;
88  if (!DeSerialize(&len)) {
89  return false;
90  }
91  return Skip(len * size);
92 }
93 
94 bool TFile::DeSerialize(std::string &data) {
95  uint32_t size;
96  if (!DeSerialize(&size)) {
97  return false;
98  } else if (size > 0) {
99  // TODO: optimize.
100  data.resize(size);
101  return DeSerialize(&data[0], size);
102  }
103  data.clear();
104  return true;
105 }
106 
107 bool TFile::Serialize(const std::string &data) {
108  uint32_t size = data.size();
109  return Serialize(&size) && Serialize(data.c_str(), size);
110 }
111 
112 bool TFile::DeSerialize(std::vector<char> &data) {
113  uint32_t size;
114  if (!DeSerialize(&size)) {
115  return false;
116  } else if (size > 0) {
117  // TODO: optimize.
118  data.resize(size);
119  return DeSerialize(&data[0], data.size());
120  }
121  data.clear();
122  return true;
123 }
124 
125 bool TFile::Serialize(const std::vector<char> &data) {
126  uint32_t size = data.size();
127  if (!Serialize(&size)) {
128  return false;
129  } else if (size > 0) {
130  return Serialize(&data[0], size);
131  }
132  return true;
133 }
134 
135 bool TFile::Skip(size_t count) {
136  offset_ += count;
137  return true;
138 }
139 
140 bool TFile::Open(const char *filename, FileReader reader) {
141  if (!data_is_owned_) {
142  data_ = new std::vector<char>;
143  data_is_owned_ = true;
144  }
145  offset_ = 0;
146  is_writing_ = false;
147  swap_ = false;
148  if (reader == nullptr) {
149  return LoadDataFromFile(filename, data_);
150  } else {
151  return (*reader)(filename, data_);
152  }
153 }
154 
155 bool TFile::Open(const char *data, size_t size) {
156  offset_ = 0;
157  if (!data_is_owned_) {
158  data_ = new std::vector<char>;
159  data_is_owned_ = true;
160  }
161  is_writing_ = false;
162  swap_ = false;
163  data_->resize(size); // TODO: optimize no init
164  memcpy(&(*data_)[0], data, size);
165  return true;
166 }
167 
168 bool TFile::Open(FILE *fp, int64_t end_offset) {
169  offset_ = 0;
170  auto current_pos = std::ftell(fp);
171  if (current_pos < 0) {
172  // ftell failed.
173  return false;
174  }
175  if (end_offset < 0) {
176  if (fseek(fp, 0, SEEK_END)) {
177  return false;
178  }
179  end_offset = ftell(fp);
180  if (fseek(fp, current_pos, SEEK_SET)) {
181  return false;
182  }
183  }
184  size_t size = end_offset - current_pos;
185  is_writing_ = false;
186  swap_ = false;
187  if (!data_is_owned_) {
188  data_ = new std::vector<char>;
189  data_is_owned_ = true;
190  }
191  data_->resize(size); // TODO: optimize no init
192  return fread(&(*data_)[0], 1, size, fp) == size;
193 }
194 
195 char *TFile::FGets(char *buffer, int buffer_size) {
196  ASSERT_HOST(!is_writing_);
197  int size = 0;
198  while (size + 1 < buffer_size && offset_ < data_->size()) {
199  buffer[size++] = (*data_)[offset_++];
200  if ((*data_)[offset_ - 1] == '\n') {
201  break;
202  }
203  }
204  if (size < buffer_size) {
205  buffer[size] = '\0';
206  }
207  return size > 0 ? buffer : nullptr;
208 }
209 
210 size_t TFile::FReadEndian(void *buffer, size_t size, size_t count) {
211  auto num_read = FRead(buffer, size, count);
212  if (swap_ && size != 1) {
213  char *char_buffer = static_cast<char *>(buffer);
214  for (size_t i = 0; i < num_read; ++i, char_buffer += size) {
215  ReverseN(char_buffer, size);
216  }
217  }
218  return num_read;
219 }
220 
221 size_t TFile::FRead(void *buffer, size_t size, size_t count) {
222  ASSERT_HOST(!is_writing_);
223  ASSERT_HOST(size > 0);
224  size_t required_size;
225  if (SIZE_MAX / size <= count) {
226  // Avoid integer overflow.
227  required_size = data_->size() - offset_;
228  } else {
229  required_size = size * count;
230  if (data_->size() - offset_ < required_size) {
231  required_size = data_->size() - offset_;
232  }
233  }
234  if (required_size > 0 && buffer != nullptr) {
235  memcpy(buffer, &(*data_)[offset_], required_size);
236  }
237  offset_ += required_size;
238  return required_size / size;
239 }
240 
242  ASSERT_HOST(!is_writing_);
243  offset_ = 0;
244 }
245 
246 void TFile::OpenWrite(std::vector<char> *data) {
247  offset_ = 0;
248  if (data != nullptr) {
249  if (data_is_owned_) {
250  delete data_;
251  }
252  data_ = data;
253  data_is_owned_ = false;
254  } else if (!data_is_owned_) {
255  data_ = new std::vector<char>;
256  data_is_owned_ = true;
257  }
258  is_writing_ = true;
259  swap_ = false;
260  data_->clear();
261 }
262 
263 bool TFile::CloseWrite(const char *filename, FileWriter writer) {
264  ASSERT_HOST(is_writing_);
265  if (writer == nullptr) {
266  return SaveDataToFile(*data_, filename);
267  } else {
268  return (*writer)(*data_, filename);
269  }
270 }
271 
272 size_t TFile::FWrite(const void *buffer, size_t size, size_t count) {
273  ASSERT_HOST(is_writing_);
274  ASSERT_HOST(size > 0);
275  ASSERT_HOST(SIZE_MAX / size > count);
276  size_t total = size * count;
277  const char *buf = static_cast<const char *>(buffer);
278  // This isn't very efficient, but memory is so fast compared to disk
279  // that it is relatively unimportant, and very simple.
280  for (size_t i = 0; i < total; ++i) {
281  data_->push_back(buf[i]);
282  }
283  return count;
284 }
285 
286 } // namespace tesseract.
#define ASSERT_HOST(x)
Definition: errcode.h:59
void ReverseN(void *ptr, int num_bytes)
Definition: helpers.h:189
bool(*)(const std::vector< char > &data, const char *filename) FileWriter
Definition: serialis.h:48
bool SaveDataToFile(const GenericVector< char > &data, const char *filename)
bool(*)(const char *filename, std::vector< char > *data) FileReader
Definition: baseapi.h:63
bool LoadDataFromFile(const char *filename, GenericVector< char > *data)
bool DeSerializeSize(int32_t *data)
Definition: serialis.cpp:72
void OpenWrite(std::vector< char > *data)
Definition: serialis.cpp:246
size_t FWrite(const void *buffer, size_t size, size_t count)
Definition: serialis.cpp:272
size_t FReadEndian(void *buffer, size_t size, size_t count)
Definition: serialis.cpp:210
bool DeSerialize(std::string &data)
Definition: serialis.cpp:94
bool Serialize(const std::string &data)
Definition: serialis.cpp:107
size_t FRead(void *buffer, size_t size, size_t count)
Definition: serialis.cpp:221
char * FGets(char *buffer, int buffer_size)
Definition: serialis.cpp:195
bool DeSerializeSkip(size_t size=1)
Definition: serialis.cpp:86
bool Open(const char *filename, FileReader reader)
Definition: serialis.cpp:140
bool Skip(size_t count)
Definition: serialis.cpp:135
bool CloseWrite(const char *filename, FileWriter writer)
Definition: serialis.cpp:263