Google OR-Tools v9.15
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
file.cc
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
14#include "ortools/base/file.h"
15
16#include <sys/stat.h>
17#include <sys/types.h>
18
19#include <cerrno>
20#include <cstdint>
21
22#if defined(_MSC_VER)
23#include <io.h>
24#define access _access
25#define F_OK 0
26#else
27#include <unistd.h>
28#endif
29
30#include <bzlib.h>
31#include <zlib.h>
32
33#include <cstdio>
34#include <cstdlib>
35#include <cstring>
36#include <memory>
37#include <string>
38
39#include "absl/log/check.h"
40#include "absl/log/log.h"
41#include "absl/status/status.h"
42#include "absl/strings/str_cat.h"
43#include "absl/strings/string_view.h"
44#include "google/protobuf/io/tokenizer.h"
45#include "google/protobuf/message.h"
46#include "google/protobuf/text_format.h"
47
48namespace {
49enum class Format { NORMAL_FILE, GZIP_FILE, BZIP2_FILE };
50
51static Format GetFormatFromName(absl::string_view name) {
52 const int size = name.size();
53 if (size > 4 && name.substr(size - 3) == ".gz") {
54 return Format::GZIP_FILE;
55 } else if (size > 5 && name.substr(size - 4) == ".bz2") {
56 return Format::BZIP2_FILE;
57 } else {
58 return Format::NORMAL_FILE;
59 }
60}
61
62class CFile : public File {
63 public:
64 CFile(FILE* c_file, absl::string_view name) : File(name), f_(c_file) {}
65 ~CFile() override = default;
66
67 // Reads "size" bytes to buf from file, buf should be pre-allocated.
68 size_t Read(void* buf, size_t size) override {
69 return fread(buf, 1, size, f_);
70 }
71
72 // Writes "size" bytes of buf to file, buf should be pre-allocated.
73 size_t Write(const void* buf, size_t size) override {
74 return fwrite(buf, 1, size, f_);
75 }
76
77 // Closes the file and delete the underlying FILE* descriptor.
78 absl::Status Close(int /*flags*/) override {
79 absl::Status status;
80 if (f_ == nullptr) {
81 return status;
82 }
83 if (fclose(f_) == 0) {
84 f_ = nullptr;
85 } else {
86 status.Update(
87 absl::Status(absl::StatusCode::kInvalidArgument,
88 absl::StrCat("Could not close file '", name_, "'")));
89 }
90 delete this;
91 return status;
92 }
93
94 // Flushes buffer.
95 bool Flush() override { return fflush(f_) == 0; }
96
97 // Returns file size.
98 size_t Size() override {
99 struct stat f_stat;
100 stat(name_.c_str(), &f_stat);
101 return f_stat.st_size;
102 }
103
104 bool Open() const override { return f_ != nullptr; }
105
106 private:
107 FILE* f_;
108};
109
110class GzFile : public File {
111 public:
112 GzFile(gzFile gz_file, absl::string_view name) : File(name), f_(gz_file) {}
113 ~GzFile() override = default;
114
115 // Reads "size" bytes to buf from file, buf should be pre-allocated.
116 size_t Read(void* buf, size_t size) override { return gzread(f_, buf, size); }
117
118 // Writes "size" bytes of buf to file, buf should be pre-allocated.
119 size_t Write(const void* buf, size_t size) override {
120 return gzwrite(f_, buf, size);
121 }
122
123 // Closes the file and delete the underlying FILE* descriptor.
124 absl::Status Close(int /*flags*/) override {
125 absl::Status status;
126 if (f_ == nullptr) {
127 return status;
128 }
129 if (gzclose(f_) == 0) {
130 f_ = nullptr;
131 } else {
132 status.Update(
133 absl::Status(absl::StatusCode::kInvalidArgument,
134 absl::StrCat("Could not close file '", name_, "'")));
135 }
136 delete this;
137 return status;
138 }
139
140 // Flushes buffer.
141 bool Flush() override { return gzflush(f_, Z_FINISH) == Z_OK; }
142
143 // Returns file size.
144 size_t Size() override {
145 gzFile file;
146 std::string null_terminated_name = std::string(name_);
147#if defined(_MSC_VER)
148 file = gzopen(null_terminated_name.c_str(), "rb");
149#else
150 file = gzopen(null_terminated_name.c_str(), "r");
151#endif
152 if (!file) {
153 LOG(FATAL) << "Cannot get the size of '" << name_
154 << "': " << strerror(errno);
155 }
156
157 const int kLength = 5 * 1024;
158 unsigned char buffer[kLength];
159 size_t uncompressed_size = 0;
160 while (true) {
161 int err;
162 int bytes_read;
163 bytes_read = gzread(file, buffer, kLength - 1);
164 uncompressed_size += bytes_read;
165 if (bytes_read < kLength - 1) {
166 if (gzeof(file)) {
167 break;
168 } else {
169 const char* error_string;
170 error_string = gzerror(file, &err);
171 if (err) {
172 LOG(FATAL) << "Error " << error_string;
173 }
174 }
175 }
176 }
177 gzclose(file);
178 return uncompressed_size;
179 }
180
181 bool Open() const override { return f_ != nullptr; }
182
183 private:
184 gzFile f_;
185};
186
187class Bz2File : public File {
188 public:
189 Bz2File(BZFILE* bz_file, absl::string_view name) : File(name), f_(bz_file) {}
190 ~Bz2File() override = default;
191
192 // Reads "size" bytes to buf from file, buf should be pre-allocated.
193 size_t Read(void* buf, size_t size) override {
194 return BZ2_bzread(f_, buf, size);
195 }
196
197 // Writes "size" bytes of buf to file, buf should be pre-allocated.
198 size_t Write(const void* buf, size_t size) override {
199 return BZ2_bzwrite(f_, const_cast<void*>(buf), size);
200 }
201
202 // Closes the file and delete the underlying FILE* descriptor.
203 absl::Status Close(int /*flags*/) override {
204 absl::Status status;
205 if (f_ == nullptr) {
206 return absl::OkStatus();
207 }
208 BZ2_bzclose(f_);
209 f_ = nullptr;
210 delete this;
211 return absl::OkStatus();
212 }
213
214 // Flushes buffer.
215 bool Flush() override { return BZ2_bzflush(f_) == 0; }
216
217 // Returns file size.
218 size_t Size() override {
219 BZFILE* file;
220 std::string null_terminated_name = std::string(name_);
221#if defined(_MSC_VER)
222 file = BZ2_bzopen(null_terminated_name.c_str(), "rb");
223#else
224 file = BZ2_bzopen(null_terminated_name.c_str(), "r");
225#endif
226 if (!file) {
227 LOG(FATAL) << "Cannot get the size of '" << name_
228 << "': " << strerror(errno);
229 }
230
231 const int kLength = 5 * 1024;
232 unsigned char buffer[kLength];
233 size_t uncompressed_size = 0;
234 while (true) {
235 int bytes_read;
236 bytes_read = BZ2_bzread(file, buffer, kLength - 1);
237 uncompressed_size += bytes_read;
238 if (bytes_read < kLength - 1) break;
239 }
240 BZ2_bzclose(file);
241 return uncompressed_size;
242 }
243
244 bool Open() const override { return f_ != nullptr; }
245
246 private:
247 BZFILE* f_;
248};
249
250} // namespace
251
252File::File(absl::string_view name) : name_(name) {}
253
254File* File::OpenOrDie(absl::string_view file_name, absl::string_view mode) {
255 File* f = File::Open(file_name, mode);
256 CHECK(f != nullptr) << absl::StrCat("Could not open '", file_name, "'");
257 return f;
258}
259
260File* File::Open(absl::string_view file_name, absl::string_view mode) {
261 std::string null_terminated_name = std::string(file_name);
262 std::string null_terminated_mode = std::string(mode);
263#if defined(_MSC_VER)
264 if (null_terminated_mode == "r") {
265 null_terminated_mode = "rb";
266 } else if (null_terminated_mode == "w") {
267 null_terminated_mode = "wb";
268 }
269#endif
270 const Format format = GetFormatFromName(file_name);
271 switch (format) {
272 case Format::NORMAL_FILE: {
273 FILE* c_file =
274 fopen(null_terminated_name.c_str(), null_terminated_mode.c_str());
275 if (c_file == nullptr) return nullptr;
276 return new CFile(c_file, file_name);
277 }
278 case Format::GZIP_FILE: {
279 gzFile gz_file =
280 gzopen(null_terminated_name.c_str(), null_terminated_mode.c_str());
281 if (!gz_file) return nullptr;
282 return new GzFile(gz_file, file_name);
283 }
284 case Format::BZIP2_FILE: {
285 BZFILE* bz_file = BZ2_bzopen(null_terminated_name.c_str(),
286 null_terminated_mode.c_str());
287 if (!bz_file) return nullptr;
288 return new Bz2File(bz_file, file_name);
289 }
290 }
291 // never reach
292 return nullptr;
293}
294
295int64_t File::ReadToString(std::string* line, uint64_t max_length) {
296 CHECK(line != nullptr);
297 line->clear();
298
299 if (max_length == 0) return 0;
300
301 int64_t needed = max_length;
302 int bufsize = (needed < (2 << 20) ? needed : (2 << 20));
303
304 std::unique_ptr<char[]> buf(new char[bufsize]);
305
306 int64_t nread = 0;
307 while (needed > 0) {
308 nread = Read(buf.get(), (bufsize < needed ? bufsize : needed));
309 if (nread > 0) {
310 line->append(buf.get(), nread);
311 needed -= nread;
312 } else {
313 break;
314 }
315 }
316 return (nread >= 0 ? static_cast<int64_t>(line->size()) : -1);
317}
318
319size_t File::WriteString(absl::string_view str) {
320 return Write(str.data(), str.size());
321}
322
323absl::string_view File::filename() const { return name_; }
324
325void File::Init() {}
326
327namespace file {
328absl::Status Open(absl::string_view file_name, absl::string_view mode, File** f,
329 Options options) {
330 if (options == Defaults()) {
331 *f = File::Open(file_name, mode);
332 if (*f != nullptr) {
333 return absl::OkStatus();
334 }
335 }
336 return absl::Status(absl::StatusCode::kInvalidArgument,
337 absl::StrCat("Could not open '", file_name, "'"));
338}
339
340File* OpenOrDie(absl::string_view file_name, absl::string_view mode,
341 Options options) {
342 File* f;
343 CHECK_EQ(options, Defaults());
344 f = File::Open(file_name, mode);
345 CHECK(f != nullptr) << absl::StrCat("Could not open '", file_name, "'");
346 return f;
347}
348
349absl::StatusOr<std::string> GetContents(absl::string_view path,
350 Options options) {
351 std::string contents;
352 absl::Status status = GetContents(path, &contents, options);
353 if (!status.ok()) {
354 return status;
355 }
356 return contents;
357}
358
359absl::Status GetContents(absl::string_view file_name, std::string* output,
360 Options options) {
361 File* file;
362 // For windows, the "b" is added in file::Open.
363 auto status = file::Open(file_name, "r", &file, options);
364 if (!status.ok()) return status;
365
366 const int64_t size = file->Size();
367 if (file->ReadToString(output, size) == size) {
368 status.Update(file->Close(options));
369 return status;
370 }
371
372 file->Close(options).IgnoreError(); // Even if ReadToString() fails!
373
374 return absl::Status(absl::StatusCode::kInvalidArgument,
375 absl::StrCat("Could not read from '", file_name, "'."));
376}
377
378absl::Status WriteString(File* file, absl::string_view contents,
379 Options options) {
380 if (options == Defaults() && file != nullptr &&
381 file->Write(contents.data(), contents.size()) == contents.size()) {
382 return absl::OkStatus();
383 }
384 return absl::Status(
385 absl::StatusCode::kInvalidArgument,
386 absl::StrCat("Could not write ", contents.size(), " bytes"));
387}
388
389absl::Status SetContents(absl::string_view file_name,
390 absl::string_view contents, Options options) {
391 File* file;
392 // For windows, the "b" is added in file::Open.
393 auto status = file::Open(file_name, "w", &file, options);
394 if (!status.ok()) return status;
395 status = file::WriteString(file, contents, options);
396 status.Update(file->Close(options)); // Even if WriteString() fails!
397 return status;
398}
399
400namespace {
401class NoOpErrorCollector : public google::protobuf::io::ErrorCollector {
402 public:
403 ~NoOpErrorCollector() override = default;
404 void RecordError(int /*line*/, int /*column*/,
405 absl::string_view /*message*/) override {}
406};
407} // namespace
408
409absl::Status GetTextProto(absl::string_view file_name,
410 google::protobuf::Message* proto, Options options) {
411 if (options == Defaults()) {
412 std::string str;
413 if (!GetContents(file_name, &str, file::Defaults()).ok()) {
414 VLOG(1) << "Could not read '" << file_name << "'";
415 return absl::Status(
416 absl::StatusCode::kInvalidArgument,
417 absl::StrCat("Could not read proto from '", file_name, "'."));
418 }
419
420 // Attempt to decode ASCII before deciding binary. Do it in this order
421 // because it is much harder for a binary encoding to happen to be a valid
422 // ASCII encoding than the other way around. For instance "index: 1\n" is a
423 // valid (but nonsensical) binary encoding. We want to avoid printing errors
424 // for valid binary encodings if the ASCII parsing fails, and so specify a
425 // no-op error collector.
426 NoOpErrorCollector error_collector;
427 google::protobuf::TextFormat::Parser parser;
428 parser.RecordErrorsTo(&error_collector);
429
430 if (parser.ParseFromString(str, proto)) { // Text format.
431 return absl::OkStatus();
432 }
433
434 if (proto->ParseFromString(str)) { // Binary format.
435 return absl::OkStatus();
436 }
437
438 // Re-parse the ASCII, just to show the diagnostics (we could also get them
439 // out of the ErrorCollector but this way is easier).
440 google::protobuf::TextFormat::ParseFromString(str, proto);
441 VLOG(1) << "Could not parse contents of '" << file_name << "'";
442 }
443 return absl::Status(
444 absl::StatusCode::kInvalidArgument,
445 absl::StrCat("Could not read proto from '", file_name, "'."));
446}
447
448absl::Status SetTextProto(absl::string_view file_name,
449 const google::protobuf::Message& proto,
450 Options options) {
451 if (options == Defaults()) {
452 std::string proto_string;
453 if (google::protobuf::TextFormat::PrintToString(proto, &proto_string) &&
454 file::SetContents(file_name, proto_string, file::Defaults()).ok()) {
455 return absl::OkStatus();
456 }
457 }
458 return absl::Status(
459 absl::StatusCode::kInvalidArgument,
460 absl::StrCat("Could not write proto to '", file_name, "'."));
461}
462
463absl::Status GetBinaryProto(const absl::string_view file_name,
464 google::protobuf::Message* proto, Options options) {
465 std::string str;
466 if (options == Defaults() &&
467 GetContents(file_name, &str, file::Defaults()).ok() &&
468 proto->ParseFromString(str)) {
469 return absl::OkStatus();
470 }
471 return absl::Status(
472 absl::StatusCode::kInvalidArgument,
473 absl::StrCat("Could not read proto from '", file_name, "'."));
474}
475
476absl::Status SetBinaryProto(absl::string_view file_name,
477 const google::protobuf::Message& proto,
478 Options options) {
479 if (options == Defaults()) {
480 std::string proto_string;
481 if (proto.AppendToString(&proto_string) &&
482 file::SetContents(file_name, proto_string, file::Defaults()).ok()) {
483 return absl::OkStatus();
484 }
485 }
486 return absl::Status(
487 absl::StatusCode::kInvalidArgument,
488 absl::StrCat("Could not write proto to '", file_name, "'."));
489}
490
491absl::Status Delete(absl::string_view path, Options options) {
492 if (options == Defaults()) {
493 std::string null_terminated_path = std::string(path);
494 if (remove(null_terminated_path.c_str()) == 0) return absl::OkStatus();
495 }
496 return absl::Status(absl::StatusCode::kInvalidArgument,
497 absl::StrCat("Could not delete '", path, "'."));
498}
499
500absl::Status Exists(absl::string_view path, Options options) {
501 if (options == Defaults()) {
502 std::string null_terminated_path = std::string(path);
503 if (access(null_terminated_path.c_str(), F_OK) == 0) {
504 return absl::OkStatus();
505 }
506 }
507 return absl::Status(absl::StatusCode::kInvalidArgument,
508 absl::StrCat("File '", path, "' does not exist."));
509}
510} // namespace file
Definition file.h:30
absl::string_view filename() const
Definition file.cc:323
virtual size_t Write(const void *buf, size_t size)=0
virtual size_t Read(void *buf, size_t size)=0
static void Init()
Definition file.cc:325
static File * OpenOrDie(absl::string_view file_name, absl::string_view mode)
Definition file.cc:254
std::string name_
Definition file.h:79
size_t WriteString(absl::string_view str)
Definition file.cc:319
File(absl::string_view name)
Definition file.cc:252
virtual bool Open() const =0
int64_t ReadToString(std::string *line, uint64_t max_length)
Definition file.cc:295
Definition file.cc:327
int Options
Definition file.h:84
absl::StatusOr< std::string > GetContents(absl::string_view path, Options options)
Definition file.cc:349
absl::Status Exists(absl::string_view path, Options options)
Definition file.cc:500
absl::Status SetBinaryProto(absl::string_view file_name, const google::protobuf::Message &proto, Options options)
Definition file.cc:476
absl::Status SetTextProto(absl::string_view file_name, const google::protobuf::Message &proto, Options options)
Definition file.cc:448
absl::Status GetTextProto(absl::string_view file_name, google::protobuf::Message *proto, Options options)
Definition file.cc:409
File * OpenOrDie(absl::string_view file_name, absl::string_view mode, Options options)
Definition file.cc:340
absl::Status WriteString(File *file, absl::string_view contents, Options options)
Definition file.cc:378
absl::Status GetBinaryProto(const absl::string_view file_name, google::protobuf::Message *proto, Options options)
Definition file.cc:463
Options Defaults()
Definition file.h:86
absl::Status Delete(absl::string_view path, Options options)
Definition file.cc:491
absl::Status Open(absl::string_view file_name, absl::string_view mode, File **f, Options options)
Definition file.cc:328
absl::Status SetContents(absl::string_view file_name, absl::string_view contents, Options options)
Definition file.cc:389