Google OR-Tools v9.14
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
set_cover_reader.cc
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
15
16#include <sys/types.h>
17
18#include <algorithm>
19#include <cctype>
20#include <cstddef>
21#include <cstdint>
22#include <limits>
23#include <string>
24#include <vector>
25
26#include "absl/log/check.h"
27#include "absl/log/log.h"
28#include "absl/strings/numbers.h"
29#include "absl/strings/str_cat.h"
30#include "absl/strings/str_format.h"
31#include "absl/strings/str_split.h"
32#include "absl/strings/string_view.h"
33#include "ortools/base/file.h"
41
42namespace operations_research {
43
45 public:
46 explicit SetCoverReader(File* file);
47 absl::string_view GetNextToken();
48 double ParseNextDouble();
49 int64_t ParseNextInteger();
50
51 private:
52 size_t SkipBlanks(size_t pos) const;
53 size_t SkipNonBlanks(size_t pos) const;
54 FileLineIterator line_iter_;
55 absl::string_view line_;
56 int start_pos_;
57 int end_pos_;
58};
59
61 : line_iter_(file, FileLineIterator::REMOVE_INLINE_CR |
62 FileLineIterator::REMOVE_BLANK_LINES),
63 start_pos_(0),
64 end_pos_(0) {
65 line_ = *line_iter_;
66}
67
68size_t SetCoverReader::SkipBlanks(size_t pos) const {
69 const size_t size = line_.size();
70 // As it is expected that the blanks will be spaces, we can skip them faster
71 // by checking for spaces only.
72 for (; pos < size && line_[pos] == ' '; ++pos) {
73 }
74 // We skip all forms of blanks to be on the safe side.
75 for (; pos < size && std::isspace(line_[pos]); ++pos) {
76 }
77 return pos;
78}
79
80size_t SetCoverReader::SkipNonBlanks(size_t pos) const {
81 const size_t size = line_.size();
82 for (; pos < size && !std::isspace(line_[pos]); ++pos) {
83 }
84 return pos;
85}
86
87absl::string_view SetCoverReader::GetNextToken() {
88 const size_t size = line_.size();
89 start_pos_ = SkipBlanks(end_pos_);
90 if (start_pos_ >= size) {
91 ++line_iter_;
92 line_ = *line_iter_;
93 start_pos_ = SkipBlanks(0);
94 }
95 end_pos_ = SkipNonBlanks(start_pos_);
96 return line_.substr(start_pos_, end_pos_ - start_pos_);
97}
98
100 double value = 0;
101 CHECK(absl::SimpleAtod(GetNextToken(), &value));
102 return value;
103}
104
106 int64_t value = 0;
107 CHECK(absl::SimpleAtoi(GetNextToken(), &value));
108 return value;
109}
110
111namespace {
112double Percent(SubsetIndex subset, BaseInt total) {
113 return subset.value() == 0 ? 0 : 100.0 * subset.value() / total;
114}
115
116double Percent(ElementIndex element, BaseInt total) {
117 return element.value() == 0 ? 0 : 100.0 * element.value() / total;
118}
119} // namespace
120
121// This is a row-based format where the elements are 1-indexed.
122SetCoverModel ReadOrlibScp(absl::string_view filename) {
123 CHECK_OK(file::Exists(filename, file::Defaults()));
124 SetCoverModel model;
125 File* file(file::OpenOrDie(filename, "r", file::Defaults()));
126 SetCoverReader reader(file);
127 const ElementIndex num_rows(reader.ParseNextInteger());
128 const SubsetIndex num_cols(reader.ParseNextInteger());
129 model.ResizeNumSubsets(num_cols);
130 for (SubsetIndex subset : SubsetRange(num_cols)) {
131 const double cost(reader.ParseNextDouble());
132 model.SetSubsetCost(subset, cost);
133 }
134 for (ElementIndex element : ElementRange(num_rows)) {
135 LOG_EVERY_N_SEC(INFO, 5)
136 << absl::StrFormat("Reading element %d (%.1f%%)", element.value(),
137 Percent(element, model.num_elements()));
138 const RowEntryIndex row_size(reader.ParseNextInteger());
139 for (RowEntryIndex entry(0); entry < row_size; ++entry) {
140 // Correct the 1-indexing.
141 const SubsetIndex subset(reader.ParseNextInteger() - 1);
142 model.AddElementToSubset(element, subset);
143 }
144 }
145 LOG(INFO) << "Read " << model.num_subsets() << " subsets, "
146 << model.num_elements() << " elements";
147 file->Close(file::Defaults()).IgnoreError();
148 model.CreateSparseRowView();
149 return model;
150}
151
152// This is a column-based format where the elements are 1-indexed.
153SetCoverModel ReadOrlibRail(absl::string_view filename) {
154 CHECK_OK(file::Exists(filename, file::Defaults()));
155 SetCoverModel model;
156 File* file(file::OpenOrDie(filename, "r", file::Defaults()));
157 SetCoverReader reader(file);
158 const ElementIndex num_rows(reader.ParseNextInteger());
159 const SubsetIndex num_cols(reader.ParseNextInteger());
160 model.ResizeNumSubsets(num_cols);
161 for (SubsetIndex subset : SubsetRange(num_cols)) {
162 LOG_EVERY_N_SEC(INFO, 5)
163 << absl::StrFormat("Reading subset %d (%.1f%%)", subset.value(),
164 Percent(subset, model.num_subsets()));
165 const double cost(reader.ParseNextDouble());
166 model.SetSubsetCost(subset, cost);
167 const ColumnEntryIndex column_size(reader.ParseNextInteger());
168 model.ReserveNumElementsInSubset(column_size.value(), subset.value());
169 for (const ColumnEntryIndex _ : ColumnEntryRange(column_size)) {
170 // Correct the 1-indexing.
171 const ElementIndex element(reader.ParseNextInteger() - 1);
172 model.AddElementToSubset(element, subset);
173 }
174 }
175 LOG(INFO) << "Read " << model.num_subsets() << " subsets, "
176 << model.num_elements() << " elements";
177 file->Close(file::Defaults()).IgnoreError();
178 model.CreateSparseRowView();
179 return model;
180}
181
182SetCoverModel ReadFimiDat(absl::string_view filename) {
183 CHECK_OK(file::Exists(filename, file::Defaults()));
184 SetCoverModel model;
185 SubsetIndex subset(0);
186 // Read the file once to discover the smallest element index.
187 BaseInt smallest_element = std::numeric_limits<BaseInt>::max();
188 BaseInt largest_element = 0;
189 for (const std::string& line : FileLines(filename)) {
190 std::vector<std::string> elements = absl::StrSplit(line, ' ');
191 if (elements.back().empty() || elements.back()[0] == '\0') {
192 elements.pop_back();
193 }
194 for (const std::string& number_str : elements) {
195 BaseInt element;
196 CHECK(absl::SimpleAtoi(number_str, &element));
197 smallest_element = std::min(smallest_element, element);
198 largest_element = std::max(largest_element, element);
199 }
200 }
201 DLOG(INFO) << "Smallest element: " << smallest_element
202 << ", Largest element: " << largest_element;
203 ElementBoolVector element_seen(largest_element + 1, false);
204 for (const std::string& line : FileLines(filename)) {
205 LOG_EVERY_N_SEC(INFO, 5)
206 << absl::StrFormat("Reading subset %d", subset.value());
207 std::vector<std::string> elements = absl::StrSplit(line, ' ');
208 if (elements.back().empty() || elements.back()[0] == '\0') {
209 elements.pop_back();
210 }
211 model.AddEmptySubset(1);
212 // As there can be repetitions in the data, we need to keep track of the
213 // elements already added to the subset.
214 std::vector<ElementIndex> elements_list;
215 for (const std::string& number_str : elements) {
216 BaseInt raw_element;
217 CHECK(absl::SimpleAtoi(number_str, &raw_element));
218 // Re-index the elements starting from 0.
219 ElementIndex element(raw_element - smallest_element);
220 if (element_seen[element]) {
221 DLOG(INFO) << "Element " << element << " already in subset "
222 << subset.value();
223 continue;
224 }
225 element_seen[element] = true;
226 elements_list.push_back(element);
227 CHECK_GE(element.value(), 0);
228 model.AddElementToLastSubset(element);
229 }
230 // Clean up the list of elements.
231 for (const ElementIndex element : elements_list) {
232 element_seen[element] = false;
233 }
234 ++subset;
235 }
236 LOG(INFO) << "Read " << model.num_subsets() << " subsets, "
237 << model.num_elements() << " elements";
238 model.CreateSparseRowView();
239 return model;
240}
241
242SetCoverModel ReadSetCoverProto(absl::string_view filename, bool binary) {
243 CHECK_OK(file::Exists(filename, file::Defaults()));
244 SetCoverModel model;
245 SetCoverProto message;
246 if (binary) {
247 CHECK_OK(file::GetBinaryProto(filename, &message, file::Defaults()));
248 } else {
249 CHECK_OK(file::GetTextProto(filename, &message, file::Defaults()));
250 }
251 model.ImportModelFromProto(message);
252 return model;
253}
254
255namespace {
256// A class to format data and write it to a file.
257// Text is formatted in chunks of at most max_cols characters.
258// Text is actually written to the file when the current chunk is full or when
259// FlushLine() is called.
260// FlushLine() must be called before closing the file.
261class LineFormatter {
262 public:
263 explicit LineFormatter(File* file)
264 : LineFormatter(file, std::numeric_limits<int64_t>::max()) {}
265 LineFormatter(File* file, int64_t max_cols)
266 : num_cols_(0), max_cols_(max_cols), line_(), file_(file) {}
267 ~LineFormatter() { CHECK(line_.empty()); }
268
269 void Append(absl::string_view text) {
270 const int text_size = text.size();
271 if (!text.empty() && text_size + num_cols_ > max_cols_) {
272 FlushLine();
273 }
274 absl::StrAppend(&line_, text);
275 num_cols_ += text_size;
276 }
277
278 void Append(BaseInt value) { Append(absl::StrCat(value, " ")); }
279
280 void Append(double value) { Append(absl::StrFormat("%.17g ", value)); }
281
282 void FlushLine() {
283 CHECK_OK(
284 file::WriteString(file_, absl::StrCat(line_, "\n"), file::Defaults()));
285 line_.clear();
286 num_cols_ = 0;
287 }
288
289 private:
290 int64_t num_cols_;
291 int64_t max_cols_;
292 std::string line_;
293 File* file_;
294};
295} // namespace
296
297void WriteOrlibScp(const SetCoverModel& model, absl::string_view filename) {
298 File* file(file::OpenOrDie(filename, "w", file::Defaults()));
299 LineFormatter formatter(file);
300 formatter.Append(model.num_elements());
301 formatter.Append(model.num_subsets());
302 formatter.FlushLine();
303 for (const SubsetIndex subset : model.SubsetRange()) {
304 formatter.Append(model.subset_costs()[subset]);
305 }
306 formatter.FlushLine();
307 for (const ElementIndex element : model.ElementRange()) {
308 LOG_EVERY_N_SEC(INFO, 5)
309 << absl::StrFormat("Writing element %d (%.1f%%)", element.value(),
310 Percent(element, model.num_elements()));
311 formatter.Append(absl::StrCat(model.rows()[element].size(), "\n"));
312 for (const SubsetIndex subset : model.rows()[element]) {
313 formatter.Append(subset.value() + 1);
314 }
315 formatter.FlushLine();
316 }
317 LOG(INFO) << "Finished writing the model.";
318 file->Close(file::Defaults()).IgnoreError();
319}
320
321// Beware the fact that elements written are converted to 1-indexed.
322void WriteOrlibRail(const SetCoverModel& model, absl::string_view filename) {
323 File* file(file::OpenOrDie(filename, "w", file::Defaults()));
324 CHECK_OK(file::WriteString(
325 file, absl::StrCat(model.num_elements(), " ", model.num_subsets(), "\n"),
326 file::Defaults()));
327 LineFormatter formatter(file);
328 for (const SubsetIndex subset : model.SubsetRange()) {
329 LOG_EVERY_N_SEC(INFO, 5)
330 << absl::StrFormat("Writing subset %d (%.1f%%)", subset.value(),
331 Percent(subset, model.num_subsets()));
332 formatter.Append(model.subset_costs()[subset]);
333 formatter.Append(static_cast<BaseInt>(model.columns()[subset].size()));
334 for (const ElementIndex element : model.columns()[subset]) {
335 formatter.Append(element.value() + 1);
336 }
337 formatter.FlushLine();
338 }
339 LOG(INFO) << "Finished writing the model.";
340 file->Close(file::Defaults()).IgnoreError();
341}
342
343void WriteSetCoverProto(const SetCoverModel& model, absl::string_view filename,
344 bool binary) {
345 const SetCoverProto message = model.ExportModelAsProto();
346 if (binary) {
347 CHECK_OK(file::SetBinaryProto(filename, message, file::Defaults()));
348 } else {
349 CHECK_OK(file::SetTextProto(filename, message, file::Defaults()));
350 }
351}
352
353SubsetBoolVector ReadSetCoverSolutionText(absl::string_view filename) {
354 CHECK_OK(file::Exists(filename, file::Defaults()));
356 File* file(file::OpenOrDie(filename, "r", file::Defaults()));
357 SetCoverReader reader(file);
358 const BaseInt num_cols(reader.ParseNextInteger());
359 solution.resize(num_cols, false);
360 const BaseInt cardinality(reader.ParseNextInteger());
361 for (int i = 0; i < cardinality; ++i) {
362 // NOTE(user): The solution is 0-indexed.
363 const SubsetIndex subset(reader.ParseNextInteger());
364 solution[subset] = true;
365 }
366 file->Close(file::Defaults()).IgnoreError();
367 return solution;
368}
369
371 bool binary) {
372 CHECK_OK(file::Exists(filename, file::Defaults()));
375 if (binary) {
376 CHECK_OK(file::GetBinaryProto(filename, &message, file::Defaults()));
377 } else {
378 CHECK_OK(file::GetTextProto(filename, &message, file::Defaults()));
379 }
380 solution.resize(message.num_subsets(), false);
381 // NOTE(user): The solution is 0-indexed.
382 for (const BaseInt subset : message.subset()) {
383 solution[SubsetIndex(subset)] = true;
384 }
385 return solution;
386}
387
390 absl::string_view filename) {
391 File* file(file::OpenOrDie(filename, "w", file::Defaults()));
392 BaseInt cardinality(0);
393 Cost cost(0);
394 for (SubsetIndex subset(0); subset.value() < solution.size(); ++subset) {
395 if (solution[subset]) {
396 ++cardinality;
397 cost += model.subset_costs()[subset];
398 }
399 }
400 CHECK_OK(file::WriteString(
401 file, absl::StrCat(solution.size(), " ", cardinality, " ", cost, "\n"),
402 file::Defaults()));
403 LineFormatter formatter(file);
404 for (BaseInt subset(0); subset < solution.size(); ++subset) {
405 if (solution[SubsetIndex(subset)]) {
406 formatter.Append(subset);
407 }
408 }
409 formatter.FlushLine();
410 file->Close(file::Defaults()).IgnoreError();
411}
412
415 absl::string_view filename, bool binary) {
417 message.set_num_subsets(solution.size());
418 Cost cost(0);
419 for (SubsetIndex subset(0); subset.value() < solution.size(); ++subset) {
420 if (solution[subset]) {
421 message.add_subset(subset.value());
422 cost += model.subset_costs()[subset];
423 }
424 }
425 message.set_cost(cost);
426 if (binary) {
427 CHECK_OK(file::SetBinaryProto(filename, message, file::Defaults()));
428 } else {
429 CHECK_OK(file::SetTextProto(filename, message, file::Defaults()));
430 }
431}
432} // namespace operations_research
Implements the minimum interface for a range-based for loop iterator.
Definition file.h:29
Main class for describing a weighted set-covering problem.
void ImportModelFromProto(const SetCoverProto &message)
Imports the model from a SetCoverProto.
void ResizeNumSubsets(BaseInt num_subsets)
void ReserveNumElementsInSubset(BaseInt num_elements, BaseInt subset)
Reserves num_elements rows in the column indexed by subset.
util_intops::StrongIntRange< ElementIndex > ElementRange() const
void AddElementToLastSubset(BaseInt element)
const SparseRowView & rows() const
Row view of the set covering problem.
util_intops::StrongIntRange< SubsetIndex > SubsetRange() const
Access to the ranges of subsets and elements.
void AddElementToSubset(BaseInt element, BaseInt subset)
void SetSubsetCost(BaseInt subset, Cost cost)
const SubsetCostVector & subset_costs() const
Vector of costs for each subset.
SetCoverProto ExportModelAsProto() const
const SparseColumnView & columns() const
Column view of the set covering problem.
Definition file.cc:326
absl::Status Exists(absl::string_view path, Options options)
Definition file.cc:499
absl::Status SetBinaryProto(absl::string_view file_name, const google::protobuf::Message &proto, Options options)
Definition file.cc:475
absl::Status SetTextProto(absl::string_view file_name, const google::protobuf::Message &proto, Options options)
Definition file.cc:447
absl::Status GetTextProto(absl::string_view file_name, google::protobuf::Message *proto, Options options)
-— Protobuf API -—
Definition file.cc:408
File * OpenOrDie(absl::string_view file_name, absl::string_view mode, Options options)
Definition file.cc:339
absl::Status WriteString(File *file, absl::string_view contents, Options options)
Definition file.cc:377
absl::Status GetBinaryProto(const absl::string_view file_name, google::protobuf::Message *proto, Options options)
Definition file.cc:462
Options Defaults()
Definition file.h:85
In SWIG mode, we don't want anything besides these top-level includes.
SetCoverModel ReadOrlibScp(absl::string_view filename)
This is a row-based format where the elements are 1-indexed.
void WriteSetCoverProto(const SetCoverModel &model, absl::string_view filename, bool binary)
SetCoverModel ReadSetCoverProto(absl::string_view filename, bool binary)
void WriteSetCoverSolutionProto(const SetCoverModel &model, const SubsetBoolVector &solution, absl::string_view filename, bool binary)
Select next search node to expand Select next item_i to add this new search node to the search Generate a new search node where item_i is not in the knapsack Check validity of this new partial solution(using propagators) - If valid
util_intops::StrongIntRange< SubsetIndex > SubsetRange
Definition base_types.h:54
SubsetBoolVector ReadSetCoverSolutionText(absl::string_view filename)
util_intops::StrongIntRange< ColumnEntryIndex > ColumnEntryRange
Definition base_types.h:56
void WriteSetCoverSolutionText(const SetCoverModel &model, const SubsetBoolVector &solution, absl::string_view filename)
SetCoverModel ReadOrlibRail(absl::string_view filename)
This is a column-based format where the elements are 1-indexed.
util_intops::StrongVector< SubsetIndex, bool > SubsetBoolVector
Definition base_types.h:71
util_intops::StrongVector< ElementIndex, bool > ElementBoolVector
Definition base_types.h:72
void WriteOrlibScp(const SetCoverModel &model, absl::string_view filename)
void WriteOrlibRail(const SetCoverModel &model, absl::string_view filename)
Beware the fact that elements written are converted to 1-indexed.
SubsetBoolVector ReadSetCoverSolutionProto(absl::string_view filename, bool binary)
double Cost
Basic non-strict type for cost. The speed penalty for using double is ~2%.
Definition base_types.h:32
util_intops::StrongIntRange< ElementIndex > ElementRange
Definition base_types.h:55
SetCoverModel ReadFimiDat(absl::string_view filename)
STL namespace.