Google OR-Tools v9.12
a fast and portable software suite for combinatorial optimization
Loading...
Searching...
No Matches
set_cover_reader.cc
Go to the documentation of this file.
1// Copyright 2010-2025 Google LLC
2// Licensed under the Apache License, Version 2.0 (the "License");
3// you may not use this file except in compliance with the License.
4// You may obtain a copy of the License at
5//
6// http://www.apache.org/licenses/LICENSE-2.0
7//
8// Unless required by applicable law or agreed to in writing, software
9// distributed under the License is distributed on an "AS IS" BASIS,
10// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
11// See the License for the specific language governing permissions and
12// limitations under the License.
13
15
16#include <sys/types.h>
17
18#include <cctype>
19#include <cstddef>
20#include <cstdint>
21#include <limits>
22#include <string>
23#include <vector>
24
25#include "absl/log/check.h"
26#include "absl/strings/numbers.h"
27#include "absl/strings/str_cat.h"
28#include "absl/strings/str_format.h"
29#include "absl/strings/str_split.h"
30#include "absl/strings/string_view.h"
31#include "ortools/algorithms/set_cover.pb.h"
33#include "ortools/base/file.h"
38
39namespace operations_research {
40
42 public:
43 explicit SetCoverReader(File* file);
44 absl::string_view GetNextToken();
45 double ParseNextDouble();
46 int64_t ParseNextInteger();
47
48 private:
49 size_t SkipBlanks(size_t pos) const;
50 size_t SkipNonBlanks(size_t pos) const;
51 FileLineIterator line_iter_;
52 absl::string_view line_;
53 int start_pos_;
54 int end_pos_;
55};
56
58 : line_iter_(file, FileLineIterator::REMOVE_INLINE_CR |
59 FileLineIterator::REMOVE_BLANK_LINES),
60 start_pos_(0),
61 end_pos_(0) {
62 line_ = *line_iter_;
63}
64
65size_t SetCoverReader::SkipBlanks(size_t pos) const {
66 const size_t size = line_.size();
67 // As it is expected that the blanks will be spaces, we can skip them faster
68 // by checking for spaces only.
69 for (; pos < size && line_[pos] == ' '; ++pos) {
70 }
71 // We skip all forms of blanks to be on the safe side.
72 for (; pos < size && std::isspace(line_[pos]); ++pos) {
73 }
74 return pos;
75}
76
77size_t SetCoverReader::SkipNonBlanks(size_t pos) const {
78 const size_t size = line_.size();
79 for (; pos < size && !std::isspace(line_[pos]); ++pos) {
80 }
81 return pos;
82}
83
84absl::string_view SetCoverReader::GetNextToken() {
85 const size_t size = line_.size();
86 start_pos_ = SkipBlanks(end_pos_);
87 if (start_pos_ >= size) {
88 ++line_iter_;
89 line_ = *line_iter_;
90 start_pos_ = SkipBlanks(0);
91 }
92 end_pos_ = SkipNonBlanks(start_pos_);
93 return line_.substr(start_pos_, end_pos_ - start_pos_);
94}
95
97 double value = 0;
98 CHECK(absl::SimpleAtod(GetNextToken(), &value));
99 return value;
100}
101
103 int64_t value = 0;
104 CHECK(absl::SimpleAtoi(GetNextToken(), &value));
105 return value;
106}
107
108// This is a row-based format where the elements are 1-indexed.
109SetCoverModel ReadOrlibScp(absl::string_view filename) {
110 SetCoverModel model;
111 File* file(file::OpenOrDie(filename, "r", file::Defaults()));
112 SetCoverReader reader(file);
113 const ElementIndex num_rows(reader.ParseNextInteger());
114 const SubsetIndex num_cols(reader.ParseNextInteger());
115 model.ReserveNumSubsets(num_cols.value());
116 for (SubsetIndex subset : SubsetRange(num_cols)) {
117 const double cost(reader.ParseNextDouble());
118 model.SetSubsetCost(subset.value(), cost);
119 }
120 for (ElementIndex element : ElementRange(num_rows)) {
121 LOG_EVERY_N_SEC(INFO, 5)
122 << absl::StrFormat("Reading element %d (%.1f%%)", element.value(),
123 100.0 * element.value() / model.num_elements());
124 const RowEntryIndex row_size(reader.ParseNextInteger());
125 for (RowEntryIndex entry(0); entry < row_size; ++entry) {
126 // Correct the 1-indexing.
127 const int subset(reader.ParseNextInteger() - 1);
128 model.AddElementToSubset(element.value(), subset);
129 }
130 }
131 LOG(INFO) << "Finished reading the model.";
132 file->Close(file::Defaults()).IgnoreError();
133 model.CreateSparseRowView();
134 return model;
135}
136
137// This is a column-based format where the elements are 1-indexed.
138SetCoverModel ReadOrlibRail(absl::string_view filename) {
139 SetCoverModel model;
140 File* file(file::OpenOrDie(filename, "r", file::Defaults()));
141 SetCoverReader reader(file);
142 const ElementIndex num_rows(reader.ParseNextInteger());
143 const BaseInt num_cols(reader.ParseNextInteger());
144 model.ReserveNumSubsets(num_cols);
145 for (BaseInt subset(0); subset < num_cols; ++subset) {
146 LOG_EVERY_N_SEC(INFO, 5)
147 << absl::StrFormat("Reading subset %d (%.1f%%)", subset,
148 100.0 * subset / model.num_subsets());
149 const double cost(reader.ParseNextDouble());
150 model.SetSubsetCost(subset, cost);
151 const ColumnEntryIndex column_size(reader.ParseNextInteger());
152 model.ReserveNumElementsInSubset(column_size.value(), subset);
153 for (const ColumnEntryIndex _ : ColumnEntryRange(column_size)) {
154 // Correct the 1-indexing.
155 const ElementIndex element(reader.ParseNextInteger() - 1);
156 model.AddElementToSubset(element.value(), subset);
157 }
158 }
159 LOG(INFO) << "Finished reading the model.";
160 file->Close(file::Defaults()).IgnoreError();
161 model.CreateSparseRowView();
162 return model;
163}
164
165SetCoverModel ReadFimiDat(absl::string_view filename) {
166 SetCoverModel model;
167 BaseInt subset(0);
168 for (const std::string& line : FileLines(filename)) {
169 LOG_EVERY_N_SEC(INFO, 5)
170 << absl::StrFormat("Reading subset %d (%.1f%%)", subset,
171 100.0 * subset / model.num_subsets());
172 std::vector<std::string> elements = absl::StrSplit(line, ' ');
173 if (elements.back().empty() || elements.back()[0] == '\0') {
174 elements.pop_back();
175 }
176 model.AddEmptySubset(1);
177 for (const std::string& number : elements) {
178 BaseInt element;
179 CHECK(absl::SimpleAtoi(number, &element));
180 CHECK_GT(element, 0);
181 // Correct the 1-indexing.
182 model.AddElementToLastSubset(ElementIndex(element - 1));
183 }
184 ++subset;
185 }
186 LOG(INFO) << "Finished reading the model.";
187 model.CreateSparseRowView();
188 return model;
189}
190
191SetCoverModel ReadSetCoverProto(absl::string_view filename, bool binary) {
192 SetCoverModel model;
193 SetCoverProto message;
194 if (binary) {
195 CHECK_OK(file::GetBinaryProto(filename, &message, file::Defaults()));
196 } else {
197 CHECK_OK(file::GetTextProto(filename, &message, file::Defaults()));
198 }
199 model.ImportModelFromProto(message);
200 return model;
201}
202
203namespace {
204// A class to format data and write it to a file.
205// Text is formatted in chunks of at most max_cols characters.
206// Text is actually written to the file when the current chunk is full or when
207// FlushLine() is called.
208// FlushLine() must be called before closing the file.
209class LineFormatter {
210 public:
211 explicit LineFormatter(File* file)
212 : LineFormatter(file, std::numeric_limits<int64_t>::max()) {}
213 LineFormatter(File* file, int64_t max_cols)
214 : num_cols_(0), max_cols_(max_cols), line_(), file_(file) {}
215 ~LineFormatter() { CHECK(line_.empty()); }
216
217 void Append(absl::string_view text) {
218 const int text_size = text.size();
219 if (!text.empty() && text_size + num_cols_ > max_cols_) {
220 FlushLine();
221 }
222 absl::StrAppend(&line_, text);
223 num_cols_ += text_size;
224 }
225
226 void Append(BaseInt value) { Append(absl::StrCat(value, " ")); }
227
228 void Append(double value) { Append(absl::StrFormat("%.17g ", value)); }
229
230 void FlushLine() {
231 CHECK_OK(
232 file::WriteString(file_, absl::StrCat(line_, "\n"), file::Defaults()));
233 line_.clear();
234 num_cols_ = 0;
235 }
236
237 private:
238 int64_t num_cols_;
239 int64_t max_cols_;
240 std::string line_;
241 File* file_;
242};
243} // namespace
244
245void WriteOrlibScp(const SetCoverModel& model, absl::string_view filename) {
246 File* file(file::OpenOrDie(filename, "w", file::Defaults()));
247 LineFormatter formatter(file);
248 formatter.Append(model.num_elements());
249 formatter.Append(model.num_subsets());
250 formatter.FlushLine();
251 for (const SubsetIndex subset : model.SubsetRange()) {
252 formatter.Append(model.subset_costs()[subset]);
253 }
254 formatter.FlushLine();
255 for (const ElementIndex element : model.ElementRange()) {
256 LOG_EVERY_N_SEC(INFO, 5)
257 << absl::StrFormat("Writing element %d (%.1f%%)", element.value(),
258 100.0 * element.value() / model.num_elements());
259 formatter.Append(absl::StrCat(model.rows()[element].size(), "\n"));
260 for (const SubsetIndex subset : model.rows()[element]) {
261 formatter.Append(subset.value() + 1);
262 }
263 formatter.FlushLine();
264 }
265 LOG(INFO) << "Finished writing the model.";
266 file->Close(file::Defaults()).IgnoreError();
267}
268
269// Beware the fact that elements written are converted to 1-indexed.
270void WriteOrlibRail(const SetCoverModel& model, absl::string_view filename) {
271 File* file(file::OpenOrDie(filename, "w", file::Defaults()));
272 CHECK_OK(file::WriteString(
273 file, absl::StrCat(model.num_elements(), " ", model.num_subsets(), "\n"),
274 file::Defaults()));
275 LineFormatter formatter(file);
276 for (const SubsetIndex subset : model.SubsetRange()) {
277 LOG_EVERY_N_SEC(INFO, 5)
278 << absl::StrFormat("Writing subset %d (%.1f%%)", subset.value(),
279 100.0 * subset.value() / model.num_subsets());
280 formatter.Append(model.subset_costs()[subset]);
281 formatter.Append(static_cast<BaseInt>(model.columns()[subset].size()));
282 for (const ElementIndex element : model.columns()[subset]) {
283 formatter.Append(element.value() + 1);
284 }
285 formatter.FlushLine();
286 }
287 LOG(INFO) << "Finished writing the model.";
288 file->Close(file::Defaults()).IgnoreError();
289}
290
291void WriteSetCoverProto(const SetCoverModel& model, absl::string_view filename,
292 bool binary) {
293 const SetCoverProto message = model.ExportModelAsProto();
294 if (binary) {
295 CHECK_OK(file::SetBinaryProto(filename, message, file::Defaults()));
296 } else {
297 CHECK_OK(file::SetTextProto(filename, message, file::Defaults()));
298 }
299}
300
301SubsetBoolVector ReadSetCoverSolutionText(absl::string_view filename) {
303 File* file(file::OpenOrDie(filename, "r", file::Defaults()));
304 SetCoverReader reader(file);
305 const BaseInt num_cols(reader.ParseNextInteger());
306 solution.resize(num_cols, false);
307 const BaseInt cardinality(reader.ParseNextInteger());
308 for (int i = 0; i < cardinality; ++i) {
309 // NOTE(user): The solution is 0-indexed.
310 const SubsetIndex subset(reader.ParseNextInteger());
311 solution[subset] = true;
312 }
313 file->Close(file::Defaults()).IgnoreError();
314 return solution;
315}
316
318 bool binary) {
320 SetCoverSolutionResponse message;
321 if (binary) {
322 CHECK_OK(file::GetBinaryProto(filename, &message, file::Defaults()));
323 } else {
324 CHECK_OK(file::GetTextProto(filename, &message, file::Defaults()));
325 }
326 solution.resize(message.num_subsets(), false);
327 // NOTE(user): The solution is 0-indexed.
328 for (const BaseInt subset : message.subset()) {
329 solution[SubsetIndex(subset)] = true;
330 }
331 return solution;
332}
333
336 absl::string_view filename) {
337 File* file(file::OpenOrDie(filename, "w", file::Defaults()));
338 BaseInt cardinality(0);
339 Cost cost(0);
340 for (SubsetIndex subset(0); subset.value() < solution.size(); ++subset) {
341 if (solution[subset]) {
342 ++cardinality;
343 cost += model.subset_costs()[subset];
344 }
345 }
346 CHECK_OK(file::WriteString(
347 file, absl::StrCat(solution.size(), " ", cardinality, " ", cost, "\n"),
348 file::Defaults()));
349 LineFormatter formatter(file);
350 for (BaseInt subset(0); subset < solution.size(); ++subset) {
351 if (solution[SubsetIndex(subset)]) {
352 formatter.Append(subset);
353 }
354 }
355 formatter.FlushLine();
356 file->Close(file::Defaults()).IgnoreError();
357}
358
361 absl::string_view filename, bool binary) {
362 SetCoverSolutionResponse message;
363 message.set_num_subsets(solution.size());
364 Cost cost(0);
365 for (SubsetIndex subset(0); subset.value() < solution.size(); ++subset) {
366 if (solution[subset]) {
367 message.add_subset(subset.value());
368 cost += model.subset_costs()[subset];
369 }
370 }
371 message.set_cost(cost);
372 if (binary) {
373 CHECK_OK(file::SetBinaryProto(filename, message, file::Defaults()));
374 } else {
375 CHECK_OK(file::SetTextProto(filename, message, file::Defaults()));
376 }
377}
378} // namespace operations_research
Implements the minimum interface for a range-based for loop iterator.
Definition file.h:30
Main class for describing a weighted set-covering problem.
void ImportModelFromProto(const SetCoverProto &message)
Imports the model from a SetCoverProto.
void ReserveNumElementsInSubset(BaseInt num_elements, BaseInt subset)
Reserves num_elements rows in the column indexed by subset.
util_intops::StrongIntRange< ElementIndex > ElementRange() const
void AddElementToLastSubset(BaseInt element)
const SparseRowView & rows() const
Row view of the set covering problem.
void ReserveNumSubsets(BaseInt num_subsets)
Reserves num_subsets columns in the model.
util_intops::StrongIntRange< SubsetIndex > SubsetRange() const
Access to the ranges of subsets and elements.
void AddElementToSubset(BaseInt element, BaseInt subset)
void SetSubsetCost(BaseInt subset, Cost cost)
const SubsetCostVector & subset_costs() const
Vector of costs for each subset.
SetCoverProto ExportModelAsProto() const
const SparseColumnView & columns() const
Column view of the set covering problem.
Definition file.cc:169
absl::Status SetTextProto(absl::string_view filename, const google::protobuf::Message &proto, Options options)
Definition file.cc:337
absl::Status SetBinaryProto(absl::string_view filename, const google::protobuf::Message &proto, Options options)
Definition file.cc:360
absl::Status WriteString(File *file, absl::string_view contents, Options options)
Definition file.cc:231
Options Defaults()
Definition file.h:107
absl::Status GetTextProto(absl::string_view filename, google::protobuf::Message *proto, Options options)
Definition file.cc:327
absl::Status GetBinaryProto(const absl::string_view filename, google::protobuf::Message *proto, Options options)
Definition file.cc:348
File * OpenOrDie(absl::string_view filename, absl::string_view mode, Options options)
Definition file.cc:182
In SWIG mode, we don't want anything besides these top-level includes.
SetCoverModel ReadOrlibScp(absl::string_view filename)
This is a row-based format where the elements are 1-indexed.
void WriteSetCoverProto(const SetCoverModel &model, absl::string_view filename, bool binary)
SetCoverModel ReadSetCoverProto(absl::string_view filename, bool binary)
void WriteSetCoverSolutionProto(const SetCoverModel &model, const SubsetBoolVector &solution, absl::string_view filename, bool binary)
Select next search node to expand Select next item_i to add this new search node to the search Generate a new search node where item_i is not in the knapsack Check validity of this new partial solution(using propagators) - If valid
util_intops::StrongIntRange< SubsetIndex > SubsetRange
SubsetBoolVector ReadSetCoverSolutionText(absl::string_view filename)
util_intops::StrongIntRange< ColumnEntryIndex > ColumnEntryRange
void WriteSetCoverSolutionText(const SetCoverModel &model, const SubsetBoolVector &solution, absl::string_view filename)
SetCoverModel ReadOrlibRail(absl::string_view filename)
This is a column-based format where the elements are 1-indexed.
util_intops::StrongVector< SubsetIndex, bool > SubsetBoolVector
void WriteOrlibScp(const SetCoverModel &model, absl::string_view filename)
void WriteOrlibRail(const SetCoverModel &model, absl::string_view filename)
Beware the fact that elements written are converted to 1-indexed.
SubsetBoolVector ReadSetCoverSolutionProto(absl::string_view filename, bool binary)
double Cost
Basic non-strict type for cost. The speed penalty for using double is ~2%.
util_intops::StrongIntRange< ElementIndex > ElementRange
SetCoverModel ReadFimiDat(absl::string_view filename)
STL namespace.