Loading...
Searching...
No Matches
ripser.h
1/* Based on Ripser commit 140670f2c76997404601e43d8054151f46be9fd7
2 * Modification(s):
3 * - YYYY/MM Author: Description of the modification
4 * - 2024 Marc Glisse: Heavy refactoring
5*/
6
7/*
8
9 Ripser: a lean C++ code for computation of Vietoris-Rips persistence barcodes
10
11 MIT License
12
13 Copyright (c) 2015–2021 Ulrich Bauer
14
15 Permission is hereby granted, free of charge, to any person obtaining a copy
16 of this software and associated documentation files (the "Software"), to deal
17 in the Software without restriction, including without limitation the rights
18 to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
19 copies of the Software, and to permit persons to whom the Software is
20 furnished to do so, subject to the following conditions:
21
22 The above copyright notice and this permission notice shall be included in all
23 copies or substantial portions of the Software.
24
25 THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
26 IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
27 FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
28 AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
29 LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
30 OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
31 SOFTWARE.
32
33 You are under no obligation whatsoever to provide any bug fixes, patches, or
34 upgrades to the features, functionality or performance of the source code
35 ("Enhancements") to anyone; however, if you choose to make your Enhancements
36 available either publicly, or directly to the author of this software, without
37 imposing a separate written license agreement for such Enhancements, then you
38 hereby grant the following license: a non-exclusive, royalty-free perpetual
39 license to install, use, modify, prepare derivative works, incorporate into
40 other computer software, distribute, and sublicense such enhancements or
41 derivative works thereof, in binary and source code form.
42
43*/
44
45//#define GUDHI_INDICATE_PROGRESS
46
47// #define GUDHI_RIPSER_USE_BOOST_HEAP // only useful when heap::push dominates?
48// #define GUDHI_RIPSER_USE_HASHMAP_FOR_SPARSE_DIST_MAT // only useful in cases where edge-collapse is more important?
49
50/* TODO:
51 * from branch representative-cocycles
52 - for vertices: brute force check all vertices that are in the same connected component (could do better, but there is already quadraticness elsewhere in most cases, even the output for dim 0 may be quadratic)
53 - for others: print_chain just calls iteratively get_pivot on working_reduction_column
54 * from branch representative-cycles
55 - dim 0: trivial
56 - parametrize a number of functions (like add_coboundary) by the (co)boundary iterator so they can be also used for homology
57 - once cohomology is computed for some dim, assemble the relevant simplices (don't forget the essential ones) and reduce them homology-style. I think we have 2 choices: reduce the birth and check which columns we add (only possibility for essential classes, but do we really need to do cohomology first if we are going to do that?), or reduce the death and look at the column after reduction (Ripser(er) chose this).
58 * check out the persistence image branch
59
60 * allow non-0 filtration value on vertices, so we can handle all flag-type filtrations, not just plain Rips. cf scikit-tda
61*/
62
63#ifndef GUDHI_RIPSER_H
64#define GUDHI_RIPSER_H
65
66#include <algorithm>
67#include <cmath> // sqrt
68#include <numeric>
69#include <queue>
70#include <optional>
71#include <stdexcept>
72#ifdef GUDHI_INDICATE_PROGRESS
73#include <chrono>
74#endif
75
76#include <boost/range/iterator_range_core.hpp>
77#include <boost/version.hpp>
78#if BOOST_VERSION >= 108100
79#include <boost/unordered/unordered_flat_map.hpp>
80#else
81#include <boost/unordered_map.hpp>
82#endif
83
84#ifdef GUDHI_RIPSER_USE_BOOST_HEAP
85#include <boost/heap/d_ary_heap.hpp>
86#endif
87
88#include <gudhi/uint128.h>
89#include <gudhi/Debug_utils.h>
90
91
92namespace Gudhi::ripser {
93
94#define GUDHI_assert(X) GUDHI_CHECK(X, std::logic_error(""))
95
96#ifdef GUDHI_RIPSER_USE_BOOST_HEAP
97template <class T, class, class C>
98using Heap = boost::heap::d_ary_heap<T, boost::heap::arity<8>, boost::heap::compare<C>>;
99#else
100template <class T, class V, class C>
101struct Heap : std::priority_queue<T, V, C> {
102 typedef std::priority_queue<T, V, C> Base;
103 using Base::Base;
104 void clear() { this->c.clear(); }
105};
106#endif
107
108template<class vertex_t_>
109class Union_find {
110 public:
111 typedef vertex_t_ vertex_t;
112 private:
113 std::vector<vertex_t> parent;
114 std::vector<uint8_t> rank;
115 public:
116 Union_find(const vertex_t n) : parent(n), rank(n, 0) {
117 for (vertex_t i = 0; i < n; ++i) parent[i] = i;
118 }
119
120 // TODO: check which one is faster (probably doesn't matter)
121 vertex_t find(vertex_t x) {
122#if 0
123 vertex_t y = x, z;
124 while ((z = parent[y]) != y) y = z;
125 while ((z = parent[x]) != y) {
126 parent[x] = y;
127 x = z;
128 }
129 return z;
130#else
131 // Path halving
132 vertex_t y = parent[x];
133 vertex_t z = parent[y];
134 while (y != z)
135 {
136 parent[x] = z;
137 x = z;
138 y = parent[x];
139 z = parent[y];
140 }
141 return y;
142#endif
143 }
144
145 void link(vertex_t x, vertex_t y) {
146 // this line is redundant, the caller already does it
147 if ((x = find(x)) == (y = find(y))) return;
148 if (rank[x] > rank[y])
149 parent[y] = x;
150 else {
151 parent[x] = y;
152 if (rank[x] == rank[y]) ++rank[y];
153 }
154 }
155};
156
157template<class coefficient_t>
158bool is_prime(const coefficient_t n) {
159 if (!(n & 1) || n < 2) return n == 2;
160 for (coefficient_t p = 3; p * p <= n; p += 2)
161 if (!(n % p)) return false;
162 return true;
163}
164
165// For pretty printing, modulo 11, we prefer -1 to 10.
166template<class coefficient_t>
167coefficient_t normalize(const coefficient_t n, const coefficient_t modulus) {
168 return n > modulus / 2 ? n - modulus : n;
169}
170
171template<class coefficient_storage_t, class coefficient_t>
172std::vector<coefficient_storage_t> multiplicative_inverse_vector(const coefficient_t m) {
173 std::vector<coefficient_storage_t> inverse(m);
174 if (!is_prime(m))
175 throw std::domain_error("Modulus must be a prime number");
176 if ((m - 1) != (coefficient_storage_t)(m - 1))
177 throw std::overflow_error("Modulus is too large");
178 inverse[1] = 1;
179 // m = a * (m / a) + m % a
180 // Multipying with inverse(a) * inverse(m % a):
181 // 0 = inverse(m % a) * (m / a) + inverse(a) (mod m)
182 for (coefficient_t a = 2; a < m; ++a) inverse[a] = m - (inverse[m % a] * (m / a)) % m;
183 return inverse;
184}
185
186#ifdef GUDHI_INDICATE_PROGRESS
187constexpr std::chrono::milliseconds time_step(40);
188constexpr const char* clear_line="\r\033[K";
189#endif
190
191/* concept DistanceMatrix
192struct DistanceMatrix {
193 typedef Category;
194 typedef vertex_t;
195 typedef value_t;
196 value_t operator()(vertex_t i, vertex_t j) const;
197 vertex_t size() const;
198 // Optional:
199 static const bool is_sparse;
200 vector<vector<vertex_diameter_t>> neighbors;
201};
202*/
203
204class Tag_dense {}; // Use directly, iterate on vertices
205class Tag_sparse {}; // Use directly, iterate on edges
206class Tag_other {}; // Could use directly as dense, but prefer converting it.
207
208template <class Params>
209struct Full_distance_matrix {
210 public:
211 typedef Tag_dense Category;
212 typedef typename Params::vertex_t vertex_t;
213 typedef typename Params::value_t value_t;
214 std::vector<value_t> distances; // TODO: private
215 private:
216 vertex_t n;
217 public:
218 //Full_distance_matrix(std::vector<value_t>&& _distances)
219 // : distances(std::move(_distances)), n(std::sqrt(_distances.size())) {}
220
221 template <typename DistanceMatrix>
222 Full_distance_matrix(const DistanceMatrix& mat)
223 : distances(static_cast<std::size_t>(mat.size()) * mat.size()), n(mat.size()) { // vertex_t is meant for vertices. Using it for edges could be unsafe, so we cast to size_t.
224 for (vertex_t i = 0; i < size(); ++i)
225 for (vertex_t j = 0; j < size(); ++j)
226 distances[i * n + j] = mat(i, j); // If mat::operator() involves computations, should we try to take advantage of the symmetry?
227 }
228
229 // Confusing: why is the order j,i significantly faster than i,j?
230 value_t operator()(const vertex_t j, const vertex_t i) const {
231 return distances[i * n + j];
232 }
233 vertex_t size() const { return n; }
234};
235
236enum Compressed_matrix_layout { LOWER_TRIANGULAR, UPPER_TRIANGULAR };
237
238template <class Params, Compressed_matrix_layout Layout>
239struct Compressed_distance_matrix {
240 public:
241 typedef Tag_dense Category;
242 typedef typename Params::vertex_t vertex_t;
243 typedef typename Params::value_t value_t;
244 std::vector<value_t> distances; // TODO: private
245 private:
246 std::vector<value_t*> rows; // Surprisingly, this is more efficient than computing i*(i-1)/2
247
248 public:
249 Compressed_distance_matrix(std::vector<value_t>&& _distances)
250 : distances(std::move(_distances)), rows((1 + std::sqrt(1 + 8 * distances.size())) / 2) {
251 GUDHI_assert(distances.size() == (std::size_t)size() * (size() - 1) / 2);
252 init_rows();
253 }
254
255 template <typename DistanceMatrix>
256 Compressed_distance_matrix(const DistanceMatrix& mat)
257 : distances(static_cast<std::size_t>(mat.size()) * (mat.size() - 1) / 2), rows(mat.size()) { // vertex_t is meant for vertices. Using it for edges could be unsafe, so we cast to size_t.
258 init_rows();
259
260 for (vertex_t i = 1; i < size(); ++i)
261 for (vertex_t j = 0; j < i; ++j) rows[i][j] = mat(i, j);
262 }
263
264 value_t operator()(const vertex_t i, const vertex_t j) const {
265 if (i == j) return 0;
266 if ((Layout == LOWER_TRIANGULAR) ? (i < j) : (i > j))
267 return rows[j][i];
268 else
269 return rows[i][j];
270 }
271 vertex_t size() const { return rows.size(); }
272 private:
273 void init_rows() {
274 if constexpr (Layout == LOWER_TRIANGULAR) {
275 value_t* pointer = &distances[0];
276 for (vertex_t i = 1; i < size(); ++i) {
277 rows[i] = pointer;
278 pointer += i;
279 }
280 } else { // UPPER_TRIANGULAR
281 value_t* pointer = &distances[0] - 1;
282 for (vertex_t i = 0; i < size() - 1; ++i) {
283 rows[i] = pointer;
284 pointer += size() - i - 2;
285 }
286 }
287 }
288};
289
290template <class Params>
291struct Sparse_distance_matrix {
292 public:
293 typedef Tag_sparse Category;
294 static constexpr bool is_sparse = true;
295 typedef typename Params::vertex_t vertex_t;
296 typedef typename Params::value_t value_t;
297 struct vertex_diameter_t {
298 vertex_diameter_t() =default;
299 vertex_diameter_t(vertex_t i_, value_t d_) : i(i_), d(d_) {}
300#if 1
301 // gcc is faster with those 2 lines (or with a std::pair) :-(
302 vertex_diameter_t(vertex_diameter_t const&o)noexcept :i(o.i),d(o.d){}
303 vertex_diameter_t&operator=(vertex_diameter_t const&o) noexcept{i=o.i;d=o.d;return*this;}
304#endif
305 vertex_t i; value_t d;
306 friend vertex_t get_vertex(const vertex_diameter_t& i) { return i.i; }
307 friend value_t get_diameter(const vertex_diameter_t& i) { return i.d; }
308 friend bool operator<(vertex_diameter_t const& a, vertex_diameter_t const& b) {
309 if (a.i < b.i) return true;
310 if (a.i > b.i) return false;
311 return a.d < b.d;
312 }
313 };
314
315 std::vector<std::vector<vertex_diameter_t>> neighbors;
316 std::size_t num_edges; // TODO: useless, remove
317
318 private:
319#ifdef GUDHI_RIPSER_USE_HASHMAP_FOR_SPARSE_DIST_MAT
320#if BOOST_VERSION >= 108100
321 boost::unordered_flat_map<std::pair<vertex_t,vertex_t>,value_t> m;
322#else
323 boost::unordered_map<std::pair<vertex_t,vertex_t>,value_t> m;
324#endif
325 // Would a vector<unordered_map> (one map per vertex) have any advantage?
326#endif
327
328 public:
329 Sparse_distance_matrix(std::vector<std::vector<vertex_diameter_t>>&& _neighbors,
330 std::size_t _num_edges = 0)
331 : neighbors(std::move(_neighbors)), num_edges(_num_edges) {init();}
332
333 template <typename DistanceMatrix>
334 Sparse_distance_matrix(const DistanceMatrix& mat, const value_t threshold)
335 : neighbors(mat.size()), num_edges(0) {
336
337 for (vertex_t i = 0; i < size(); ++i)
338 for (vertex_t j = 0; j < size(); ++j)
339 if (i != j) {
340 auto d = mat(i, j);
341 if (d <= threshold) {
342 ++num_edges;
343 neighbors[i].emplace_back(j, d);
344 }
345 }
346 init();
347 }
348
349 value_t operator()(const vertex_t i, const vertex_t j) const {
350#ifdef GUDHI_RIPSER_USE_HASHMAP_FOR_SPARSE_DIST_MAT
351 // We could insert in both orders to save the minmax for each query.
352 return m.at(std::minmax(i,j));
354 // auto it = m.find(std::minmax(i,j));
355 // return (it != m.end()) ? *it : std::numeric_limits<value_t>::infinity();
356#else
357 auto neighbor =
358 std::lower_bound(neighbors[i].begin(), neighbors[i].end(), vertex_diameter_t{j, 0});
359 return (neighbor != neighbors[i].end() && get_vertex(*neighbor) == j)
360 ? get_diameter(*neighbor)
361 : std::numeric_limits<value_t>::infinity();
362#endif
363 }
364 vertex_t size() const { return neighbors.size(); }
365 private:
366 void init() {
367#ifdef GUDHI_RIPSER_USE_HASHMAP_FOR_SPARSE_DIST_MAT
368 for(vertex_t i=0; i<size(); ++i){
369 for(auto n:neighbors[i]){
370 m[std::minmax(i,get_vertex(n))]=get_diameter(n);
371 }
372 }
373#endif
374 }
375};
376
377// Do not feed this directly to ripser (slow), first convert to another matrix type
378template <class Params>
379struct Euclidean_distance_matrix {
380 public:
381 typedef Tag_other Category;
382 typedef typename Params::vertex_t vertex_t;
383 typedef typename Params::value_t value_t;
384
385 Euclidean_distance_matrix(std::vector<std::vector<value_t>>&& _points)
386 : points(std::move(_points)) {
387 for (auto p : points) { GUDHI_assert(p.size() == points.front().size()); }
388 }
389
390 value_t operator()(const vertex_t i, const vertex_t j) const {
391 GUDHI_assert((std::size_t)i < points.size());
392 GUDHI_assert((std::size_t)j < points.size());
393 return std::sqrt(std::inner_product(
394 points[i].begin(), points[i].end(), points[j].begin(), value_t(), std::plus<value_t>(),
395 [](value_t u, value_t v) { return (u - v) * (u - v); }));
396 }
397
398 vertex_t size() const { return points.size(); }
399 private:
400 std::vector<std::vector<value_t>> points;
401};
402
403// The gratuitous restrictions on what can be specialized in C++ are annoying.
404template <class DistanceMatrix, class=std::bool_constant<true>> struct Is_sparse_impl : std::bool_constant<false> {};
405template <class DistanceMatrix> struct Is_sparse_impl<DistanceMatrix, std::bool_constant<DistanceMatrix::is_sparse>> : std::bool_constant<true> {};
406template <class DistanceMatrix> constexpr bool is_sparse () { return Is_sparse_impl<DistanceMatrix>::value; }
407
408template <typename ValueType> class Compressed_sparse_matrix_ {
409 std::vector<size_t> bounds;
410 std::vector<ValueType> entries;
411
412 typedef typename std::vector<ValueType>::const_iterator iterator;
413 typedef boost::iterator_range<iterator> iterator_pair;
414
415 public:
416 iterator_pair subrange(const size_t index) const {
417 return {entries.begin() + (index == 0 ? 0 : bounds[index - 1]),
418 entries.begin() + bounds[index]};
419 }
420
421 // Close the current column, the next push_back will be for a new column
422 void append_column() { bounds.push_back(entries.size()); }
423
424 void push_back(const ValueType e) {
425 GUDHI_assert(0 < bounds.size());
426 entries.push_back(e);
427 ++bounds.back();
428 }
429};
430
431/* concept SimplexEncoding
432struct SimplexEncoding {
433 typedef dimension_t;
434 typedef vertex_t;
435 typedef simplex_t; // has to be an integer
436 SimplexEncoding(vertex_t n_vertices, dimension_t max_dim_plus_one); // max_vertices_per_simplex
437 simplex_t operator()(vertex_t v, dimension_t position) const; // position starts at 1?
438 vertex_t get_max_vertex(simplex_t s, dimension_t position, vertex_t n_vertices) const;
439 int num_extra_bits() const;
440};
441*/
442
443// number of bits necessary to store x with 0 <= x < n
444template <class vertex_t>
445constexpr int log2up(vertex_t n) {
446 --n;
447 int k = 0;
448 while(n>0) { n>>=1; ++k; }
449 return k;
450}
451
452template <class Params>
453class Cns_encoding {
454 public:
455 typedef typename Params::dimension_t dimension_t;
456 typedef typename Params::vertex_t vertex_t;
457 typedef typename Params::simplex_t simplex_t;
458
459 private:
460 std::vector<std::vector<simplex_t>> B; // table of binomial coefficients
461 int extra_bits;
462
463 public:
464 Cns_encoding(vertex_t n, dimension_t k) : B(k + 1, std::vector<simplex_t>(n + 1, 0)) {
465 static_assert(std::numeric_limits<simplex_t>::radix == 2);
466 const int available_bits = std::numeric_limits<simplex_t>::digits;
467 simplex_t max_simplex_index = 0;
468 for (vertex_t i = 0; i <= n; ++i) {
469 B[0][i] = 1;
470 for (dimension_t j = 1; (vertex_t)j < std::min<vertex_t>(i, k + 1); ++j)
471 B[j][i] = B[j - 1][i - 1] + B[j][i - 1];
472 if (i <= k) B[i][i] = 1;
473 vertex_t mi = std::min<vertex_t>(i >> 1, (vertex_t)k); // max
474 max_simplex_index = B[mi][i];
475 if (i > 1 && max_simplex_index < B[mi][i-1]) { // overflow
476 throw std::overflow_error("cannot encode all simplices of dimension " + std::to_string(k) + " with " + std::to_string(n) + " vertices using only " + std::to_string(available_bits) + " bits");
477 }
478 }
479 extra_bits = available_bits - log2up(max_simplex_index + 1);
480 }
481
482 simplex_t operator()(vertex_t n, dimension_t k) const {
483 GUDHI_assert(n >= k - 1);
484 return B[k][n];
485 }
486
487 // We could get `n` from B and avoid passing it as argument
488 vertex_t get_max_vertex(const simplex_t idx, const dimension_t k, const vertex_t n) const {
489 return get_max(n, k - 1, [&](vertex_t w) -> bool { return (*this)(w, k) <= idx; });
490 }
491
492 int num_extra_bits() const { return extra_bits; }
493
494 private:
495 template <class Predicate>
496 static vertex_t get_max(vertex_t top, const vertex_t bottom, const Predicate pred) {
497 if (!pred(top)) {
498 vertex_t count = top - bottom;
499 while (count > 0) {
500 vertex_t step = count >> 1, mid = top - step;
501 if (!pred(mid)) {
502 top = mid - 1;
503 count -= step + 1;
504 } else
505 count = step;
506 }
507 }
508 return top;
509 }
510};
511
512template <class Params>
513class Bitfield_encoding {
514 public:
515 typedef typename Params::dimension_t dimension_t;
516 typedef typename Params::vertex_t vertex_t;
517 typedef typename Params::simplex_t simplex_t;
518
519 private:
520 int bits_per_vertex;
521 int extra_bits;
522
523 public:
524 Bitfield_encoding(vertex_t n, dimension_t k) : bits_per_vertex(log2up(n)) {
525 static_assert(std::numeric_limits<simplex_t>::radix == 2);
526 const int available_bits = std::numeric_limits<simplex_t>::digits;
527 extra_bits = available_bits - bits_per_vertex * k;
528 if (extra_bits < 0)
529 throw std::overflow_error("cannot encode all simplices of dimension " + std::to_string(k - 1) + " with " + std::to_string(n) + " vertices using only " + std::to_string(available_bits) + " bits");
530 // The message is a bit misleading, it is tuples that we cannot encode, and just with this representation.
531 }
532
533 simplex_t operator()(vertex_t n, dimension_t k) const {
534 if(k==0) return 1; // because of odd use in (co)boundary...
535 --k;
536 // USE_N_MINUS_K only useful if it somehow helps remove the test k==0 above
537#ifdef USE_N_MINUS_K
538 return (simplex_t)(n - k) << (bits_per_vertex * k);
539#else
540 return (simplex_t)n << (bits_per_vertex * k);
541#endif
542 }
543
544 vertex_t get_max_vertex(const simplex_t idx, dimension_t k, const vertex_t) const {
545 GUDHI_assert(k > 0);
546 --k;
547#ifdef USE_N_MINUS_K
548 return static_cast<vertex_t>(idx >> (bits_per_vertex * k)) + k;
549#else
550 return static_cast<vertex_t>(idx >> (bits_per_vertex * k));
551#endif
552 }
553
554 int num_extra_bits() const { return extra_bits; }
555};
556
557template <typename DistanceMatrix, typename SimplexEncoding, typename Params> struct Rips_filtration {
558 using size_t = typename Params::size_t;
559 using vertex_t = typename SimplexEncoding::vertex_t;
560 // static_assert(std::is_same_v<vertex_t, typename DistanceMatrix::vertex_t>); // too strict
561 using simplex_t = typename SimplexEncoding::simplex_t;
562 using dimension_t = typename SimplexEncoding::dimension_t;
563 using value_t = typename DistanceMatrix::value_t;
564 using coefficient_storage_t = typename Params::coefficient_storage_t;
565 using coefficient_t = typename Params::coefficient_t;
566 static constexpr bool use_coefficients = Params::use_coefficients;
567
568 // The definition of entry_t could be added in some intermediate layer between SimplexEncoding and here
569 struct entry_with_coeff_t {
570 simplex_t content;
571 entry_with_coeff_t(simplex_t _index, coefficient_t _coefficient, int bits_for_coeff)
572 : content((_index << bits_for_coeff) | (_coefficient - 1)) { GUDHI_assert(_coefficient != 0); }
573 entry_with_coeff_t() {}
574 // We never store a coefficient of 0, so we can store coef-1 so %2 requires 0 bit and %3 only 1 bit
575 friend const entry_with_coeff_t& get_entry(const entry_with_coeff_t& e) { return e; }
576 };
577 simplex_t get_index(const entry_with_coeff_t& e) const { return e.content >> num_bits_for_coeff(); }
578 coefficient_t get_coefficient(const entry_with_coeff_t& e) const { return static_cast<coefficient_t>(e.content & (((simplex_t)1 << num_bits_for_coeff()) - 1)) + 1; }
579 void set_coefficient(entry_with_coeff_t& e, const coefficient_t c) const { GUDHI_assert(c!=0); e.content = (e.content & ((simplex_t)(-1) << num_bits_for_coeff())) | (c - 1); }
580 // Should we cache the masks derived from num_bits_for_coeff?
581
582 struct entry_plain_t {
583 simplex_t index;
584 entry_plain_t(simplex_t _index, coefficient_t, int) : index(_index) {}
585 entry_plain_t() {}
586 friend const entry_plain_t& get_entry(const entry_plain_t& e) { return e; }
587 };
588 static simplex_t get_index(const entry_plain_t& i) { return i.index; }
589 static coefficient_t get_coefficient(const entry_plain_t& i) { return 1; }
590 static void set_coefficient(entry_plain_t& e, const coefficient_t c) { GUDHI_assert(c==1); }
591
592 typedef std::conditional_t<use_coefficients, entry_with_coeff_t, entry_plain_t> entry_t;
593 entry_t make_entry(simplex_t i, coefficient_t c) const { return entry_t(i, c, num_bits_for_coeff()); }
594
595 static_assert(sizeof(entry_t) == sizeof(simplex_t), "size of entry_t is not the same as simplex_t");
596
597 // TODO: avoid storing filtp when !use_coefficients (same for Equal_index and greater_*)
598 struct Entry_hash {
599 Entry_hash(Rips_filtration const& filt) : filtp(&filt) {}
600 Rips_filtration const* filtp;
601 std::size_t operator()(const entry_t& e) const { return boost::hash<simplex_t>()(filtp->get_index(e)); }
602 };
603
604 struct Equal_index {
605 Equal_index(Rips_filtration const& filt) : filtp(&filt) {}
606 Rips_filtration const* filtp;
607 bool operator()(const entry_t& e, const entry_t& f) const {
608 return filtp->get_index(e) == filtp->get_index(f);
609 }
610 };
611
612 struct diameter_simplex_t {
613 value_t diameter;
614 simplex_t index;
615 friend value_t get_diameter(const diameter_simplex_t& i) { return i.diameter; }
616 };
617 static simplex_t get_index(const diameter_simplex_t& i) { return i.index; }
618
619 struct diameter_entry_t : std::pair<value_t, entry_t> {
620 using std::pair<value_t, entry_t>::pair;
621 friend const entry_t& get_entry(const diameter_entry_t& p) { return p.second; }
622 friend entry_t& get_entry(diameter_entry_t& p) { return p.second; }
623 friend value_t get_diameter(const diameter_entry_t& p) { return p.first; }
624 };
625 simplex_t get_index(const diameter_entry_t& p) const { return get_index(get_entry(p)); }
626 coefficient_t get_coefficient(const diameter_entry_t& p) const {
627 return get_coefficient(get_entry(p));
628 }
629 void set_coefficient(diameter_entry_t& p, const coefficient_t c) const {
630 set_coefficient(get_entry(p), c);
631 }
632 diameter_entry_t make_diameter_entry(value_t diameter, simplex_t index, coefficient_t coefficient) const {
633 return diameter_entry_t(diameter, make_entry(index, coefficient));
634 }
635 diameter_entry_t make_diameter_entry(const diameter_simplex_t& diameter_index, coefficient_t coefficient) const {
636 return diameter_entry_t(get_diameter(diameter_index), make_entry(get_index(diameter_index), coefficient));
637 }
638
639 template <typename Entry> struct Greater_diameter_or_smaller_index {
640 Greater_diameter_or_smaller_index(Rips_filtration const& filt) : filtp(&filt) {}
641 Rips_filtration const* filtp;
642 bool operator()(const Entry& a, const Entry& b) const {
643 return (get_diameter(a) > get_diameter(b)) ||
644 ((get_diameter(a) == get_diameter(b)) && (filtp->get_index(a) < filtp->get_index(b)));
645 }
646 };
647
648 const DistanceMatrix dist; // only store a reference instead?
649 const vertex_t n; // redundant with dist?
650 const dimension_t dim_max;
651 const value_t threshold; // It would be nice if this was only in DistanceMatrix, but inconvenient. Only used to list the edges of dense distance matrices.
652 const coefficient_t modulus;
653 const SimplexEncoding simplex_encoding; // only store a reference instead?
654 mutable std::vector<vertex_t> vertices; // we must not have several threads looking at the same complex
655 int bits_for_coeff;
656
657 Rips_filtration(DistanceMatrix&& _dist, dimension_t _dim_max, value_t _threshold, coefficient_t _modulus)
658 : dist(std::move(_dist)), n(dist.size()),
659 dim_max(std::min<vertex_t>(_dim_max, dist.size() - 2)), threshold(_threshold),
660 modulus(_modulus), simplex_encoding(n, dim_max + 2), bits_for_coeff(log2up(modulus-1)) {
661 // See entry_with_coeff_t for the logic for log2up(modulus-1) (storing coeff-1)
662 if (use_coefficients && simplex_encoding.num_extra_bits() < num_bits_for_coeff())
663 // TODO: include relevant numbers in the message
664 throw std::overflow_error("Not enough spare bits in the simplex encoding to store a coefficient");
665 }
666
667 vertex_t num_vertices() const { return n; }
668 int num_bits_for_coeff() const { return bits_for_coeff; }
669
670 simplex_t get_edge_index(const vertex_t i, const vertex_t j) const {
671 return simplex_encoding(i, 2) + j;
672 }
673
674 template <typename OutputIterator>
675 OutputIterator get_simplex_vertices(simplex_t idx, const dimension_t dim, vertex_t n,
676 OutputIterator out) const {
677 --n;
678 for (dimension_t k = dim + 1; k > 1; --k) {
679 n = simplex_encoding.get_max_vertex(idx, k, n);
680 *out++ = n;
681 idx -= simplex_encoding(n, k);
682 }
683 *out = static_cast<vertex_t>(idx);
684 return out;
685 }
686
687 value_t compute_diameter(const simplex_t index, const dimension_t dim) const {
688 value_t diam = -std::numeric_limits<value_t>::infinity();
689
690 vertices.resize(dim + 1);
691 get_simplex_vertices(index, dim, dist.size(), vertices.rbegin());
692
693 for (dimension_t i = 0; i <= dim; ++i)
694 for (dimension_t j = 0; j < i; ++j) {
695 diam = std::max(diam, dist(vertices[i], vertices[j]));
696 }
697 return diam;
698 }
699
700 std::vector<diameter_simplex_t> get_edges() {
701 if constexpr (!std::is_same_v<typename DistanceMatrix::Category, Tag_sparse>) { // Compressed_lower_distance_matrix
702 // TODO: it would be convenient to have DistanceMatrix provide a range of neighbors at dist<=threshold even in the dense case (as a filtered_range)
703 std::vector<diameter_simplex_t> edges;
704 for (vertex_t i = 0; i < n; ++i) {
705 for (vertex_t j = 0; j < i; ++j) {
706 value_t length = dist(i, j);
707 if (length <= threshold) edges.push_back({length, get_edge_index(i, j)});
708 }
709 }
710 return edges;
711 } else { // Sparse_distance_matrix
712 std::vector<diameter_simplex_t> edges;
713 for (vertex_t i = 0; i < n; ++i)
714 for (auto n : dist.neighbors[i]) {
715 vertex_t j = get_vertex(n);
716 if (i > j) edges.push_back({get_diameter(n), get_edge_index(i, j)});
717 }
718 return edges;
719 }
720 }
721
722 // TODO: document in what way (if any) the order matters
723 template<class DistanceMatrix2, class=typename DistanceMatrix2::Category> class Simplex_coboundary_enumerator_ { // Compressed_lower_distance_matrix
724 simplex_t idx_below, idx_above;
725 vertex_t j;
726 dimension_t k;
727 std::vector<vertex_t> vertices;
728 diameter_entry_t simplex;
729 const DistanceMatrix2& dist;
730 const SimplexEncoding& simplex_encoding;
731 const Rips_filtration& parent; // for n and get_simplex_vertices
732 // at least dist and simplex_encoding are redundant with parent, but using parent.dist and parent.simplex_encoding seems to have a bad impact on performance.
733
734 public:
735 Simplex_coboundary_enumerator_(const Rips_filtration& _parent) : dist(_parent.dist),
736 simplex_encoding(_parent.simplex_encoding), parent(_parent) {}
737
738 void set_simplex(const diameter_entry_t _simplex, const dimension_t _dim) {
739 idx_below = parent.get_index(_simplex);
740 idx_above = 0;
741 j = dist.size() - 1;
742 k = _dim + 1;
743 simplex = _simplex;
744 vertices.resize(_dim + 1);
745 parent.get_simplex_vertices(parent.get_index(_simplex), _dim, dist.size(), vertices.rbegin());
746 }
747
748 bool has_next(bool all_cofacets = true) {
749 return (j >= k && (all_cofacets || simplex_encoding(j, k) > idx_below));
750 }
751
752 std::optional<diameter_entry_t> next_raw(bool all_cofacets = true) {
753 if (!has_next(all_cofacets)) return std::nullopt;
754 // this requires simplex_encoding(x,0)>0
755 while (simplex_encoding(j, k) <= idx_below) {
756 idx_below -= simplex_encoding(j, k);
757 idx_above += simplex_encoding(j, k + 1);
758 --j;
759 --k;
760 GUDHI_assert(k != -1);
761 }
762 value_t cofacet_diameter = get_diameter(simplex);
763 // The order of j and i matters for performance
764 for (vertex_t i : vertices) cofacet_diameter = std::max(cofacet_diameter, dist(j, i));
765 simplex_t cofacet_index = idx_above + simplex_encoding(j--, k + 1) + idx_below;
766 coefficient_t cofacet_coefficient = parent.get_coefficient(simplex);
767 if (k & 1) cofacet_coefficient = parent.modulus - cofacet_coefficient;
768 return parent.make_diameter_entry(cofacet_diameter, cofacet_index, cofacet_coefficient);
769 }
770 std::optional<diameter_entry_t> next(bool all_cofacets = true) {
771 while(true) {
772 std::optional<diameter_entry_t> res = next_raw(all_cofacets);
773 if(!res || get_diameter(*res) <= parent.threshold) return res;
774 }
775 }
776 };
777
778 template <class DistanceMatrix2> class Simplex_coboundary_enumerator_<DistanceMatrix2, Tag_sparse> {
779 typedef typename DistanceMatrix2::vertex_diameter_t vertex_diameter_t;
780 simplex_t idx_below, idx_above;
781 dimension_t k;
782 std::vector<vertex_t> vertices;
783 diameter_entry_t simplex;
784 const DistanceMatrix2& dist;
785 const SimplexEncoding& simplex_encoding;
786 std::vector<typename std::vector<vertex_diameter_t>::const_reverse_iterator> neighbor_it;
787 std::vector<typename std::vector<vertex_diameter_t>::const_reverse_iterator> neighbor_end;
788 vertex_diameter_t neighbor;
789 const Rips_filtration& parent; // for n and get_simplex_vertices
790
791 public:
792 Simplex_coboundary_enumerator_(const Rips_filtration& _parent)
793 : dist(_parent.dist),
794 simplex_encoding(_parent.simplex_encoding), parent(_parent) {}
795
796 void set_simplex(const diameter_entry_t _simplex, const dimension_t _dim) {
797 idx_below = parent.get_index(_simplex);
798 idx_above = 0;
799 k = _dim + 1;
800 simplex = _simplex;
801 vertices.resize(_dim + 1);
802 parent.get_simplex_vertices(idx_below, _dim, dist.size(), vertices.rbegin());
803
804 neighbor_it.resize(_dim + 1);
805 neighbor_end.resize(_dim + 1);
806 for (dimension_t i = 0; i <= _dim; ++i) {
807 auto v = vertices[i];
808 neighbor_it[i] = dist.neighbors[v].rbegin();
809 neighbor_end[i] = dist.neighbors[v].rend();
810 }
811 }
812
813 bool has_next(bool all_cofacets = true) {
814 for (auto &it0 = neighbor_it[0], &end0 = neighbor_end[0]; it0 != end0; ++it0) {
815 neighbor = *it0;
816 for (size_t idx = 1; idx < neighbor_it.size(); ++idx) {
817 auto &it = neighbor_it[idx], end = neighbor_end[idx];
818 while (get_vertex(*it) > get_vertex(neighbor))
819 if (++it == end) return false;
820 if (get_vertex(*it) != get_vertex(neighbor))
821 goto continue_outer;
822 else
823 neighbor = std::max(neighbor, *it);
824 }
825 while (k > 0 && vertices[k - 1] > get_vertex(neighbor)) {
826 if (!all_cofacets) return false;
827 idx_below -= simplex_encoding(vertices[k - 1], k);
828 idx_above += simplex_encoding(vertices[k - 1], k + 1);
829 --k;
830 }
831 return true;
832continue_outer:;
833 }
834 return false;
835 }
836
837 std::optional<diameter_entry_t> next_raw(bool all_cofacets = true) {
838 if (!has_next(all_cofacets)) return std::nullopt;
839 ++neighbor_it[0];
840 value_t cofacet_diameter = std::max(get_diameter(simplex), get_diameter(neighbor));
841 simplex_t cofacet_index = idx_above + simplex_encoding(get_vertex(neighbor), k + 1) + idx_below;
842 const coefficient_t modulus = parent.modulus;
843 coefficient_t cofacet_coefficient =
844 (k & 1 ? modulus - 1 : 1) * parent.get_coefficient(simplex) % modulus;
845 return parent.make_diameter_entry(cofacet_diameter, cofacet_index, cofacet_coefficient);
846 }
847 std::optional<diameter_entry_t> next(bool all_cofacets = true) {
848 return next_raw(all_cofacets);
849 }
850 };
851
852 typedef Simplex_coboundary_enumerator_<DistanceMatrix> Simplex_coboundary_enumerator;
853
854 class Simplex_boundary_enumerator {
855 private:
856 simplex_t idx_below, idx_above;
857 vertex_t j;
858 dimension_t k;
859 diameter_entry_t simplex;
860 dimension_t dim;
861 const SimplexEncoding& simplex_encoding;
862 const Rips_filtration& parent; // for n, get_max_vertex, compute_diameter
863
864 public:
865 Simplex_boundary_enumerator(const Rips_filtration& _parent)
866 : simplex_encoding(_parent.simplex_encoding), parent(_parent) {}
867
868 void set_simplex(const diameter_entry_t _simplex, const dimension_t _dim) {
869 idx_below = parent.get_index(_simplex);
870 idx_above = 0;
871 j = parent.n - 1;
872 k = _dim;
873 simplex = _simplex;
874 dim = _dim;
875 }
876
877 bool has_next() { return (k >= 0); }
878
879 std::optional<diameter_entry_t> next() {
880 if (!has_next()) return std::nullopt;
881 j = simplex_encoding.get_max_vertex(idx_below, k + 1, j);
882
883 simplex_t face_index = idx_above - simplex_encoding(j, k + 1) + idx_below;
884
885 // It would make sense to extract the vertices once in set_simplex
886 // and pass the proper subset to compute_diameter, but even in cases
887 // where this dominates it does not seem to help (probably because we
888 // stop at the first coface).
889 value_t face_diameter = parent.compute_diameter(face_index, dim - 1);
890
891 const coefficient_t modulus = parent.modulus;
892 coefficient_t face_coefficient =
893 (k & 1 ? -1 + modulus : 1) * parent.get_coefficient(simplex) % modulus;
894
895 idx_below -= simplex_encoding(j, k + 1);
896 idx_above += simplex_encoding(j, k);
897
898 --k;
899
900 return parent.make_diameter_entry(face_diameter, face_index, face_coefficient);
901 }
902 };
903};
904
905#if BOOST_VERSION >= 108100
906template <class Key, class T, class H, class E> using Hash_map = boost::unordered_flat_map<Key, T, H, E>;
907#else
908template <class Key, class T, class H, class E> using Hash_map = boost::unordered_map<Key, T, H, E>;
909#endif
910
911template <typename Filtration> class Persistent_cohomology {
912 using size_t = typename Filtration::size_t;
913 using coefficient_t = typename Filtration::coefficient_t;
914 using coefficient_storage_t = typename Filtration::coefficient_storage_t;
915 using dimension_t = typename Filtration::dimension_t;
916 using value_t = typename Filtration::value_t;
917 using vertex_t = typename Filtration::vertex_t;
918 using simplex_t = typename Filtration::simplex_t;
919 using diameter_simplex_t = typename Filtration::diameter_simplex_t;
920 using entry_t = typename Filtration::entry_t;
921 using diameter_entry_t = typename Filtration::diameter_entry_t;
922 using Entry_hash = typename Filtration::Entry_hash;
923 using Equal_index = typename Filtration::Equal_index;
924 using Simplex_boundary_enumerator = typename Filtration::Simplex_boundary_enumerator;
925 using Simplex_coboundary_enumerator = typename Filtration::Simplex_coboundary_enumerator;
926 template<class T>using Greater_diameter_or_smaller_index = typename Filtration::template Greater_diameter_or_smaller_index<T>;
927
928 typedef Compressed_sparse_matrix_<diameter_entry_t> Compressed_sparse_matrix;
929 typedef Hash_map<entry_t, size_t, Entry_hash, Equal_index> entry_hash_map;
930
931 Filtration filt;
932 const vertex_t n;
933 const dimension_t dim_max;
934 const coefficient_t modulus;
935 const std::vector<coefficient_storage_t> multiplicative_inverse_;
936 mutable std::vector<diameter_entry_t> cofacet_entries;
937 mutable std::vector<vertex_t> vertices;
938 Simplex_boundary_enumerator facets;
939 // Creating a new one in each function that needs it wastes a bit of time, but we may need 2 at the same time.
940 Simplex_coboundary_enumerator cofacets1, cofacets2;
941
942 coefficient_t multiplicative_inverse(coefficient_t c) const {
943 return multiplicative_inverse_[c];
944 }
945 public:
946 Persistent_cohomology(Filtration&& _filt, dimension_t _dim_max, coefficient_t _modulus)
947 : filt(std::move(_filt)), n(filt.num_vertices()),
948 dim_max(std::min<vertex_t>(_dim_max, n - 2)),
949 modulus(_modulus),
950 multiplicative_inverse_(multiplicative_inverse_vector<coefficient_storage_t>(_modulus)),
951 facets(filt), cofacets1(filt), cofacets2(filt)
952 {
953 }
954
955
956 std::optional<diameter_entry_t> get_zero_pivot_facet(const diameter_entry_t simplex, const dimension_t dim) {
957 facets.set_simplex(simplex, dim);
958 while(true) {
959 std::optional<diameter_entry_t> facet = facets.next();
960 if (!facet) break;
961 if (get_diameter(*facet) == get_diameter(simplex)) return *facet;
962 }
963 return std::nullopt;
964 }
965
966 std::optional<diameter_entry_t> get_zero_pivot_cofacet(const diameter_entry_t simplex, const dimension_t dim) {
967 cofacets1.set_simplex(simplex, dim);
968 while(true) {
969 std::optional<diameter_entry_t> cofacet = cofacets1.next_raw();
970 if (!cofacet) break;
971 if (get_diameter(*cofacet) == get_diameter(simplex)) return *cofacet;
972 }
973 return std::nullopt;
974 }
975
976 // Apparent pairs are implicit in Ripser.
977 // pro: we don't need to store them
978 // con: we may have to recompute them many times, and each test is more expensive than emergent pairs
979 std::optional<diameter_entry_t> get_zero_apparent_facet(const diameter_entry_t simplex, const dimension_t dim) {
980 std::optional<diameter_entry_t> facet = get_zero_pivot_facet(simplex, dim);
981 if (!facet) return std::nullopt;
982 std::optional<diameter_entry_t> cofacet = get_zero_pivot_cofacet(*facet, dim - 1);
983 if (!cofacet || filt.get_index(*cofacet) != filt.get_index(simplex)) return std::nullopt;
984 return *facet;
985 }
986
987 std::optional<diameter_entry_t> get_zero_apparent_cofacet(const diameter_entry_t simplex, const dimension_t dim) {
988 std::optional<diameter_entry_t> cofacet = get_zero_pivot_cofacet(simplex, dim);
989 if (!cofacet) return std::nullopt;
990 std::optional<diameter_entry_t> facet = get_zero_pivot_facet(*cofacet, dim + 1);
991 if (!facet || filt.get_index(*facet) != filt.get_index(simplex)) return std::nullopt;
992 return *cofacet;
993 }
994
995 bool is_in_zero_apparent_pair(const diameter_entry_t simplex, const dimension_t dim) {
996 return get_zero_apparent_cofacet(simplex, dim) || get_zero_apparent_facet(simplex, dim);
997 }
998
999 void assemble_columns_to_reduce(std::vector<diameter_simplex_t>& simplices,
1000 std::vector<diameter_simplex_t>& columns_to_reduce,
1001 entry_hash_map& pivot_column_index, dimension_t dim) {
1002
1003#ifdef GUDHI_INDICATE_PROGRESS
1004 std::cerr << clear_line << "assembling columns" << std::flush;
1005 std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step;
1006#endif
1007
1008 columns_to_reduce.clear();
1009 std::vector<diameter_simplex_t> next_simplices;
1010
1011 for (diameter_simplex_t& simplex : simplices) {
1012 cofacets2.set_simplex(filt.make_diameter_entry(simplex, 1), dim - 1);
1013
1014 while(true) {
1015 std::optional<diameter_entry_t> cofacet = cofacets2.next(false);
1016 if (!cofacet) break;
1017#ifdef GUDHI_INDICATE_PROGRESS
1018 if (std::chrono::steady_clock::now() > next) {
1019 std::cerr << clear_line << "assembling " << next_simplices.size()
1020 << " columns (processing " << std::distance(&simplices[0], &simplex)
1021 << "/" << simplices.size() << " simplices)" << std::flush;
1022 next = std::chrono::steady_clock::now() + time_step;
1023 }
1024#endif
1025 if (dim < dim_max) next_simplices.push_back({get_diameter(*cofacet), filt.get_index(*cofacet)});
1026 // Wouldn't it be cheaper in the reverse order? Seems negligible
1027 if (!is_in_zero_apparent_pair(*cofacet, dim) &&
1028 (pivot_column_index.find(get_entry(*cofacet)) == pivot_column_index.end()))
1029 columns_to_reduce.push_back({get_diameter(*cofacet), filt.get_index(*cofacet)});
1030 }
1031 }
1032
1033 if (dim < dim_max) simplices.swap(next_simplices);
1034
1035#ifdef GUDHI_INDICATE_PROGRESS
1036 std::cerr << clear_line << "sorting " << columns_to_reduce.size() << " columns"
1037 << std::flush;
1038#endif
1039
1040 std::sort(columns_to_reduce.begin(), columns_to_reduce.end(),
1041 Greater_diameter_or_smaller_index<diameter_simplex_t>(filt));
1042#ifdef GUDHI_INDICATE_PROGRESS
1043 std::cerr << clear_line << std::flush;
1044#endif
1045 }
1046
1047 template<class OutPair>
1048 void compute_dim_0_pairs(std::vector<diameter_simplex_t>& edges,
1049 std::vector<diameter_simplex_t>& columns_to_reduce, OutPair& output_pair) {
1050 Union_find<vertex_t> dset(n);
1051
1052 edges = filt.get_edges();
1053 std::sort(edges.rbegin(), edges.rend(),
1054 Greater_diameter_or_smaller_index<diameter_simplex_t>(filt));
1055 std::vector<vertex_t> vertices_of_edge(2);
1056 for (auto e : edges) {
1057 // Should we work with pairs of vertices instead of edges, to skip get_simplex_vertices?
1058 filt.get_simplex_vertices(filt.get_index(e), 1, n, vertices_of_edge.rbegin());
1059 vertex_t u = dset.find(vertices_of_edge[0]), v = dset.find(vertices_of_edge[1]);
1060
1061 if (u != v) {
1062 if (get_diameter(e) != 0)
1063 output_pair(0, get_diameter(e));
1064 dset.link(u, v);
1065 } else if ((dim_max > 0) && !get_zero_apparent_cofacet(filt.make_diameter_entry(e, 1), 1))
1066 columns_to_reduce.push_back(e);
1067 }
1068 if (dim_max > 0) std::reverse(columns_to_reduce.begin(), columns_to_reduce.end());
1069
1070 for (vertex_t i = 0; i < n; ++i)
1071 if (dset.find(i) == i) output_pair(0, std::numeric_limits<value_t>::infinity());
1072 }
1073
1074 template <typename Column> std::optional<diameter_entry_t> pop_pivot(Column& column) {
1075 while(!column.empty()) { // At this stage the partial sum is 0
1076 diameter_entry_t pivot = column.top();
1077 column.pop();
1078 while(true) { // At this stage the partial sum is led by pivot
1079 if (column.empty() || filt.get_index(column.top()) != filt.get_index(pivot)) return pivot;
1080 coefficient_t sum = (filt.get_coefficient(pivot) + filt.get_coefficient(column.top())) % modulus;
1081 column.pop();
1082 if (sum == 0) {
1083 break;
1084 }
1085 filt.set_coefficient(pivot, sum);
1086 }
1087 }
1088 return std::nullopt;
1089 }
1090
1091 template <typename Column> std::optional<diameter_entry_t> get_pivot(Column& column) {
1092 // We could look for the pivot without needing to push it back, but it does not seem to help in benchmarks.
1093 std::optional<diameter_entry_t> result = pop_pivot(column);
1094 if (result) column.push(*result);
1095 return result;
1096 }
1097
1098 // Either return the pivot in an emergent pair, or fill working_coboundary with the full coboundary (and return its pivot)
1099 template <typename Column>
1100 std::optional<diameter_entry_t> init_coboundary_and_get_pivot(const diameter_entry_t simplex,
1101 Column& working_coboundary, const dimension_t dim,
1102 entry_hash_map& pivot_column_index) {
1103 bool check_for_emergent_pair = true;
1104 cofacet_entries.clear();
1105 cofacets2.set_simplex(simplex, dim);
1106 while(true) {
1107 std::optional<diameter_entry_t> cofacet = cofacets2.next();
1108 if (!cofacet) break;
1109 cofacet_entries.push_back(*cofacet);
1110 if (check_for_emergent_pair && (get_diameter(simplex) == get_diameter(*cofacet))) {
1111 if ((pivot_column_index.find(get_entry(*cofacet)) == pivot_column_index.end()) &&
1112 !get_zero_apparent_facet(*cofacet, dim + 1))
1113 return *cofacet;
1114 check_for_emergent_pair = false;
1115 }
1116 }
1117 for (auto cofacet : cofacet_entries) working_coboundary.push(cofacet);
1118 return get_pivot(working_coboundary);
1119 }
1120
1121 // Beyond apparent/emergent pairs, Ripser does the column reduction eagerly.
1122 // To keep with the lazy paradigm, it would be possible to represent a column as a heap of coboundary_iterator, using the order of the current simplices pointed to by the iterators. The equivalent of `pop` would be ++ on the top iterator, which decreases its priority (or removes it if it reached the end). If we do it right, it could also help us notice if we have twice the same iterator and avoid computing its full coboundary twice (although we may still end up computing the first simplex of the coboundary for both, since it may be the easiest way to detect duplicates without maintaining yet another structure on the side). Another advantage is that its size would be bounded by the number of simplices of dimension d instead of d+1 currently. (Dory seems to do something related but too complicated for me.)
1123 template <typename Column>
1124 void add_simplex_coboundary(const diameter_entry_t simplex, const dimension_t dim,
1125 Column& working_reduction_column, Column& working_coboundary) {
1126 working_reduction_column.push(simplex);
1127 cofacets1.set_simplex(simplex, dim);
1128 while(true) { // stupid C++
1129 std::optional<diameter_entry_t> cofacet = cofacets1.next();
1130 if (!cofacet) break;
1131 working_coboundary.push(*cofacet);
1132 }
1133 }
1134
1135 // add an already reduced column, i.e. add all the simplex coboundaries that were involved in that reduction
1136 template <typename Column>
1137 void add_coboundary(Compressed_sparse_matrix& reduction_matrix,
1138 const std::vector<diameter_simplex_t>& columns_to_reduce,
1139 const size_t index_column_to_add, const coefficient_t factor,
1140 const dimension_t dim, Column& working_reduction_column,
1141 Column& working_coboundary) {
1142 diameter_entry_t column_to_add = filt.make_diameter_entry(columns_to_reduce[index_column_to_add], factor);
1143 add_simplex_coboundary(column_to_add, dim, working_reduction_column, working_coboundary);
1144
1145 for (diameter_entry_t simplex : reduction_matrix.subrange(index_column_to_add)) {
1146 filt.set_coefficient(simplex, filt.get_coefficient(simplex) * factor % modulus);
1147 add_simplex_coboundary(simplex, dim, working_reduction_column, working_coboundary);
1148 }
1149 }
1150
1151 template<class OutPair>
1152 void compute_pairs(const std::vector<diameter_simplex_t>& columns_to_reduce,
1153 entry_hash_map& pivot_column_index, const dimension_t dim, OutPair& output_pair) {
1154 Compressed_sparse_matrix reduction_matrix;
1155 Greater_diameter_or_smaller_index<diameter_entry_t> cmp(filt);
1156 Heap<diameter_entry_t, std::vector<diameter_entry_t>,
1157 Greater_diameter_or_smaller_index<diameter_entry_t>>
1158 working_reduction_column(cmp), working_coboundary(cmp);
1159
1160#ifdef GUDHI_INDICATE_PROGRESS
1161 std::chrono::steady_clock::time_point next = std::chrono::steady_clock::now() + time_step;
1162#endif
1163 for (size_t index_column_to_reduce = 0; index_column_to_reduce < columns_to_reduce.size();
1164 ++index_column_to_reduce) {
1165
1166 diameter_entry_t column_to_reduce = filt.make_diameter_entry(columns_to_reduce[index_column_to_reduce], 1);
1167 value_t diameter = get_diameter(column_to_reduce);
1168
1169 reduction_matrix.append_column();
1170
1171 working_reduction_column.clear(); working_coboundary.clear();
1172
1173 std::optional<diameter_entry_t> pivot = init_coboundary_and_get_pivot(
1174 column_to_reduce, working_coboundary, dim, pivot_column_index);
1175 // When we found an emergent pair, we could avoid checking again below, but it does not seem to gain anything in practice.
1176
1177 while (true) {
1178#ifdef GUDHI_INDICATE_PROGRESS
1179 if (std::chrono::steady_clock::now() > next) {
1180 std::cerr << clear_line << "reducing column " << index_column_to_reduce + 1
1181 << "/" << columns_to_reduce.size() << " (diameter " << diameter << ")"
1182 << std::flush;
1183 next = std::chrono::steady_clock::now() + time_step;
1184 }
1185#endif
1186 if (pivot) {
1187 auto pair = pivot_column_index.find(get_entry(*pivot));
1188 if (pair != pivot_column_index.end()) {
1189 entry_t other_pivot = pair->first;
1190 size_t index_column_to_add = pair->second;
1191 coefficient_t factor =
1192 modulus - filt.get_coefficient(*pivot) *
1193 multiplicative_inverse(filt.get_coefficient(other_pivot)) %
1194 modulus;
1195
1196 // It saves a little bit (3% on an example, 0% on another) if we pass pivot to add_coboundary and avoid pushing entries smaller than pivot in working_coboundary
1197 add_coboundary(reduction_matrix, columns_to_reduce, index_column_to_add,
1198 factor, dim, working_reduction_column, working_coboundary);
1199
1200 pivot = get_pivot(working_coboundary);
1201 } else if (std::optional<diameter_entry_t> e = get_zero_apparent_facet(*pivot, dim + 1); e) {
1202 filt.set_coefficient(*e, modulus - filt.get_coefficient(*e));
1203
1204 add_simplex_coboundary(*e, dim, working_reduction_column, working_coboundary);
1205
1206 pivot = get_pivot(working_coboundary);
1207 } else {
1208 value_t death = get_diameter(*pivot);
1209 output_pair(diameter, death);
1210 pivot_column_index.insert({get_entry(*pivot), index_column_to_reduce});
1211 // CubicalRipser suggests caching the column here, at least if it took many operations to reduce it.
1212
1213 while (true) {
1214 std::optional<diameter_entry_t> e = pop_pivot(working_reduction_column);
1215 if (!e) break;
1216 GUDHI_assert(filt.get_coefficient(*e) > 0);
1217 reduction_matrix.push_back(*e);
1218 }
1219 break;
1220 }
1221 } else {
1222 output_pair(diameter, std::numeric_limits<value_t>::infinity());
1223 break;
1224 }
1225 }
1226 }
1227#ifdef GUDHI_INDICATE_PROGRESS
1228 std::cerr << clear_line << std::flush;
1229#endif
1230 }
1231
1232 // Add a separate output_essential?
1233 // TODO: Should output_pair also take a simplex_t argument?
1234 template<class OutDim, class OutPair>
1235 void compute_barcodes(OutDim&& output_dim, OutPair&& output_pair) {
1236 std::vector<diameter_simplex_t> simplices, columns_to_reduce;
1237
1238 output_dim(0);
1239 compute_dim_0_pairs(simplices, columns_to_reduce, output_pair);
1240
1241 for (dimension_t dim = 1; dim <= dim_max; ++dim) {
1242 entry_hash_map pivot_column_index(0, filt, filt);
1243 pivot_column_index.reserve(columns_to_reduce.size());
1244
1245 output_dim(dim);
1246 compute_pairs(columns_to_reduce, pivot_column_index, dim, output_pair);
1247
1248 if (dim < dim_max)
1249 assemble_columns_to_reduce(simplices, columns_to_reduce, pivot_column_index,
1250 dim + 1);
1251 // If for some odd reason one wanted all the infinite intervals in the last dimension, assemble_columns_to_reduce should give us that.
1252 }
1253 }
1254};
1255
1256// An example of what Params can be
1257#if 0
1258struct Params1 {
1259 // size_t (not always from Params...) is used for counting (ok) and storage for the index of columns in a Hash_map.
1260 // To gain on a pair<entry_t,size_t> by reducing size_t, simplex_t has to be smaller than size_t I guess, which is very small, not worth it.
1261 typedef std::size_t size_t;
1262 typedef float value_t;
1263 typedef int8_t dimension_t; // Does it need to be signed? Experimentally no.
1264 typedef int vertex_t; // Currently needs to be signed for Simplex_coboundary_enumerator_<Compressed_lower_distance_matrix>::has_next. Reducing to int16_t helps perf a bit.
1265 typedef Gudhi::numbers::uint128_t simplex_t;
1266 // We could introduce a smaller edge_t, but it is not trivial to separate and probably not worth it
1267 typedef uint16_t coefficient_storage_t; // For the table of multiplicative inverses
1268 typedef uint_least32_t coefficient_t; // We need x * y % z to work, but promotion from uint16_t to int is not enough
1269 static const bool use_coefficients = false;
1270
1271 // Assumptions used in the code:
1272 // * dimension_t smaller than vertex_t
1273 // Check which ones need to be signed, and which ones could be unsigned instead
1274};
1275#endif
1276
1277// Trying to write a magic function
1278template<class Params, class SimplexEncoding, class DistanceMatrix, class OutDim, class OutPair>
1279void help2(DistanceMatrix&& dist, int dim_max, typename DistanceMatrix::value_t threshold, unsigned modulus, OutDim&& output_dim, OutPair&& output_pair) {
1280 typedef Rips_filtration<DistanceMatrix, SimplexEncoding, Params> Filt;
1281 typedef Persistent_cohomology<Filt> Pcoh;
1282 Filt filt(std::move(dist), dim_max, threshold, modulus);
1283 Pcoh(std::move(filt), dim_max, modulus).compute_barcodes(output_dim, output_pair);
1284}
1285template<bool use_coefficients_, class simplex_t_, class value_t_> struct TParams {
1286 // hardcode most options
1287 typedef std::size_t size_t;
1288 typedef value_t_ value_t;
1289 typedef int8_t dimension_t;
1290 typedef int vertex_t;
1291 typedef simplex_t_ simplex_t;
1292 typedef uint16_t coefficient_storage_t;
1293 typedef uint_least32_t coefficient_t;
1294 static const bool use_coefficients = use_coefficients_;
1295};
1296template<class value_t_> struct TParams2 {
1297 typedef int vertex_t;
1298 typedef value_t_ value_t;
1299};
1300// Choose simplex encoding
1301template<bool use_coefficients, class DistanceMatrix, class OutDim, class OutPair>
1302void help1(DistanceMatrix&& dist, int dim_max, typename DistanceMatrix::value_t threshold, unsigned modulus, OutDim&& output_dim, OutPair&& output_pair) {
1303 auto n = dist.size();
1304 // TODO: if n>=3 we could even go to n-3, because the Rips cannot have homology in dimension n-2
1305 // (e.g. for 3 points, there is no 1-homology)
1306 if (dim_max > n - 2) dim_max = n - 2; // FIXME: duplicated. problem if n unsigned and 0 or 1.
1307 int bits_per_vertex = log2up(n);
1308 int bits_for_coeff = log2up(modulus - 1); // duplicating logic :-( Also, if modulus is something absurd, the diagnostic is too late
1309 int bitfield_size = bits_per_vertex * (dim_max + 2) + bits_for_coeff;
1310 if (bitfield_size <= 64) { // use bitfield-64
1311 typedef TParams<use_coefficients, uint64_t, typename DistanceMatrix::value_t> P;
1312 help2<P, Bitfield_encoding<P>>(std::move(dist), dim_max, threshold, modulus, output_dim, output_pair);
1313 } else if (bitfield_size <= 128) { // use bitfield-128
1314 typedef TParams<use_coefficients, Gudhi::numbers::uint128_t, typename DistanceMatrix::value_t> P;
1315 help2<P, Bitfield_encoding<P>>(std::move(dist), dim_max, threshold, modulus, output_dim, output_pair);
1316 } else { // use cns-128
1317 typedef TParams<use_coefficients, Gudhi::numbers::uint128_t, typename DistanceMatrix::value_t> P;
1318 help2<P, Cns_encoding<P>>(std::move(dist), dim_max, threshold, modulus, output_dim, output_pair);
1319 }
1320 // Does cns-64 have its place?
1321}
1322// Select hardcoded Z/2Z or runtime Z/pZ
1323template<class DistanceMatrix, class OutDim, class OutPair>
1324void ripser(DistanceMatrix dist, int dim_max, typename DistanceMatrix::value_t threshold, unsigned modulus, OutDim&& output_dim, OutPair&& output_pair) {
1325 if (modulus == 2)
1326 help1<false>(std::move(dist), dim_max, threshold, modulus, output_dim, output_pair);
1327 else
1328 help1<true >(std::move(dist), dim_max, threshold, modulus, output_dim, output_pair);
1329}
1330#if 0
1331template<class DMParams, class OutDim, class OutPair>
1332void ripser_auto(Sparse_distance_matrix<DMParams> dist, int dim_max, typename DMParams::value_t threshold, unsigned modulus, OutDim&& output_dim, OutPair&& output_pair) {
1333 ripser(std::move(dist), dim_max, threshold, modulus, output_dim, output_pair);
1334}
1335template<class DMParams, Compressed_matrix_layout Layout, class OutDim, class OutPair>
1336void ripser_auto(Compressed_distance_matrix<DMParams, Layout> dist, int dim_max, typename DMParams::value_t threshold, unsigned modulus, OutDim&& output_dim, OutPair&& output_pair) {
1337 typedef typename DMParams::value_t value_t;
1338 typedef typename DMParams::vertex_t vertex_t;
1339 if (threshold < std::numeric_limits<value_t>::max()) { // or infinity()
1340 Sparse_distance_matrix<DMParams> new_dist(dist, threshold);
1341 ripser(std::move(new_dist), dim_max, threshold, modulus, output_dim, output_pair);
1342 } else {
1343 for (vertex_t i = 0; i < dist.size(); ++i) {
1344 value_t r_i = -std::numeric_limits<value_t>::infinity();
1345 for (vertex_t j = 0; j < dist.size(); ++j)
1346 r_i = std::max(r_i, dist(i, j));
1347 threshold = std::min(threshold, r_i);
1348 // Should we also compute this when an explicit threshold is passed, in case the user passed an unnecessarily large threshold?
1349 }
1350 ripser(std::move(dist), dim_max, threshold, modulus, output_dim, output_pair);
1351 }
1352}
1353// Other = Euclidean. TODO: more robust dispatching... (how?)
1354template<class DistanceMatrix, class OutDim, class OutPair>
1355void ripser_auto(DistanceMatrix dist, int dim_max, typename DistanceMatrix::value_t threshold, unsigned modulus, OutDim&& output_dim, OutPair&& output_pair) {
1356 typedef typename DistanceMatrix::value_t value_t;
1357 typedef TParams2<value_t> P;
1358 if (threshold < std::numeric_limits<value_t>::max()) { // or infinity()
1359 Sparse_distance_matrix<P> new_dist(dist, threshold);
1360 ripser(std::move(new_dist), dim_max, threshold, modulus, output_dim, output_pair);
1361 } else {
1362 Compressed_distance_matrix<P, LOWER_TRIANGULAR> new_dist(dist);
1363 ripser(std::move(new_dist), dim_max, threshold, modulus, output_dim, output_pair);
1364 }
1365}
1366#endif
1367template<class DistanceMatrix, class OutDim, class OutPair>
1368void ripser_auto(DistanceMatrix dist, int dim_max, typename DistanceMatrix::value_t threshold, unsigned modulus, OutDim&& output_dim, OutPair&& output_pair) {
1369 typedef typename DistanceMatrix::vertex_t vertex_t;
1370 typedef typename DistanceMatrix::value_t value_t;
1371 typedef TParams2<value_t> P;
1372 if constexpr (std::is_same_v<typename DistanceMatrix::Category, Tag_sparse>) {
1373 ripser(std::move(dist), dim_max, threshold, modulus, output_dim, output_pair);
1374 } else if (threshold < std::numeric_limits<value_t>::max()) { // or infinity()
1375 Sparse_distance_matrix<P> new_dist(dist, threshold);
1376 ripser(std::move(new_dist), dim_max, threshold, modulus, output_dim, output_pair);
1377 } else if constexpr (std::is_same_v<typename DistanceMatrix::Category, Tag_dense>) {
1378 for (vertex_t i = 0; i < dist.size(); ++i) {
1379 value_t r_i = -std::numeric_limits<value_t>::infinity();
1380 for (vertex_t j = 0; j < dist.size(); ++j)
1381 r_i = std::max(r_i, dist(i, j));
1382 threshold = std::min(threshold, r_i);
1383 // Should we also compute this when an explicit threshold is passed, in case the user passed an unnecessarily large threshold?
1384 }
1385 ripser(std::move(dist), dim_max, threshold, modulus, output_dim, output_pair);
1386 } else {
1387 Compressed_distance_matrix<P, LOWER_TRIANGULAR> new_dist(dist);
1388 ripser_auto(std::move(new_dist), dim_max, threshold, modulus, output_dim, output_pair);
1389 }
1390}
1391// - sparse input -> sparse matrix
1392// - euclidean input & threshold -> sparse matrix (don't build dense matrix)
1393// - euclidean input & !threshold -> dense matrix
1394// - dense matrix & threshold -> sparse matrix
1395// - dense matrix & !threshold -> compute minmax, keep dense
1396}
1397#undef GUDHI_assert
1398#endif // GUDHI_RIPSER_H
1399
1400/* Relevant benchmarks where different functions dominate the profile
1401# push
1402ripser --format point-cloud ripser/examples/o3_1024.txt
1403# pop
1404ripser --format point-cloud ripser/examples/o3_4096.txt --threshold 1.6
1405# apparent_pair (get_simplex_vertices + dist) - edge_collapse would help
1406ripser --format point-cloud --dim 2 --threshold .7 tore
1407# apparent_pair (get_simplex_vertices) - no edge collapse possible
1408ripser --format point-cloud circle24 --dim 25
1409
1410where equivalent datasets can be obtained through
1411 tore -> tore3D_1307.off
1412 circle24:
1413 t=np.linspace(0, 2*np.pi, 24, endpoint=False)
1414 np.stack((np.cos(t),np.sin(t))).T
1415 OR
1416 gudhi.datasets.generators.points.torus(24,1,'grid')
1417 o3 (?):
1418 scipy.stats.ortho_group.rvs(3,4096).reshape(-1,9)
1419*/
Computes the persistent cohomology of a filtered complex.
Definition Persistent_cohomology.h:54
STL namespace.