Loading [MathJax]/extensions/tex2jax.js
PISM, A Parallel Ice Sheet Model 2.2.1-cd005eec8 committed by Constantine Khrulev on 2025-03-07
All Classes Namespaces Files Functions Variables Typedefs Enumerations Enumerator Friends Macros Pages
label_components_parallel.cc
Go to the documentation of this file.
1/* Copyright (C) 2023, 2024 PISM Authors
2 *
3 * This file is part of PISM.
4 *
5 * PISM is free software; you can redistribute it and/or modify it under the
6 * terms of the GNU General Public License as published by the Free Software
7 * Foundation; either version 3 of the License, or (at your option) any later
8 * version.
9 *
10 * PISM is distributed in the hope that it will be useful, but WITHOUT ANY
11 * WARRANTY; without even the implied warranty of MERCHANTABILITY or FITNESS
12 * FOR A PARTICULAR PURPOSE. See the GNU General Public License for more
13 * details.
14 *
15 * You should have received a copy of the GNU General Public License
16 * along with PISM; if not, write to the Free Software
17 * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
18 */
19
20#include "pism/util/connected_components/label_components.hh"
21#include "pism/util/connected_components/label_components_impl.hh"
22#include "pism/util/Grid.hh"
23#include "pism/util/array/Scalar.hh"
24
25#include <cmath> // pow, ceil, log10
26#include <map>
27#include <mpi.h>
28#include <queue>
29#include <set>
30
31namespace pism {
32
33namespace details {
34
35//! West boundary of a sub-domain.
36static bool west_boundary(const Grid &grid, int i) {
37 int i_first = grid.xs();
38 return (i == i_first) and (i != 0);
39}
40
41//! East boundary of a sub-domain.
42static bool east_boundary(const Grid &grid, int i) {
43 int i_last = grid.xs() + grid.xm() - 1;
44 return (i == i_last) and (i != (int)grid.Mx() - 1);
45}
46
47//! North boundary of a sub-domain.
48static bool north_boundary(const Grid &grid, int j) {
49 int j_last = grid.ys() + grid.ym() - 1;
50 return (j == j_last) and (j != (int)grid.My() - 1);
51}
52
53//! South boundary of a sub-domain.
54static bool south_boundary(const Grid &grid, int j) {
55 int j_first = grid.ys();
56 return (j == j_first) and (j != 0);
57}
58
59/*!
60 * Inspect sub-domain edges to detect connections between patches owned by individual
61 * sub-domains.
62 */
63static std::map<int, std::set<int> > detect_connections(array::Scalar1 &mask) {
64 auto grid = mask.grid();
65 std::map<int, std::set<int> > graph;
66
67 mask.update_ghosts();
68
69 array::AccessScope list{ &mask };
70
71 auto maybe_add_edge = [&](int a, int b) {
72 if (a > 0 and b > 0) {
73 graph[std::min(a, b)].insert(std::max(a, b));
74 }
75 };
76
77 for (Points p(*grid); p; p.next()) {
78 const int i = p.i(), j = p.j();
79
80 int M = mask.as_int(i, j);
81
82 // add an edge (M, M) to ensure that the graph contains information about patches that
83 // don't touch sub-domain edges. This is needed to be able to re-label them properly
84 // (i.e. to make labels consecutive).
85 //
86 // Note that this is incompatible with the optimization mentioned in
87 // label_components_parallel()
88 maybe_add_edge(M, M);
89
90 if (north_boundary(*grid, j)) {
91 maybe_add_edge(M, mask.as_int(i, j + 1));
92 }
93
94 if (east_boundary(*grid, i)) {
95 maybe_add_edge(M, mask.as_int(i + 1, j));
96 }
97
98 if (south_boundary(*grid, j)) {
99 maybe_add_edge(M, mask.as_int(i, j - 1));
100 }
101
102 if (west_boundary(*grid, i)) {
103 maybe_add_edge(M, mask.as_int(i - 1, j));
104 }
105 } // end of the loop over grid points
106
107 return graph;
108}
109
110/*!
111 * Pack information about connections (graph edges) into the form needed for communication.
112 */
113static std::vector<int> pack_data(const std::map<int, std::set<int> > &graph) {
114 // FIXME (maybe): we could reduce the amount of data we have to send by being more
115 // clever about packing data.
116 std::vector<int> result{};
117 for (const auto &e : graph) {
118 int a = e.first;
119 for (auto b : e.second) {
120 result.push_back(a);
121 result.push_back(b);
122 }
123 }
124 return result;
125}
126
127/*!
128 * Unpack data gathered from all the ranks that contain connected components.
129 */
130static std::map<int, std::set<int> > unpack_data(const std::vector<int> &buffer) {
131 std::map<int, std::set<int> > result;
132 for (size_t n = 0; n < buffer.size() / 2; ++n) {
133 int a = buffer[2 * n + 0];
134 int b = buffer[2 * n + 1];
135
136 // insert *both* (a,b) and (b,a) because we want to be able to easily get a list
137 // of nodes adjacent to a given node
138 result[a].insert(b);
139 result[b].insert(a);
140 }
141
142 return result;
143}
144
145/*!
146 * Assemble complete graph info by gathering edges from all MPI ranks that have at least
147 * one patch.
148 */
149static std::map<int, std::set<int> > assemble_graph(MPI_Comm comm,
150 const std::map<int, std::set<int> > &subgraph) {
151 int size = 0;
152 MPI_Comm_size(comm, &size);
153 int rank = 0;
154 MPI_Comm_rank(comm, &rank);
155
156 auto message = pack_data(subgraph);
157
158 // gather message lengths to prepare the buffer:
159 std::vector<int> message_length(size);
160 message_length[rank] = (int)message.size();
161 MPI_Allgather(MPI_IN_PLACE, 1, MPI_INT, message_length.data(), 1, MPI_INT, comm);
162
163 // compute offsets
164 std::vector<int> offsets(size);
165 offsets[0] = 0;
166 for (int k = 1; k < size; ++k) {
167 offsets[k] = offsets[k - 1] + message_length[k - 1];
168 }
169
170 // allocate the buffer
171 int buffer_size = offsets.back() + message_length.back();
172 std::vector<int> buffer(buffer_size);
173
174 // Gather adjacency info:
175 MPI_Allgatherv(message.data(), (int)message.size(), MPI_INT, buffer.data(), message_length.data(),
176 offsets.data(), MPI_INT, comm);
177
178 // unpack messages:
179 return unpack_data(buffer);
180}
181
182/*!
183 * Traverse the `graph` using a version of the "breadth first search" algorithm and find
184 * final labels for all of its nodes.
185 *
186 * If `mark_isolated_patches` is true, isolated patches get labeled with `1`, the rest
187 * with `0`.
188 *
189 * If `mark_isolated_patches` is false, each connected component in the graph gets a
190 * unique label starting from `1` and using consecutive integers.
191 */
192static std::map<int, int> resolve_labels(const std::map<int, std::set<int> > &graph,
193 bool mark_isolated_patches) {
194
195 // visited[X] == true if we already visited the node with the ID equal to X
196 std::map<int, bool> visited;
197 // map from node IDs to labels
198 std::map<int, int> result;
199
200 // label used for "attached" cells
201 const int attached = 1;
202
203 // Loop over all edges in the graph and start the breadth-first search starting from the
204 // first node of each edge. This is sufficient since if (a,b) is in the graph then (b,a)
205 // is also in the graph (by construction).
206 for (const auto &edge : graph) {
207 int u = edge.first;
208
209 if (visited[u]) {
210 continue;
211 }
212
213 // select a provisional label:
214 int label = u;
215
216 std::set<int> equivalent{};
217
218 std::queue<int> queue;
219 queue.push(u);
220 while (not queue.empty()) {
221 int v = queue.front();
222 queue.pop();
223
224 if (visited[v]) {
225 continue;
226 }
227
228 // Odd labels are used to mark "attached" cells; replace this label with "1". Note
229 // that this is the smallest label we use, so `min(label, v)` will preserve it.
230 label = (v % 2 == 1) ? attached : std::min(label, v);
231
232 equivalent.insert(v);
233 visited[v] = true;
234
235 auto it = graph.find(v);
236 if (it != graph.end()) {
237 for (int w : it->second) {
238 queue.push(w);
239 }
240 }
241 }
242
243 // record the label for all the "equivalent" nodes (i.e. all the nodes in a particular
244 // connected component)
245 for (int v : equivalent) {
246 result[v] = label;
247 }
248 }
249
250 if (mark_isolated_patches) {
251 // mark "attached" cells with `0` and the rest with `1`:
252 for (auto &p : result) {
253 p.second = (p.second == attached) ? 0 : 1;
254 }
255 } else {
256 // make labels consecutive:
257 int current_label = 1;
258 std::map<int, int> new_labels;
259 for (auto &p : result) {
260 if (new_labels.find(p.second) == new_labels.end()) {
261 new_labels[p.second] = current_label++;
262 }
263 p.second = new_labels[p.second];
264 }
265 }
266
267 return result;
268}
269
270//! Assign new labels to elements of `mask`. Does not touch background grid cells.
271/*!
272 * This function assumes that `mask` was labeled using
273 * `connected_components::details::label()`
274 */
275void relabel(array::Scalar &mask, const std::map<int, int> &labels) {
276
277 if (labels.empty()) {
278 return;
279 }
280
281 array::AccessScope list{ &mask };
282
283 for (Points p(*mask.grid()); p; p.next()) {
284 const int i = p.i(), j = p.j();
285
286 int old_label = mask.as_int(i, j);
287
288 auto it = labels.find(old_label);
289 if (it == labels.end()) {
290 continue;
291 }
292
293 mask(i, j) = it->second;
294 }
295}
296
297/*!
298 * Compute the map from intermediate to final labels given a ghosted array `input`.
299 *
300 * The value of the flag `subdomain_is_not_empty` varies from one subdomain to the next
301 * depending on whether it has any foreground pixels.
302 */
303std::map<int, int> final_labels(array::Scalar1 &input, bool subdomain_is_not_empty,
304 bool mark_isolated_patches) {
305 // Iterate over the grid to find connections between patches owned by individual
306 // sub-domains. Updates the ghosts of `mask`.
307 auto edges = details::detect_connections(input);
308
309 // We create a sub-communicator containing *only* the ranks with at least one patch to
310 // reduce the amount of data sent around by the rest of this algorithm. We may be able
311 // to reduce this even further by excluding sub-domains with patches that are *entirely*
312 // contained within a sub-domain. On the other hand, this would make it impossible to
313 // ensure that patches are labeled with consecutive numbers starting from 1 (this
314 // simplifies the code elsewhere).
315 //
316 // This optimization *is* an option if `mark_isolated_patches` is set since in that case
317 // the only values used are `1` for isolated patches and `0` for the rest of the domain.
318 //
319 // Note: depending on the cost of MPI_Comm_split() it may be more efficient to use the
320 // original communicator instead of creating a sub-communicator. We need to do some
321 // profiling...
322 std::map<int, int> labels;
323 {
324 MPI_Comm comm = MPI_COMM_NULL;
325 MPI_Comm_split(input.grid()->com, subdomain_is_not_empty ? 1 : 0, 0, &comm);
326
327 // the following block is executed by ranks that have at least one patch
328 if (subdomain_is_not_empty) {
329 auto graph = details::assemble_graph(comm, edges);
330
331 labels = details::resolve_labels(graph, mark_isolated_patches);
332 }
333
334 MPI_Comm_free(&comm);
335 }
336 return labels;
337}
338
339//! Compute the first label a particular rank can use. This is supposed to ensure that all
340//! labels are unique (i.e. different ranks do not use the same label).
341int first_label(const Grid &grid) {
342
343 // Here we set `first_label` to ensure that all ranks use *different* labels.
344 // find the smallest N such that grid.xm()*grid.ym() < 10^N
345 int exponent = static_cast<int>(std::ceil(std::log10(grid.max_patch_size())));
346 int labels_per_rank = static_cast<int>(std::pow(10, exponent));
347
348 // FIXME: this is very unlikely, but I should still check for integer overflow.
349
350 return grid.rank() * labels_per_rank + 1;
351}
352
353} // end of namespace details
354
355namespace connected_components {
356
359
361
362 Array input(mask);
363
364 bool mark_isolated_patches = false;
365 label_components_impl(Mask(input, -1), mark_isolated_patches, mask);
366}
367
368void label_isolated(array::Scalar1 &mask, int reachable) {
369
370 Array input(mask);
371
372 bool mark_isolated_patches = true;
373 label_components_impl(Mask(input, reachable), mark_isolated_patches, mask);
374}
375
376} // end of namespace connected_components
377
378} // end of namespace pism
Describes the PISM grid and the distribution of data across processors.
Definition Grid.hh:290
Makes sure that we call begin_access() and end_access() for all accessed array::Arrays.
Definition Array.hh:64
std::shared_ptr< const Grid > grid() const
Definition Array.cc:131
void update_ghosts()
Updates ghost points.
Definition Array.cc:615
int as_int(int i, int j) const
Definition Scalar.hh:45
Adds "foregrdound" and "attached" concepts to an "array".
#define n
Definition exactTestM.c:37
void label_isolated(array::Scalar1 &mask, int reachable)
connected_components::details::Mask< Array > Mask
static bool west_boundary(const Grid &grid, int i)
West boundary of a sub-domain.
int first_label(const Grid &grid)
static bool south_boundary(const Grid &grid, int j)
South boundary of a sub-domain.
void relabel(array::Scalar &mask, const std::map< int, int > &labels)
Assign new labels to elements of mask. Does not touch background grid cells.
static std::map< int, std::set< int > > unpack_data(const std::vector< int > &buffer)
static bool east_boundary(const Grid &grid, int i)
East boundary of a sub-domain.
std::map< int, int > final_labels(array::Scalar1 &input, bool subdomain_is_not_empty, bool mark_isolated_patches)
static std::map< int, int > resolve_labels(const std::map< int, std::set< int > > &graph, bool mark_isolated_patches)
static bool north_boundary(const Grid &grid, int j)
North boundary of a sub-domain.
static std::vector< int > pack_data(const std::map< int, std::set< int > > &graph)
static std::map< int, std::set< int > > assemble_graph(MPI_Comm comm, const std::map< int, std::set< int > > &subgraph)
static std::map< int, std::set< int > > detect_connections(array::Scalar1 &mask)
static const double k
Definition exactTestP.cc:42
void label_components_impl(const T &input, bool mark_isolated_patches, array::Scalar1 &output)