#include "triggeralgs/dbscan/dbscan.hpp"
#include "triggeralgs/dbscan/Hit.hpp"

#include <algorithm>
#include <cassert>
#include <limits>

namespace triggeralgs {
namespace dbscan {

//======================================================================
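// Count the neighbours of hit `q` within `eps`, registering each one
// on `q` via Hit::add_potential_neighbour. `hits` must be sorted by
// time in ascending order: the scan runs backwards from the most
// recent hit and stops once it reaches hits earlier than q.time - eps.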
int
neighbours_sorted(const std::vector<Hit*>& hits, Hit& q, float eps, int minPts)
{
    int n = 0;
    // Loop over the hits starting from the latest hit, since we will
    // ~always be adding a hit at recent times
    for (auto hit_it = hits.rbegin(); hit_it != hits.rend(); ++hit_it) {
        if ((*hit_it)->time > q.time + eps)
            continue;
        if ((*hit_it)->time < q.time - eps)
            break;

        if (q.add_potential_neighbour(*hit_it, eps, minPts))
            ++n;
    }
    return n;
}

//======================================================================
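// Check whether `new_hit` is a neighbour of any hit already in this
// cluster. If it is, add it, and promote each matching existing hit
// to a core or edge point according to its updated neighbour count.
// Returns true if the hit was added.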
bool
Cluster::maybe_add_new_hit(Hit* new_hit, float eps, int minPts)
{
    // Should we add this hit?
    bool do_add = false;

    // Hits earlier than new_hit time minus `eps` can't possibly be
    // neighbours, so start the search there in the sorted list of hits in this
    // cluster
    auto begin_it = std::lower_bound(
        hits.begin(), hits.end(), new_hit->time - eps, time_comp_lower);

    for (auto it = begin_it; it != hits.end(); ++it) {
        Hit* h = *it;
        if (h->add_potential_neighbour(new_hit, eps, minPts)) {
            do_add = true;
            if (h->neighbours.size() + 1 >= minPts) {
                h->connectedness = Connectedness::kCore;
            } else {
                h->connectedness = Connectedness::kEdge;
            }
        }
    } // end loop over hits in cluster

    if (do_add) {
        add_hit(new_hit);
    }

    return do_add;
}

//======================================================================
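// Add a hit to this cluster unconditionally, updating the cluster's
// latest time and, if the hit is a core point, its latest core point.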
void
Cluster::add_hit(Hit* h)
{
    hits.insert(h);
    h->cluster = index;
    latest_time = std::max(latest_time, h->time);
    if (h->connectedness == Connectedness::kCore &&
        (!latest_core_point || h->time > latest_core_point->time)) {
        latest_core_point = h;
    }
}

//======================================================================
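// Move all of `other`'s hits into this cluster, leaving `other` empty
// and marked complete so that the caller can discard it.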
75 : void
76 0 : Cluster::steal_hits(Cluster& other)
77 : {
78 : // TODO: it might be faster to do some sort of explicit "merge" of the hits,
79 : // eg:
80 : //
81 : // this->hits.insert(other hits); // Inserts at end
82 : // std::inplace_merge(...)
83 : //
84 : // This might save some reallocations of the vector
85 0 : for (auto h : other.hits) {
86 0 : assert(h);
87 0 : add_hit(h);
88 : }
89 0 : other.hits.clear();
90 0 : other.completeness = Completeness::kComplete;
91 0 : }
92 :
93 : //======================================================================
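// Starting from `seed_hit`, walk out over its neighbours (and the
// neighbours of any core points found along the way), adding every
// reachable unclustered hit to `cluster`. This is the usual DBSCAN
// "expand cluster" step.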
void
IncrementalDBSCAN::cluster_reachable(Hit* seed_hit, Cluster& cluster)
{
    // Loop over all neighbours (and the neighbours of core points, and so on)
    std::vector<Hit*> seedSet(seed_hit->neighbours.begin(),
                              seed_hit->neighbours.end());

    while (!seedSet.empty()) {
        Hit* q = seedSet.back();
        seedSet.pop_back();
        // Change noise to a border point
        if (q->connectedness == Connectedness::kNoise) {
            cluster.add_hit(q);
        }

        if (q->cluster != kUndefined) {
            continue;
        }

        cluster.add_hit(q);

        // If q is a core point, add its neighbours to the search list
        if (q->neighbours.size() + 1 >= m_minPts) {
            q->connectedness = Connectedness::kCore;
            seedSet.insert(
                seedSet.end(), q->neighbours.begin(), q->neighbours.end());
        }
    }
}

//======================================================================
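// Convenience entry point for raw (time, channel) points: take the
// next slot in the hit pool (used as a ring buffer; the index wraps
// with no check that the slot is free, so the pool must be large
// enough that a slot is never reused while its hit is still in an
// active cluster) and run the incremental clustering step on it.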
void
IncrementalDBSCAN::add_point(float time, float channel, std::vector<Cluster>* completed_clusters)
{
    Hit& new_hit = m_hit_pool[m_pool_end];
    new_hit.reset(time, channel);
    ++m_pool_end;
    if (m_pool_end == m_hit_pool.size()) m_pool_end = 0;
    add_hit(&new_hit, completed_clusters);
}

//======================================================================
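// Entry point for TriggerPrimitives: the hit is placed at the
// primitive's channel, with its time taken relative to the first
// primitive seen and scaled by 1e-2 (so one unit of internal time
// corresponds to 100 timestamp ticks). Clusters that can no longer
// grow are appended to `completed_clusters`, if provided.
//
// A minimal usage sketch (the constructor arguments shown here are
// hypothetical; the real signature lives in dbscan.hpp):
//
//     IncrementalDBSCAN dbscan(/* eps */ 10, /* minPts */ 2);
//     std::vector<Cluster> completed;
//     for (const triggeralgs::TriggerPrimitive& tp : tps) {
//         dbscan.add_primitive(tp, &completed);
//     }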
void
IncrementalDBSCAN::add_primitive(const triggeralgs::TriggerPrimitive& prim, std::vector<Cluster>* completed_clusters)
{
    if (m_first_prim_time == 0) {
        m_first_prim_time = prim.time_start;
    }

    Hit& new_hit = m_hit_pool[m_pool_end];
    new_hit.reset(1e-2 * (prim.time_start - m_first_prim_time), prim.channel, &prim);
    ++m_pool_end;
    if (m_pool_end == m_hit_pool.size()) m_pool_end = 0;

    add_hit(&new_hit, completed_clusters);
}

//======================================================================
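// The core of the incremental algorithm. Each new hit is classified
// against the hits seen so far: it joins (and possibly merges)
// existing clusters, seeds a new cluster if it has become a core
// point, or is left as noise. Finally, any cluster whose latest hit
// is more than `eps` older than the latest hit overall can no longer
// grow, so it is declared complete and handed back via
// `completed_clusters`.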
void
IncrementalDBSCAN::add_hit(Hit* new_hit, std::vector<Cluster>* completed_clusters)
{
    // TODO: this should be a member variable, not a static, in case
    // there are multiple IncrementalDBSCAN instances
    static int next_cluster_index = 0;

    m_hits.push_back(new_hit);
    m_latest_time = new_hit->time;

    // All the clusters that this hit neighboured. If there are
    // multiple clusters neighbouring this hit, we'll merge them at
    // the end
    std::set<int> clusters_neighbouring_hit;

    // Find all the hit's neighbours
    neighbours_sorted(m_hits, *new_hit, m_eps, m_minPts);

    for (auto neighbour : new_hit->neighbours) {
        if (neighbour->cluster != kUndefined && neighbour->cluster != kNoise &&
            neighbour->neighbours.size() + 1 >= m_minPts) {
            // This neighbour is a core point in a cluster. Add the cluster to the list of
            // clusters that will contain this hit
            clusters_neighbouring_hit.insert(neighbour->cluster);
        }
    }

    if (clusters_neighbouring_hit.empty()) {
        // This hit didn't match any existing cluster. See if we can
        // make a new cluster out of it. Otherwise mark it as noise

        if (new_hit->neighbours.size() + 1 >= m_minPts) {
            // std::cout << "New cluster starting at hit time " << new_hit->time << " with " << new_hit->neighbours.size() << " neighbours" << std::endl;
            new_hit->connectedness = Connectedness::kCore;
            auto new_it = m_clusters.emplace_hint(
                m_clusters.end(), next_cluster_index, next_cluster_index);
            Cluster& new_cluster = new_it->second;
            new_cluster.completeness = Completeness::kIncomplete;
            new_cluster.add_hit(new_hit);
            next_cluster_index++;
            cluster_reachable(new_hit, new_cluster);
        } else {
            // std::cout << "New hit time " << new_hit->time << " with " << new_hit->neighbours.size() << " neighbours is noise" << std::endl;
        }
    } else {
        // This hit neighboured at least one cluster. Add the hit and
        // its noise neighbours to the first cluster in the list, then
        // merge the rest of the clusters into it

        auto index_it = clusters_neighbouring_hit.begin();

        auto it = m_clusters.find(*index_it);
        assert(it != m_clusters.end());
        Cluster& cluster = it->second;
        // std::cout << "Adding hit time " << new_hit->time << " with " << new_hit->neighbours.size() << " neighbours to existing cluster" << std::endl;
        cluster.add_hit(new_hit);

        // TODO: this seems wrong: we're adding this hit's neighbours
        // to the cluster even if this hit isn't a core point, but if
        // I wrap the whole thing in "if(new_hit is core)" then the
        // results differ from classic DBSCAN
        for (auto q : new_hit->neighbours) {
            if (q->cluster == kUndefined || q->cluster == kNoise) {
                // std::cout << " Adding hit time " << q->time << " to existing cluster" << std::endl;
                cluster.add_hit(q);
            }
            // If the neighbouring hit q has exactly m_minPts
            // neighbours, it must have become a core point by the
            // addition of new_hit. Add q's neighbours to the cluster
            if (q->neighbours.size() + 1 == m_minPts) {
                for (auto r : q->neighbours) {
                    cluster.add_hit(r);
                }
            }
        }

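        // The first neighbouring cluster has been handled above; merge
        // each remaining neighbouring cluster into it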
        ++index_it;

        for (; index_it != clusters_neighbouring_hit.end(); ++index_it) {
            auto other_it = m_clusters.find(*index_it);
            assert(other_it != m_clusters.end());
            Cluster& other_cluster = other_it->second;
            cluster.steal_hits(other_cluster);
        }
    }

    // Last case: new_hit and its neighbour are both noise, but the
    // addition of new_hit makes the neighbour a core point. So we
    // start a new cluster at the neighbour, and walk out from there
    for (auto& neighbour : new_hit->neighbours) {
        if (neighbour->neighbours.size() + 1 >= m_minPts) {
            // std::cout << "new_hit's neighbour at " << neighbour->time << " has " << neighbour->neighbours.size() << " neighbours, so is core" << std::endl;
            if (neighbour->cluster == kNoise || neighbour->cluster == kUndefined) {
                if (new_hit->cluster == kNoise || new_hit->cluster == kUndefined) {
                    auto new_it = m_clusters.emplace_hint(
                        m_clusters.end(), next_cluster_index, next_cluster_index);
                    Cluster& new_cluster = new_it->second;
                    new_cluster.completeness = Completeness::kIncomplete;
                    new_cluster.add_hit(neighbour);
                    next_cluster_index++;
                    cluster_reachable(neighbour, new_cluster);
                }
            }
        } else {
            // std::cout << "new_hit's neighbour at " << neighbour->time << " has " << neighbour->neighbours.size() << " neighbours, so is NOT core" << std::endl;
        }
    }

    // Delete any completed clusters from the list. Put them in the
    // `completed_clusters` vector, if that vector was passed
    auto clust_it = m_clusters.begin();
    while (clust_it != m_clusters.end()) {
        Cluster& cluster = clust_it->second;

        if (cluster.latest_time < m_latest_time - m_eps) {
            cluster.completeness = Completeness::kComplete;
        }

        if (cluster.completeness == Completeness::kComplete) {
            if (completed_clusters) {
                // TODO: room for std::move here?
                //
                // Clusters that got merged into another cluster had
                // their hits cleared, and were set kComplete, by
                // steal_hits
                if (cluster.hits.size() != 0) {
                    completed_clusters->push_back(cluster);
                }
            }
            clust_it = m_clusters.erase(clust_it);
            continue;
        } else {
            ++clust_it;
        }
    }
}

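//======================================================================
// Drop hits from m_hits that are well outside the neighbourhood
// (10 * eps) of the earliest hit still held by any cluster, so that
// the hit list searched by add_hit() does not grow without bound.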
void
IncrementalDBSCAN::trim_hits()
{
    // Find the earliest time of a hit in any cluster in the list (active or
    // not)
    float earliest_time = std::numeric_limits<float>::max();

    for (auto& cluster : m_clusters) {
        earliest_time =
            std::min(earliest_time, (*cluster.second.hits.begin())->time);
    }

    // If there were no clusters, set the earliest_time to the latest time
    // (otherwise it would still be FLT_MAX)
    if (m_clusters.empty()) {
        earliest_time = m_latest_time;
    }
    auto last_it = std::lower_bound(m_hits.begin(),
                                    m_hits.end(),
                                    earliest_time - 10 * m_eps,
                                    time_comp_lower);

    m_hits.erase(m_hits.begin(), last_it);
}

}
}
// Local Variables:
// mode: c++
// c-basic-offset: 4
// c-file-style: "linux"
// End: