AlgoPlus  v0.1.0
dbscan.h
1 #ifndef DBSCAN_H
2 #define DBSCAN_H
3 
4 #ifdef __cplusplus
5 #include <iostream>
6 #include <vector>
7 #include <map>
8 #include <math.h>
9 #include <utility>
10 #include <cstdint>
11 #include <stdexcept>
12 #endif
13 
17 class DBSCAN{
18 private:
19  std::vector<std::pair<double, double>> setOfPoints;
20  double Eps;
21  int64_t MinPts;
22  int64_t cluster_id{0};
23  std::map<std::pair<double, double>, int64_t> points;
24 
25 public:
26 
33  explicit DBSCAN(std::vector<std::pair<double, double>> setOfPoints, double Eps, int64_t MinPts) noexcept : setOfPoints(setOfPoints), Eps(Eps), MinPts(MinPts) {
34  // cluster_id is by default noise
35 
36  for(size_t i = 0; i<setOfPoints.size(); ++i){
37  if(points.find(setOfPoints[i]) == points.end()){
38  if(ExpandCluster(setOfPoints, setOfPoints[i], cluster_id, Eps, MinPts)){
39  cluster_id = nextId(cluster_id);
40  }
41  }
42  }
43  }
44 
49  int64_t nextId(int64_t cluster_id);
50 
60  bool ExpandCluster(std::vector<std::pair<double, double> > setOfPoints, std::pair<double, double> point, int64_t cluster_id, double Eps, int64_t MinPts);
61 
69  std::vector<std::pair<double, double> > get_query(std::vector<std::pair<double, double> > setOfPoints, std::pair<double, double> point, double Eps);
70 
77  double dist(std::pair<double, double> a, std::pair<double, double> b);
78 
84  std::map<std::pair<double, double>, int64_t> get_clusters();
85 
90  std::vector<std::pair<double, double> > get_noise();
91 };
92 
93 int64_t DBSCAN::nextId(int64_t cluster_id){
94  try{
95  if(cluster_id < -1){
96  throw std::logic_error("cluster_id can't be less than -1");
97  }
98  }
99  catch(std::logic_error &e){
100  std::cerr << e.what() << '\n';
101  return -2;
102  }
103  cluster_id++;
104  return cluster_id;
105 }
106 
107 bool DBSCAN::ExpandCluster(std::vector<std::pair<double, double> > setOfPoints, std::pair<double, double> point, int64_t cluster_id, double Eps, int64_t MinPts){
108  std::vector<std::pair<double, double> > seeds = get_query(setOfPoints, point, Eps);
109  if(seeds.size() < MinPts){
110  // no core point
111  points[point] = -1;
112  return false;
113  }
114  else{
115  // we have a core point
116  // all the points in seeds are density-reachable from point
117  for(auto & x : seeds){
118  points[x] = cluster_id;
119  }
120  for(auto it = seeds.begin(); it != seeds.end(); it++){
121  if((*it).first == point.first && (*it).second == point.second){
122  seeds.erase(it);
123  break;
124  }
125  }
126 
127  while(!seeds.empty()){
128  auto current = seeds[0];
129  std::vector<std::pair<double, double> > result = get_query(setOfPoints, current, Eps);
130 
131  if(result.size() >= MinPts){
132  for(size_t i = 0; i<result.size(); i++){
133  std::pair<double, double> result_p = result[i];
134  if(points.find(result_p) == points.end() || points[result_p] == -1){
135  if(points.find(result_p) == points.end()){
136  seeds.push_back(result_p);
137  }
138  points[result_p] = cluster_id;
139  } // unclassified or noise
140  }
141  }
142  for(auto it = seeds.begin(); it != seeds.end(); it++){
143  if((*it).first == current.first && (*it).second == current.second){
144  seeds.erase(it);
145  break;
146  }
147  }
148  }
149  return true;
150  }
151  return false;
152 }
153 
154 std::vector<std::pair<double, double> > DBSCAN::get_query(std::vector<std::pair<double, double> > setOfPoints, std::pair<double, double> point, double Eps){
155  std::vector<std::pair<double, double> > ans;
156  for(size_t i = 0; i<setOfPoints.size(); i++){
157  std::pair<double, double> curr = setOfPoints[i];
158  if(dist(point, curr) <= Eps){
159  ans.push_back(curr);
160  }
161  }
162  return ans;
163 }
164 
165 double DBSCAN::dist(std::pair<double, double> a, std::pair<double, double> b){
166  return sqrt( pow(b.second - a.second, 2) + pow(b.first - a.first, 2) );
167 }
168 
169 std::map< std::pair<double, double>, int64_t> DBSCAN::get_clusters(){
170  std::map< std::pair<double, double>, int64_t> ans;
171  for(size_t i = 0; i<setOfPoints.size(); i++){
172  if(points[setOfPoints[i]] != -1){
173  ans[setOfPoints[i]] = points[setOfPoints[i]];
174  }
175  }
176  return ans;
177 }
178 
179 std::vector< std::pair<double, double>> DBSCAN::get_noise(){
180  std::vector< std::pair<double, double> > ans;
181  for(size_t i = 0; i<setOfPoints.size(); i++){
182  if(points[setOfPoints[i]] == -1){
183  ans.push_back(setOfPoints[i]);
184  }
185  }
186  return ans;
187 }
188 
189 
190 #endif
DBSCAN clustering algorithm class.
Definition: dbscan.h:17
bool ExpandCluster(std::vector< std::pair< double, double > > setOfPoints, std::pair< double, double > point, int64_t cluster_id, double Eps, int64_t MinPts)
ExpandCluster function.
Definition: dbscan.h:107
double dist(std::pair< double, double > a, std::pair< double, double > b)
dist function
Definition: dbscan.h:165
int64_t nextId(int64_t cluster_id)
Definition: dbscan.h:93
std::vector< std::pair< double, double > > get_query(std::vector< std::pair< double, double > > setOfPoints, std::pair< double, double > point, double Eps)
get_query function
Definition: dbscan.h:154
DBSCAN(std::vector< std::pair< double, double >> setOfPoints, double Eps, int64_t MinPts) noexcept
constructor for the DBSCAN class
Definition: dbscan.h:33
std::map< std::pair< double, double >, int64_t > get_clusters()
get_clusters function
Definition: dbscan.h:169
std::vector< std::pair< double, double > > get_noise()
get_noise function
Definition: dbscan.h:179