Skip to content

Commit d2325e5

Browse files
adding a simple implementation of optimal segmentation with a piecewise
constant model.
1 parent 3526921 commit d2325e5

File tree

1 file changed

+128
-0
lines changed

1 file changed

+128
-0
lines changed

src/simple-proto-seg.cxx

Lines changed: 128 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,128 @@
1+
#include <limits>
2+
#include <vector>
3+
#include <tuple>
4+
#include <iterator>
5+
#include <iostream>
6+
#include <algorithm>
7+
8+
#include <boost/lexical_cast.hpp>
9+
/*
10+
We used an idiomatic basic C++ iterator based API.
11+
(could also use a C++11 container absed API with move)
12+
The data to segment in K segments is in [beg, end).
13+
The segments for segmentations in K-1 segments of [i,end) with i \in [beg,end)
14+
are in [computed_beg, computed_end)
15+
segments are recursive tuples
16+
*/
17+
18+
/*
19+
The segment data structure contains the cost (sum of square errors), the episod size and a pointer to the next segment.
20+
*/
21+
template<typename CostType, typename SizeType=std::size_t>
22+
struct segment {
23+
typedef CostType cost_type;
24+
typedef SizeType size_type;
25+
segment():cost(std::numeric_limits<cost_type>::max()), size(0), next(0){}
26+
cost_type cost;
27+
size_type size;
28+
segment* next;
29+
};
30+
31+
/* The generic optimal segmentation algorithm for one more segment. Dynamic programing makes it O(n^2).
32+
*/
33+
template<typename ItData, typename ItSeg, typename Out>
34+
Out compute_seg(ItData beg, ItData end, ItSeg computed_beg, ItSeg computed_end
35+
, Out result_out) {
36+
typedef typename std::iterator_traits<ItSeg>::value_type seg_type;
37+
typedef typename seg_type::cost_type cost_type;
38+
typedef typename std::iterator_traits<ItData>::difference_type size_type;
39+
40+
ItData start_data(beg); // we won't compute N=(end-beg) segmentations because there is no seg in K segments for the last K-1 points, so we iterate in computed_ instead
41+
// we compute a seg starting from start_data
42+
for(ItSeg start_seg(computed_beg); start_seg != computed_end; ++start_seg, ++start_data, ++result_out){
43+
seg_type current_seg;
44+
cost_type sum(0.), sum_sq(0.);
45+
size_type nb_elts(1);
46+
ItData it_data= start_data;
47+
// for each seg, we test strating from start_data
48+
for(ItSeg it_seg(start_seg); it_seg != computed_end;++it_seg, ++it_data, ++nb_elts){
49+
sum+= *it_data; sum_sq+= (*it_data)* (*it_data);
50+
cost_type const current_cost((sum_sq-(sum*sum)/nb_elts) + it_seg->cost);
51+
// std::cerr<<" cost is "<<(sum_sq-(sum*sum)/nb_elts)<< " + "<<it_seg->cost <<" = "<<current_cost<<" ";
52+
if(current_cost < current_seg.cost) {
53+
//std::cerr<<"optim found for size "<<nb_elts<<" next size is "<<it_seg->size<<std::endl;
54+
current_seg.cost= current_cost;
55+
current_seg.next= &(*it_seg);
56+
current_seg.size= nb_elts;
57+
}
58+
}
59+
*result_out= current_seg;
60+
}
61+
return result_out;
62+
}
63+
template <typename It> struct generic_traits : std::iterator_traits<It> {};
64+
template <typename C> struct generic_traits<std::back_insert_iterator<C> >
65+
: std::iterator_traits<typename C::iterator> {};
66+
67+
/* the initial step computing "segmentations" in one segment.
68+
*/
69+
template<typename ItData, typename ItSeg>
70+
ItSeg compute_initial_seg(ItData beg, ItData end, ItSeg out) {
71+
typedef typename generic_traits<ItSeg>::value_type seg_type;
72+
typedef typename seg_type::cost_type cost_type;
73+
typedef typename std::iterator_traits<ItData>::difference_type size_type;
74+
typedef std::reverse_iterator<ItData> rev_it_type;
75+
seg_type res;
76+
cost_type sum(0.), sum_sq(0.);
77+
// for efficiency reasons, we compute the segments in reverse order
78+
// so we store them in a temporary buffer and output the buffer in reverse order at the end.
79+
std::vector<seg_type> tmp;
80+
for(rev_it_type it(end), rend(beg); it != rend; ++it, tmp.push_back(res)){
81+
sum+= *it; sum_sq+= (*it)* (*it);
82+
++res.size;
83+
res.cost= sum_sq-(sum*sum)/ res.size;
84+
}
85+
return std::copy(tmp.rbegin(), tmp.rend(), out);
86+
}
87+
/* Out put segmented series values "unpacking" the list of segments.
88+
As the mean is not stored into the segment data structure, we have to compute it.
89+
*/
90+
template< typename Seg, typename ItData, typename Out>
91+
Out print_segmentation(Seg seg, ItData it_data, Out out) {
92+
for(Seg* s(&seg); s != 0; s=s->next){
93+
typename std::iterator_traits<ItData>::value_type current_mean=0.;
94+
for(typename Seg::size_type i(0); i != s->size; ++i, ++it_data)
95+
{ current_mean+= *it_data;}// TODO Kahan sum
96+
current_mean/= s->size;
97+
for(typename Seg::size_type i(0); i != s->size; ++i, ++out){
98+
*out= current_mean;
99+
}
100+
}
101+
return out;
102+
}
103+
104+
// main program, reading times series from std::cin and outputing the optimal segmented time series to std::cout in the requested nb of segments (given as the arg, defaults to 2)
105+
int main(int argc, char* argv[]){
106+
typedef double data_type;
107+
typedef std::istream_iterator<data_type> input_t;
108+
std::vector<data_type> data(input_t(std::cin), (input_t()));
109+
typedef std::vector<segment<data_type> > seg_type;
110+
111+
// nb segs is given as argument (default to 2) but must not exceed the nb of points
112+
std::size_t const nb_segs(std::min(argc >1 ? boost::lexical_cast<std::size_t>(argv[1]):2
113+
, data.size()));
114+
115+
typedef std::vector<seg_type> segs_type;
116+
segs_type all_segs(nb_segs);
117+
for(std::size_t i(0); i != nb_segs; ++i){
118+
if(i==0){
119+
compute_initial_seg(data.begin(), data.end(), std::back_inserter(all_segs.front()));
120+
// std::reverse(all_segs.front().begin(), all_segs.front().end());
121+
}else {
122+
compute_seg(data.begin(), data.end(), all_segs[i-1].begin()+1, all_segs[i-1].end()
123+
, std::back_inserter(all_segs[i]));
124+
}
125+
}
126+
print_segmentation(all_segs.back().front(), data.begin(), std::ostream_iterator<data_type>(std::cout, " "));
127+
return 0;
128+
}

0 commit comments

Comments
 (0)