Skip to content

Commit de83f78

Browse files
prototypes_model now working
1 parent 04fb726 commit de83f78

File tree

1 file changed

+162
-55
lines changed

1 file changed

+162
-55
lines changed

src/simple-proto-seg.cxx

Lines changed: 162 additions & 55 deletions
Original file line numberDiff line numberDiff line change
@@ -1,11 +1,14 @@
11
#include <limits>
22
#include <vector>
33
#include <tuple>
4+
#include <memory>
45
#include <iterator>
56
#include <iostream>
67
#include <algorithm>
78

89
#include <boost/lexical_cast.hpp>
10+
// TODO find best abstractions
11+
// g++-snapshot -DPROTO -std=c++11 simple-proto-seg.cxx -o simple-proto-seg.S -g3 -O4 -Wall -S -march=native -fverbose-asm
912
/*
1013
We used an idiomatic basic C++ iterator based API.
1114
(could also use a C++11 container absed API with move)
@@ -18,112 +21,216 @@ segments are recursive tuples
1821
/*
1922
The segment data structure contains the cost (sum of square errors), the episod size and a pointer to the next segment.
2023
*/
24+
2125
template<typename CostType, typename SizeType=std::size_t>
2226
struct segment {
2327
typedef CostType cost_type;
2428
typedef SizeType size_type;
25-
segment():cost(std::numeric_limits<cost_type>::max()), size(0), next(0){}
26-
cost_type cost;
29+
explicit segment(cost_type c= std::numeric_limits<cost_type>::max(), size_type s=0, segment const* n=0)
30+
:cost_(c), size_(s), next_(n){}
31+
cost_type cost() const{return cost_;}
32+
size_type size() const{return size_;}
33+
segment const* next()const{return next_;}
34+
cost_type cost_;
35+
size_type size_;
36+
segment const* next_;
37+
};
38+
// fake segment iterator used by models for initial segmentation
39+
template<typename SegmentType> struct fake_segment_iterator {
40+
typedef SegmentType value_type;
41+
fake_segment_iterator& operator++() { return *this;}
42+
struct fake_segment {
43+
typename SegmentType::cost_type cost() const {return 0.;}
44+
typename SegmentType::size_type size() const {return 0;}
45+
SegmentType* operator&() const { return 0;}
46+
};
47+
fake_segment const& operator*()const{ return instance;}
48+
static fake_segment instance;
49+
};
50+
template<typename SegmentType>
51+
typename fake_segment_iterator<SegmentType>::fake_segment fake_segment_iterator<SegmentType>::instance=
52+
typename fake_segment_iterator<SegmentType>::fake_segment();
53+
54+
namespace std
55+
{ template<typename S> struct iterator_traits<fake_segment_iterator<S> > : iterator_traits<S*> {}; }
56+
57+
// needed because std::back_insert_iterator<> do not define ::value_type
58+
template <typename It> struct generic_traits : std::iterator_traits<It> {};
59+
template <typename C> struct generic_traits<std::back_insert_iterator<C> >
60+
: std::iterator_traits<typename C::iterator> {};
61+
62+
template<typename DataIt, typename SegIt, typename ModelData=int> struct regular_model {
63+
typedef typename std::iterator_traits<SegIt>::value_type seg_type;
64+
typedef typename seg_type::cost_type cost_type;
65+
typedef typename seg_type::size_type size_type;
66+
// dummy arg is here for API compatibility with models that need them
67+
template<typename Dummy=int>
68+
regular_model(DataIt data_beg, SegIt seg_beg, Dummy const& unused = Dummy())
69+
: data_it(data_beg), seg_it(seg_beg), sum(0.), sum_sq(0.), cost(0.), size(0.) {}
70+
regular_model& operator++(){
71+
// std::cerr<<"adding "<<*data_it<<std::endl;
72+
sum+= *data_it;
73+
sum_sq+= (*data_it) * (*data_it);
74+
++size;
75+
++data_it;
76+
++seg_it;
77+
cost= sum_sq-(sum*sum) / size;
78+
return *this;
79+
}
80+
bool operator<(regular_model const& o) const
81+
{ return (cost + seg_it->cost()) < (o.cost + o.seg_it->cost());}
82+
explicit operator seg_type ()const {return seg_type(cost+ (*seg_it).cost(), size, &(*seg_it));}
83+
explicit operator typename std::iterator_traits<DataIt>::value_type () const {return sum/size;}// size==0 -> na ?
84+
SegIt next() const {return seg_it;}
85+
DataIt data_it;
86+
SegIt seg_it;
87+
cost_type sum, sum_sq, cost;
2788
size_type size;
28-
segment* next;
89+
};
90+
91+
template<typename DataIt, typename SegIt, typename Prototypes> struct prototypes_model {
92+
typedef typename std::iterator_traits<SegIt>::value_type seg_type;
93+
typedef typename seg_type::cost_type cost_type;
94+
typedef typename seg_type::size_type size_type;
95+
prototypes_model(DataIt data_beg, SegIt seg_beg, Prototypes const& p)
96+
: data_it(data_beg), seg_it(seg_beg), protos(p), size(0.), costs(p.size(), 0.)
97+
, best_cost_it(costs.begin()){}
98+
prototypes_model(prototypes_model const& o): data_it(o.data_it), seg_it(o.seg_it), protos(o.protos), size(o.size), costs(o.costs), best_cost_it(costs.begin()+ (o.best_cost_it-o.costs.begin() )){}
99+
100+
prototypes_model& operator=(prototypes_model const& o){
101+
// assert same prototypes
102+
data_it= o.data_it;
103+
seg_it= o.seg_it;
104+
size= o.size;
105+
costs= o.costs;
106+
best_cost_it= costs.begin()+(o.best_cost_it-o.costs.begin());
107+
return *this;
108+
}
109+
110+
prototypes_model& operator++(){
111+
// std::cerr<<"adding "<<*data_it<<std::endl;
112+
for(std::size_t i(0); i != protos.size(); ++i){
113+
cost_type const tmp(protos[i]- (*data_it));
114+
costs[i]+= tmp * tmp;
115+
}
116+
++size;
117+
++data_it;
118+
++seg_it;
119+
best_cost_it= std::min_element(costs.begin(), costs.end());
120+
return *this;
121+
}
122+
bool operator<(prototypes_model const& o) const
123+
{ //std::cerr<<"comparing "<< *best_cost_it <<" + "<<seg_it->cost() << " = "<<(*best_cost_it + seg_it->cost())<<" to "<<(*(o.best_cost_it))<<" + "<<o.seg_it->cost()<<" = "<<(*(o.best_cost_it) + o.seg_it->cost())<<std::endl;
124+
return (*best_cost_it + seg_it->cost()) < (*(o.best_cost_it) + o.seg_it->cost());}
125+
explicit operator seg_type () const { return seg_type(*best_cost_it + (*seg_it).cost(), size, &(*seg_it));}
126+
explicit operator typename std::iterator_traits<DataIt>::value_type () const
127+
{return protos[best_cost_it-costs.begin()];}// size==0 -> na ?
128+
SegIt next() const {return seg_it;}
129+
130+
DataIt data_it;
131+
SegIt seg_it;
132+
Prototypes const& protos;
133+
size_type size;
134+
std::vector<cost_type> costs;
135+
typename std::vector<cost_type>::iterator best_cost_it;
29136
};
30137

31138
/* The generic optimal segmentation algorithm for one more segment. Dynamic programing makes it O(n^2).
32139
*/
33-
template<typename ItData, typename ItSeg, typename Out>
140+
template<template<class DI, class SI, class M> class ModelType
141+
, typename ItData, typename ItSeg, typename ModelData, typename Out>
34142
Out compute_seg(ItData beg, ItData end, ItSeg computed_beg, ItSeg computed_end
35-
, Out result_out) {
143+
, ModelData const& model_data, Out result_out) {
144+
typedef ModelType<ItData, ItSeg, ModelData> model_type;
36145
typedef typename std::iterator_traits<ItSeg>::value_type seg_type;
37146
typedef typename seg_type::cost_type cost_type;
38147
typedef typename std::iterator_traits<ItData>::difference_type size_type;
39148

40149
ItData start_data(beg); // we won't compute N=(end-beg) segmentations because there is no seg in K segments for the last K-1 points, so we iterate in computed_ instead
41150
// we compute a seg starting from start_data
42151
for(ItSeg start_seg(computed_beg); start_seg != computed_end; ++start_seg, ++start_data, ++result_out){
43-
seg_type current_seg;
44-
cost_type sum(0.), sum_sq(0.);
45-
size_type nb_elts(1);
46-
ItData it_data= start_data;
47-
// for each seg, we test strating from start_data
48-
for(ItSeg it_seg(start_seg); it_seg != computed_end;++it_seg, ++it_data, ++nb_elts){
49-
sum+= *it_data; sum_sq+= (*it_data)* (*it_data);
50-
cost_type const current_cost((sum_sq-(sum*sum)/nb_elts) + it_seg->cost);
51-
// std::cerr<<" cost is "<<(sum_sq-(sum*sum)/nb_elts)<< " + "<<it_seg->cost <<" = "<<current_cost<<" ";
52-
if(current_cost < current_seg.cost) {
53-
//std::cerr<<"optim found for size "<<nb_elts<<" next size is "<<it_seg->size<<std::endl;
54-
current_seg.cost= current_cost;
55-
current_seg.next= &(*it_seg);
56-
current_seg.size= nb_elts;
152+
model_type best_model(start_data, start_seg, model_data);
153+
// for each seg, we test starting from start_data
154+
for(model_type current_model(best_model); (current_model).next() != computed_end; ++current_model){
155+
if( current_model < best_model){
156+
// std::cerr<<"optim found for size "<<current_model.size<<" and cost "<< (seg_type(current_model).cost())<<"next size is "<<(current_model.next()->size())<<std::endl;
157+
best_model= current_model;
57158
}
58159
}
59-
*result_out= current_seg;
160+
// std::cerr<<"seg done\n";
161+
*result_out= seg_type(best_model);
60162
}
61163
return result_out;
62164
}
63165

64-
// needed because std::back_insert_iterator<> do not define ::value_type
65-
template <typename It> struct generic_traits : std::iterator_traits<It> {};
66-
template <typename C> struct generic_traits<std::back_insert_iterator<C> >
67-
: std::iterator_traits<typename C::iterator> {};
68166

69167
/* the initial step computing "segmentations" in one segment.
70168
*/
71-
template<typename ItData, typename ItSeg>
72-
ItSeg compute_initial_seg(ItData beg, ItData end, ItSeg out) {
169+
template<template<class ID, class IS, class MD> class ModelType, typename ItData, typename ModelData, typename ItSeg>
170+
ItSeg compute_initial_seg(ItData beg, ItData end, ModelData const& model_data, ItSeg out) {
73171
typedef typename generic_traits<ItSeg>::value_type seg_type;
74172
typedef typename seg_type::cost_type cost_type;
75173
typedef typename std::iterator_traits<ItData>::difference_type size_type;
76174
typedef std::reverse_iterator<ItData> rev_it_type;
77-
seg_type res;
78-
cost_type sum(0.), sum_sq(0.);
175+
ModelType<rev_it_type, fake_segment_iterator<seg_type>, ModelData > model(rev_it_type(end), fake_segment_iterator<seg_type>(), model_data);
79176
// for efficiency reasons, we compute the segments in reverse order
80177
// so we store them in a temporary buffer and output the buffer in reverse order at the end.
81178
std::vector<seg_type> tmp;
82-
for(rev_it_type it(end), rend(beg); it != rend; ++it, tmp.push_back(res)){
83-
sum+= *it; sum_sq+= (*it)* (*it);
84-
++res.size;
85-
res.cost= sum_sq-(sum*sum)/ res.size;
86-
}
179+
for(rev_it_type it(end), rend(beg); it != rend; ++it, ++model, tmp.push_back(seg_type(model)))
180+
{}
87181
return std::copy(tmp.rbegin(), tmp.rend(), out);
88182
}
89183
/* Out put segmented series values "unpacking" the list of segments.
90184
As the mean is not stored into the segment data structure, we have to compute it.
91185
*/
92-
template< typename Seg, typename ItData, typename Out>
93-
Out print_segmentation(Seg seg, ItData it_data, Out out) {
94-
for(Seg* s(&seg); s != 0; s=s->next){
95-
typename std::iterator_traits<ItData>::value_type current_mean=0.;
96-
for(typename Seg::size_type i(0); i != s->size; ++i, ++it_data)
97-
{ current_mean+= *it_data;}// TODO Kahan sum
98-
current_mean/= s->size;
99-
for(typename Seg::size_type i(0); i != s->size; ++i, ++out){
100-
*out= current_mean;
186+
template<template<class ID, class IS, class MD> class ModelType
187+
, typename Seg, typename ItData, typename ModelData, typename Out>
188+
Out print_segmentation(Seg seg, ItData it_data, ModelData const& model_data, Out out) {
189+
for(Seg const* s(&seg); s != 0; s=s->next()){
190+
ModelType<ItData, fake_segment_iterator<Seg>, ModelData > model(it_data, fake_segment_iterator<Seg>(), model_data);
191+
for(typename Seg::size_type i(0); i != s->size(); ++i, ++model, ++it_data)
192+
{}
193+
std::cerr<<"output seg of size "<<s->size()<<std::endl;
194+
for(typename Seg::size_type i(0); i != s->size(); ++i, ++out){
195+
*out= typename std::iterator_traits<ItData>::value_type(model);
101196
}
102197
}
103198
return out;
104199
}
105200

201+
template<template<class ID, class IS, class MD> class ModelType
202+
, typename ItData, typename ModelData=int>
203+
void do_it(ItData beg, ItData end, std::size_t nb_segs, ModelData const& model_data= ModelData()){
204+
typedef typename std::iterator_traits<ItData>::value_type data_type;
205+
typedef segment<data_type> segment_type;
206+
typedef std::vector<segment_type> segmentation_type;
207+
typedef std::vector<segmentation_type> segs_type;
208+
segs_type all_segs(nb_segs);
209+
for(std::size_t i(0); i != nb_segs; ++i){
210+
if(i==0){
211+
compute_initial_seg<ModelType>(beg, end, model_data, std::back_inserter(all_segs.front()));
212+
}else {
213+
compute_seg<ModelType>(beg, end, all_segs[i-1].begin(), all_segs[i-1].end(), model_data
214+
, std::back_inserter(all_segs[i]));
215+
}
216+
}
217+
print_segmentation<ModelType>(all_segs.back().front(), beg, model_data, std::ostream_iterator<data_type>(std::cout, " "));
218+
}
219+
106220
// main program, reading times series from std::cin and outputing the optimal segmented time series to std::cout in the requested nb of segments (given as the arg, defaults to 2)
107221
int main(int argc, char* argv[]){
108222
typedef double data_type;
223+
typedef std::vector<data_type> container_t;
109224
typedef std::istream_iterator<data_type> input_t;
110-
std::vector<data_type> data(input_t(std::cin), (input_t()));
111-
typedef std::vector<segment<data_type> > seg_type;
225+
container_t data(input_t(std::cin), (input_t()));
112226

113227
// nb segs is given as argument (default to 2) but must not exceed the nb of points
114228
std::size_t const nb_segs(std::min(argc >1 ? boost::lexical_cast<std::size_t>(argv[1]):2
115229
, data.size()));
116230

117-
typedef std::vector<seg_type> segs_type;
118-
segs_type all_segs(nb_segs);
119-
for(std::size_t i(0); i != nb_segs; ++i){
120-
if(i==0){
121-
compute_initial_seg(data.begin(), data.end(), std::back_inserter(all_segs.front()));
122-
}else {
123-
compute_seg(data.begin(), data.end(), all_segs[i-1].begin()+1, all_segs[i-1].end()
124-
, std::back_inserter(all_segs[i]));
125-
}
126-
}
127-
print_segmentation(all_segs.back().front(), data.begin(), std::ostream_iterator<data_type>(std::cout, " "));
231+
container_t protos(2);
232+
protos[0]= 0; protos[1]=10;
233+
if(true){ do_it<regular_model>(data.begin(), data.end(), nb_segs); }
234+
else { do_it<prototypes_model>(data.begin(), data.end(), nb_segs, protos); }
128235
return 0;
129236
}

0 commit comments

Comments
 (0)