1
1
#include < limits>
2
2
#include < vector>
3
3
#include < tuple>
4
+ #include < memory>
4
5
#include < iterator>
5
6
#include < iostream>
6
7
#include < algorithm>
7
8
8
9
#include < boost/lexical_cast.hpp>
10
+ // TODO find best abstractions
11
+ // g++-snapshot -DPROTO -std=c++11 simple-proto-seg.cxx -o simple-proto-seg.S -g3 -O4 -Wall -S -march=native -fverbose-asm
9
12
/*
10
13
We used an idiomatic basic C++ iterator based API.
11
14
(could also use a C++11 container absed API with move)
@@ -18,112 +21,216 @@ segments are recursive tuples
18
21
/*
19
22
The segment data structure contains the cost (sum of square errors), the episod size and a pointer to the next segment.
20
23
*/
24
+
21
25
template <typename CostType, typename SizeType=std::size_t >
22
26
struct segment {
23
27
typedef CostType cost_type;
24
28
typedef SizeType size_type;
25
- segment ():cost(std::numeric_limits<cost_type>::max()), size(0 ), next(0 ){}
26
- cost_type cost;
29
+ explicit segment (cost_type c= std::numeric_limits<cost_type>::max(), size_type s=0, segment const * n=0)
30
+ :cost_(c), size_(s), next_(n){}
31
+ cost_type cost () const {return cost_;}
32
+ size_type size () const {return size_;}
33
+ segment const * next ()const {return next_;}
34
+ cost_type cost_;
35
+ size_type size_;
36
+ segment const * next_;
37
+ };
38
+ // fake segment iterator used by models for initial segmentation
39
+ template <typename SegmentType> struct fake_segment_iterator {
40
+ typedef SegmentType value_type;
41
+ fake_segment_iterator& operator ++() { return *this ;}
42
+ struct fake_segment {
43
+ typename SegmentType::cost_type cost () const {return 0 .;}
44
+ typename SegmentType::size_type size () const {return 0 ;}
45
+ SegmentType* operator &() const { return 0 ;}
46
+ };
47
+ fake_segment const & operator *()const { return instance;}
48
+ static fake_segment instance;
49
+ };
50
+ template <typename SegmentType>
51
+ typename fake_segment_iterator<SegmentType>::fake_segment fake_segment_iterator<SegmentType>::instance=
52
+ typename fake_segment_iterator<SegmentType>::fake_segment();
53
+
54
+ namespace std
55
+ { template <typename S> struct iterator_traits <fake_segment_iterator<S> > : iterator_traits<S*> {}; }
56
+
57
+ // needed because std::back_insert_iterator<> do not define ::value_type
58
+ template <typename It> struct generic_traits : std::iterator_traits<It> {};
59
+ template <typename C> struct generic_traits <std::back_insert_iterator<C> >
60
+ : std::iterator_traits<typename C::iterator> {};
61
+
62
+ template <typename DataIt, typename SegIt, typename ModelData=int > struct regular_model {
63
+ typedef typename std::iterator_traits<SegIt>::value_type seg_type;
64
+ typedef typename seg_type::cost_type cost_type;
65
+ typedef typename seg_type::size_type size_type;
66
+ // dummy arg is here for API compatibility with models that need them
67
+ template <typename Dummy=int >
68
+ regular_model (DataIt data_beg, SegIt seg_beg, Dummy const & unused = Dummy())
69
+ : data_it(data_beg), seg_it(seg_beg), sum(0 .), sum_sq(0 .), cost(0 .), size(0 .) {}
70
+ regular_model& operator ++(){
71
+ // std::cerr<<"adding "<<*data_it<<std::endl;
72
+ sum+= *data_it;
73
+ sum_sq+= (*data_it) * (*data_it);
74
+ ++size;
75
+ ++data_it;
76
+ ++seg_it;
77
+ cost= sum_sq-(sum*sum) / size;
78
+ return *this ;
79
+ }
80
+ bool operator <(regular_model const & o) const
81
+ { return (cost + seg_it->cost ()) < (o.cost + o.seg_it ->cost ());}
82
+ explicit operator seg_type ()const {return seg_type (cost+ (*seg_it).cost (), size, &(*seg_it));}
83
+ explicit operator typename std::iterator_traits<DataIt>::value_type () const {return sum/size;}// size==0 -> na ?
84
+ SegIt next () const {return seg_it;}
85
+ DataIt data_it;
86
+ SegIt seg_it;
87
+ cost_type sum, sum_sq, cost;
27
88
size_type size;
28
- segment* next;
89
+ };
90
+
91
+ template <typename DataIt, typename SegIt, typename Prototypes> struct prototypes_model {
92
+ typedef typename std::iterator_traits<SegIt>::value_type seg_type;
93
+ typedef typename seg_type::cost_type cost_type;
94
+ typedef typename seg_type::size_type size_type;
95
+ prototypes_model (DataIt data_beg, SegIt seg_beg, Prototypes const & p)
96
+ : data_it(data_beg), seg_it(seg_beg), protos(p), size(0 .), costs(p.size(), 0 .)
97
+ , best_cost_it(costs.begin()){}
98
+ prototypes_model (prototypes_model const & o): data_it(o.data_it), seg_it(o.seg_it), protos(o.protos), size(o.size), costs(o.costs), best_cost_it(costs.begin()+ (o.best_cost_it-o.costs.begin() )){}
99
+
100
+ prototypes_model& operator =(prototypes_model const & o){
101
+ // assert same prototypes
102
+ data_it= o.data_it ;
103
+ seg_it= o.seg_it ;
104
+ size= o.size ;
105
+ costs= o.costs ;
106
+ best_cost_it= costs.begin ()+(o.best_cost_it -o.costs .begin ());
107
+ return *this ;
108
+ }
109
+
110
+ prototypes_model& operator ++(){
111
+ // std::cerr<<"adding "<<*data_it<<std::endl;
112
+ for (std::size_t i (0 ); i != protos.size (); ++i){
113
+ cost_type const tmp (protos[i]- (*data_it));
114
+ costs[i]+= tmp * tmp;
115
+ }
116
+ ++size;
117
+ ++data_it;
118
+ ++seg_it;
119
+ best_cost_it= std::min_element (costs.begin (), costs.end ());
120
+ return *this ;
121
+ }
122
+ bool operator <(prototypes_model const & o) const
123
+ { // std::cerr<<"comparing "<< *best_cost_it <<" + "<<seg_it->cost() << " = "<<(*best_cost_it + seg_it->cost())<<" to "<<(*(o.best_cost_it))<<" + "<<o.seg_it->cost()<<" = "<<(*(o.best_cost_it) + o.seg_it->cost())<<std::endl;
124
+ return (*best_cost_it + seg_it->cost ()) < (*(o.best_cost_it ) + o.seg_it ->cost ());}
125
+ explicit operator seg_type () const { return seg_type (*best_cost_it + (*seg_it).cost (), size, &(*seg_it));}
126
+ explicit operator typename std::iterator_traits<DataIt>::value_type () const
127
+ {return protos[best_cost_it-costs.begin ()];}// size==0 -> na ?
128
+ SegIt next () const {return seg_it;}
129
+
130
+ DataIt data_it;
131
+ SegIt seg_it;
132
+ Prototypes const & protos;
133
+ size_type size;
134
+ std::vector<cost_type> costs;
135
+ typename std::vector<cost_type>::iterator best_cost_it;
29
136
};
30
137
31
138
/* The generic optimal segmentation algorithm for one more segment. Dynamic programing makes it O(n^2).
32
139
*/
33
- template <typename ItData, typename ItSeg, typename Out>
140
+ template <template <class DI , class SI , class M > class ModelType
141
+ , typename ItData, typename ItSeg, typename ModelData, typename Out>
34
142
Out compute_seg (ItData beg, ItData end, ItSeg computed_beg, ItSeg computed_end
35
- , Out result_out) {
143
+ , ModelData const & model_data, Out result_out) {
144
+ typedef ModelType<ItData, ItSeg, ModelData> model_type;
36
145
typedef typename std::iterator_traits<ItSeg>::value_type seg_type;
37
146
typedef typename seg_type::cost_type cost_type;
38
147
typedef typename std::iterator_traits<ItData>::difference_type size_type;
39
148
40
149
ItData start_data (beg); // we won't compute N=(end-beg) segmentations because there is no seg in K segments for the last K-1 points, so we iterate in computed_ instead
41
150
// we compute a seg starting from start_data
42
151
for (ItSeg start_seg (computed_beg); start_seg != computed_end; ++start_seg, ++start_data, ++result_out){
43
- seg_type current_seg;
44
- cost_type sum (0 .), sum_sq (0 .);
45
- size_type nb_elts (1 );
46
- ItData it_data= start_data;
47
- // for each seg, we test strating from start_data
48
- for (ItSeg it_seg (start_seg); it_seg != computed_end;++it_seg, ++it_data, ++nb_elts){
49
- sum+= *it_data; sum_sq+= (*it_data)* (*it_data);
50
- cost_type const current_cost ((sum_sq-(sum*sum)/nb_elts) + it_seg->cost );
51
- // std::cerr<<" cost is "<<(sum_sq-(sum*sum)/nb_elts)<< " + "<<it_seg->cost <<" = "<<current_cost<<" ";
52
- if (current_cost < current_seg.cost ) {
53
- // std::cerr<<"optim found for size "<<nb_elts<<" next size is "<<it_seg->size<<std::endl;
54
- current_seg.cost = current_cost;
55
- current_seg.next = &(*it_seg);
56
- current_seg.size = nb_elts;
152
+ model_type best_model (start_data, start_seg, model_data);
153
+ // for each seg, we test starting from start_data
154
+ for (model_type current_model (best_model); (current_model).next () != computed_end; ++current_model){
155
+ if ( current_model < best_model){
156
+ // std::cerr<<"optim found for size "<<current_model.size<<" and cost "<< (seg_type(current_model).cost())<<"next size is "<<(current_model.next()->size())<<std::endl;
157
+ best_model= current_model;
57
158
}
58
159
}
59
- *result_out= current_seg;
160
+ // std::cerr<<"seg done\n";
161
+ *result_out= seg_type (best_model);
60
162
}
61
163
return result_out;
62
164
}
63
165
64
- // needed because std::back_insert_iterator<> do not define ::value_type
65
- template <typename It> struct generic_traits : std::iterator_traits<It> {};
66
- template <typename C> struct generic_traits <std::back_insert_iterator<C> >
67
- : std::iterator_traits<typename C::iterator> {};
68
166
69
167
/* the initial step computing "segmentations" in one segment.
70
168
*/
71
- template <typename ItData, typename ItSeg>
72
- ItSeg compute_initial_seg (ItData beg, ItData end, ItSeg out) {
169
+ template <template < class ID , class IS , class MD > class ModelType , typename ItData, typename ModelData , typename ItSeg>
170
+ ItSeg compute_initial_seg (ItData beg, ItData end, ModelData const & model_data, ItSeg out) {
73
171
typedef typename generic_traits<ItSeg>::value_type seg_type;
74
172
typedef typename seg_type::cost_type cost_type;
75
173
typedef typename std::iterator_traits<ItData>::difference_type size_type;
76
174
typedef std::reverse_iterator<ItData> rev_it_type;
77
- seg_type res;
78
- cost_type sum (0 .), sum_sq (0 .);
175
+ ModelType<rev_it_type, fake_segment_iterator<seg_type>, ModelData > model (rev_it_type (end), fake_segment_iterator<seg_type>(), model_data);
79
176
// for efficiency reasons, we compute the segments in reverse order
80
177
// so we store them in a temporary buffer and output the buffer in reverse order at the end.
81
178
std::vector<seg_type> tmp;
82
- for (rev_it_type it (end), rend (beg); it != rend; ++it, tmp.push_back (res)){
83
- sum+= *it; sum_sq+= (*it)* (*it);
84
- ++res.size ;
85
- res.cost = sum_sq-(sum*sum)/ res.size ;
86
- }
179
+ for (rev_it_type it (end), rend (beg); it != rend; ++it, ++model, tmp.push_back (seg_type (model)))
180
+ {}
87
181
return std::copy (tmp.rbegin (), tmp.rend (), out);
88
182
}
89
183
/* Out put segmented series values "unpacking" the list of segments.
90
184
As the mean is not stored into the segment data structure, we have to compute it.
91
185
*/
92
- template < typename Seg, typename ItData, typename Out>
93
- Out print_segmentation (Seg seg, ItData it_data, Out out) {
94
- for (Seg* s (&seg); s != 0 ; s=s->next ){
95
- typename std::iterator_traits<ItData>::value_type current_mean=0 .;
96
- for (typename Seg::size_type i (0 ); i != s->size ; ++i, ++it_data)
97
- { current_mean+= *it_data;}// TODO Kahan sum
98
- current_mean/= s->size ;
99
- for (typename Seg::size_type i (0 ); i != s->size ; ++i, ++out){
100
- *out= current_mean;
186
+ template <template <class ID , class IS , class MD > class ModelType
187
+ , typename Seg, typename ItData, typename ModelData, typename Out>
188
+ Out print_segmentation (Seg seg, ItData it_data, ModelData const & model_data, Out out) {
189
+ for (Seg const * s (&seg); s != 0 ; s=s->next ()){
190
+ ModelType<ItData, fake_segment_iterator<Seg>, ModelData > model (it_data, fake_segment_iterator<Seg>(), model_data);
191
+ for (typename Seg::size_type i (0 ); i != s->size (); ++i, ++model, ++it_data)
192
+ {}
193
+ std::cerr<<" output seg of size " <<s->size ()<<std::endl;
194
+ for (typename Seg::size_type i (0 ); i != s->size (); ++i, ++out){
195
+ *out= typename std::iterator_traits<ItData>::value_type (model);
101
196
}
102
197
}
103
198
return out;
104
199
}
105
200
201
+ template <template <class ID , class IS , class MD > class ModelType
202
+ , typename ItData, typename ModelData=int >
203
+ void do_it (ItData beg, ItData end, std::size_t nb_segs, ModelData const & model_data= ModelData()){
204
+ typedef typename std::iterator_traits<ItData>::value_type data_type;
205
+ typedef segment<data_type> segment_type;
206
+ typedef std::vector<segment_type> segmentation_type;
207
+ typedef std::vector<segmentation_type> segs_type;
208
+ segs_type all_segs (nb_segs);
209
+ for (std::size_t i (0 ); i != nb_segs; ++i){
210
+ if (i==0 ){
211
+ compute_initial_seg<ModelType>(beg, end, model_data, std::back_inserter (all_segs.front ()));
212
+ }else {
213
+ compute_seg<ModelType>(beg, end, all_segs[i-1 ].begin (), all_segs[i-1 ].end (), model_data
214
+ , std::back_inserter (all_segs[i]));
215
+ }
216
+ }
217
+ print_segmentation<ModelType>(all_segs.back ().front (), beg, model_data, std::ostream_iterator<data_type>(std::cout, " " ));
218
+ }
219
+
106
220
// main program, reading times series from std::cin and outputing the optimal segmented time series to std::cout in the requested nb of segments (given as the arg, defaults to 2)
107
221
int main (int argc, char * argv[]){
108
222
typedef double data_type;
223
+ typedef std::vector<data_type> container_t ;
109
224
typedef std::istream_iterator<data_type> input_t ;
110
- std::vector<data_type> data (input_t (std::cin), (input_t ()));
111
- typedef std::vector<segment<data_type> > seg_type;
225
+ container_t data (input_t (std::cin), (input_t ()));
112
226
113
227
// nb segs is given as argument (default to 2) but must not exceed the nb of points
114
228
std::size_t const nb_segs (std::min (argc >1 ? boost::lexical_cast<std::size_t >(argv[1 ]):2
115
229
, data.size ()));
116
230
117
- typedef std::vector<seg_type> segs_type;
118
- segs_type all_segs (nb_segs);
119
- for (std::size_t i (0 ); i != nb_segs; ++i){
120
- if (i==0 ){
121
- compute_initial_seg (data.begin (), data.end (), std::back_inserter (all_segs.front ()));
122
- }else {
123
- compute_seg (data.begin (), data.end (), all_segs[i-1 ].begin ()+1 , all_segs[i-1 ].end ()
124
- , std::back_inserter (all_segs[i]));
125
- }
126
- }
127
- print_segmentation (all_segs.back ().front (), data.begin (), std::ostream_iterator<data_type>(std::cout, " " ));
231
+ container_t protos (2 );
232
+ protos[0 ]= 0 ; protos[1 ]=10 ;
233
+ if (true ){ do_it<regular_model>(data.begin (), data.end (), nb_segs); }
234
+ else { do_it<prototypes_model>(data.begin (), data.end (), nb_segs, protos); }
128
235
return 0 ;
129
236
}
0 commit comments