|
| 1 | +#include <limits> |
| 2 | +#include <vector> |
| 3 | +#include <tuple> |
| 4 | +#include <iterator> |
| 5 | +#include <iostream> |
| 6 | +#include <algorithm> |
| 7 | + |
| 8 | +#include <boost/lexical_cast.hpp> |
| 9 | +/* |
| 10 | +We used an idiomatic basic C++ iterator based API. |
| 11 | +(could also use a C++11 container absed API with move) |
| 12 | +The data to segment in K segments is in [beg, end). |
| 13 | +The segments for segmentations in K-1 segments of [i,end) with i \in [beg,end) |
| 14 | +are in [computed_beg, computed_end) |
| 15 | +segments are recursive tuples |
| 16 | + */ |
| 17 | + |
| 18 | +/* |
| 19 | + The segment data structure contains the cost (sum of square errors), the episod size and a pointer to the next segment. |
| 20 | + */ |
| 21 | +template<typename CostType, typename SizeType=std::size_t> |
| 22 | +struct segment { |
| 23 | + typedef CostType cost_type; |
| 24 | + typedef SizeType size_type; |
| 25 | + segment():cost(std::numeric_limits<cost_type>::max()), size(0), next(0){} |
| 26 | + cost_type cost; |
| 27 | + size_type size; |
| 28 | + segment* next; |
| 29 | +}; |
| 30 | + |
| 31 | +/* The generic optimal segmentation algorithm for one more segment. Dynamic programing makes it O(n^2). |
| 32 | + */ |
| 33 | +template<typename ItData, typename ItSeg, typename Out> |
| 34 | +Out compute_seg(ItData beg, ItData end, ItSeg computed_beg, ItSeg computed_end |
| 35 | + , Out result_out) { |
| 36 | + typedef typename std::iterator_traits<ItSeg>::value_type seg_type; |
| 37 | + typedef typename seg_type::cost_type cost_type; |
| 38 | + typedef typename std::iterator_traits<ItData>::difference_type size_type; |
| 39 | + |
| 40 | + ItData start_data(beg); // we won't compute N=(end-beg) segmentations because there is no seg in K segments for the last K-1 points, so we iterate in computed_ instead |
| 41 | + // we compute a seg starting from start_data |
| 42 | + for(ItSeg start_seg(computed_beg); start_seg != computed_end; ++start_seg, ++start_data, ++result_out){ |
| 43 | + seg_type current_seg; |
| 44 | + cost_type sum(0.), sum_sq(0.); |
| 45 | + size_type nb_elts(1); |
| 46 | + ItData it_data= start_data; |
| 47 | + // for each seg, we test strating from start_data |
| 48 | + for(ItSeg it_seg(start_seg); it_seg != computed_end;++it_seg, ++it_data, ++nb_elts){ |
| 49 | + sum+= *it_data; sum_sq+= (*it_data)* (*it_data); |
| 50 | + cost_type const current_cost((sum_sq-(sum*sum)/nb_elts) + it_seg->cost); |
| 51 | + // std::cerr<<" cost is "<<(sum_sq-(sum*sum)/nb_elts)<< " + "<<it_seg->cost <<" = "<<current_cost<<" "; |
| 52 | + if(current_cost < current_seg.cost) { |
| 53 | + //std::cerr<<"optim found for size "<<nb_elts<<" next size is "<<it_seg->size<<std::endl; |
| 54 | + current_seg.cost= current_cost; |
| 55 | + current_seg.next= &(*it_seg); |
| 56 | + current_seg.size= nb_elts; |
| 57 | + } |
| 58 | + } |
| 59 | + *result_out= current_seg; |
| 60 | + } |
| 61 | + return result_out; |
| 62 | +} |
| 63 | +template <typename It> struct generic_traits : std::iterator_traits<It> {}; |
| 64 | +template <typename C> struct generic_traits<std::back_insert_iterator<C> > |
| 65 | +: std::iterator_traits<typename C::iterator> {}; |
| 66 | + |
| 67 | +/* the initial step computing "segmentations" in one segment. |
| 68 | + */ |
| 69 | +template<typename ItData, typename ItSeg> |
| 70 | +ItSeg compute_initial_seg(ItData beg, ItData end, ItSeg out) { |
| 71 | + typedef typename generic_traits<ItSeg>::value_type seg_type; |
| 72 | + typedef typename seg_type::cost_type cost_type; |
| 73 | + typedef typename std::iterator_traits<ItData>::difference_type size_type; |
| 74 | + typedef std::reverse_iterator<ItData> rev_it_type; |
| 75 | + seg_type res; |
| 76 | + cost_type sum(0.), sum_sq(0.); |
| 77 | + // for efficiency reasons, we compute the segments in reverse order |
| 78 | + // so we store them in a temporary buffer and output the buffer in reverse order at the end. |
| 79 | + std::vector<seg_type> tmp; |
| 80 | + for(rev_it_type it(end), rend(beg); it != rend; ++it, tmp.push_back(res)){ |
| 81 | + sum+= *it; sum_sq+= (*it)* (*it); |
| 82 | + ++res.size; |
| 83 | + res.cost= sum_sq-(sum*sum)/ res.size; |
| 84 | + } |
| 85 | + return std::copy(tmp.rbegin(), tmp.rend(), out); |
| 86 | +} |
| 87 | +/* Out put segmented series values "unpacking" the list of segments. |
| 88 | +As the mean is not stored into the segment data structure, we have to compute it. |
| 89 | + */ |
| 90 | +template< typename Seg, typename ItData, typename Out> |
| 91 | +Out print_segmentation(Seg seg, ItData it_data, Out out) { |
| 92 | + for(Seg* s(&seg); s != 0; s=s->next){ |
| 93 | + typename std::iterator_traits<ItData>::value_type current_mean=0.; |
| 94 | + for(typename Seg::size_type i(0); i != s->size; ++i, ++it_data) |
| 95 | + { current_mean+= *it_data;}// TODO Kahan sum |
| 96 | + current_mean/= s->size; |
| 97 | + for(typename Seg::size_type i(0); i != s->size; ++i, ++out){ |
| 98 | + *out= current_mean; |
| 99 | + } |
| 100 | + } |
| 101 | + return out; |
| 102 | +} |
| 103 | + |
| 104 | +// main program, reading times series from std::cin and outputing the optimal segmented time series to std::cout in the requested nb of segments (given as the arg, defaults to 2) |
| 105 | +int main(int argc, char* argv[]){ |
| 106 | + typedef double data_type; |
| 107 | + typedef std::istream_iterator<data_type> input_t; |
| 108 | + std::vector<data_type> data(input_t(std::cin), (input_t())); |
| 109 | + typedef std::vector<segment<data_type> > seg_type; |
| 110 | + |
| 111 | + // nb segs is given as argument (default to 2) but must not exceed the nb of points |
| 112 | + std::size_t const nb_segs(std::min(argc >1 ? boost::lexical_cast<std::size_t>(argv[1]):2 |
| 113 | + , data.size())); |
| 114 | + |
| 115 | + typedef std::vector<seg_type> segs_type; |
| 116 | + segs_type all_segs(nb_segs); |
| 117 | + for(std::size_t i(0); i != nb_segs; ++i){ |
| 118 | + if(i==0){ |
| 119 | + compute_initial_seg(data.begin(), data.end(), std::back_inserter(all_segs.front())); |
| 120 | + // std::reverse(all_segs.front().begin(), all_segs.front().end()); |
| 121 | + }else { |
| 122 | + compute_seg(data.begin(), data.end(), all_segs[i-1].begin()+1, all_segs[i-1].end() |
| 123 | + , std::back_inserter(all_segs[i])); |
| 124 | + } |
| 125 | + } |
| 126 | + print_segmentation(all_segs.back().front(), data.begin(), std::ostream_iterator<data_type>(std::cout, " ")); |
| 127 | + return 0; |
| 128 | +} |
0 commit comments