@@ -1,4 +1,8 @@
 #include "learner.h"
+#include "Eigen/Dense"
+#include <omp.h>
+#include <cmath>
+#include <cstdlib>
 
 namespace adPredictAlgo {
 
@@ -12,15 +14,57 @@ class LBFGSSolver : public Learner {
     max_lbfgs_iter = 10;
     memory_size = 4;
     num_fea = 0;
+    nthread = 0;
+    debug = false;
+
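+    // pre-size the history buffers; Configure() calls MemInit() to set them up
+    // again once num_fea and memory_size are read from the config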
+    Eigen::VectorXf e(num_fea);
+    y.resize(memory_size, e);
+    s.resize(memory_size, e);
+
+    alpha.resize(memory_size, 0.0);
+
+    z = Eigen::VectorXf::Zero(num_fea);
+    grad = Eigen::VectorXf::Zero(num_fea);
+    new_w = Eigen::VectorXf::Zero(num_fea);
   }
 
   ~LBFGSSolver() {
-
+    y.clear();
+    s.clear();
+    alpha.clear();
   }
 
   void Configure(const std::vector<std::pair<std::string, std::string> > &cfg)
   {
-    std::cout << "lbfgs configure" << std::endl;
+    for (const auto &kv : cfg)
+      cfg_[kv.first] = kv.second;
+
+    if (cfg_.count("linesearch_c1"))
+      linesearch_c1 = static_cast<float>(atof(cfg_["linesearch_c1"].c_str()));
+    if (cfg_.count("linesearch_backoff"))
+      linesearch_backoff = static_cast<float>(atof(cfg_["linesearch_backoff"].c_str()));
+    if (cfg_.count("max_linesearch_iter"))
+      max_linesearch_iter = static_cast<int>(atoi(cfg_["max_linesearch_iter"].c_str()));
+    if (cfg_.count("lbfgs_stop_tol"))
+      lbfgs_stop_tol = static_cast<float>(atof(cfg_["lbfgs_stop_tol"].c_str()));
+    if (cfg_.count("max_lbfgs_iter"))
+      max_lbfgs_iter = static_cast<int>(atoi(cfg_["max_lbfgs_iter"].c_str()));
+    if (cfg_.count("memory_size"))
+      memory_size = static_cast<size_t>(atoi(cfg_["memory_size"].c_str()));
+    if (cfg_.count("num_fea"))
+      num_fea = static_cast<size_t>(atoi(cfg_["num_fea"].c_str()));
+    if (cfg_.count("nthread"))
+      nthread = static_cast<int>(atoi(cfg_["nthread"].c_str()));
+    if (cfg_.count("debug"))
+      debug = static_cast<bool>(atoi(cfg_["debug"].c_str()));
+    if (cfg_.count("rank"))
+      rank = static_cast<int>(atoi(cfg_["rank"].c_str()));
+    this->MemInit();
+
+    if (rank == 0)
+      LOG(INFO) << "L-BFGS solver starts, num_fea=" << num_fea << ",memory_size=" << memory_size
+                << ",lbfgs_stop_tol=" << lbfgs_stop_tol << ",max_lbfgs_iter=" << max_lbfgs_iter
+                << ",max_linesearch_iter=" << max_linesearch_iter << ",nthread=" << nthread << ",debug=" << debug;
   }
 
   float PredIns(const dmlc::Row<unsigned> &v,
@@ -34,12 +78,23 @@ class LBFGSSolver : public Learner {
                 float rho,
                 dmlc::RowBlockIter<unsigned> *dtrain)
   {
+    // copy the primal weights into an Eigen vector and map the dual/cons buffers
+    Eigen::VectorXf old_w = Eigen::VectorXf::Map(primal, num_fea);
+    Eigen::Map<Eigen::VectorXf> d(dual, num_fea);
+    Eigen::Map<Eigen::VectorXf> c(cons, num_fea);
+
+    this->ParamInit(old_w, d, c, rho, dtrain);
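+    // run at most max_lbfgs_iter iterations; UpdateOneIter() returns true once the
+    // objective improvement drops below lbfgs_stop_tol * init_objval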
     int iter = 0;
     while (iter < max_lbfgs_iter) {
-      if (this->UpdateOneIter(primal, dual, cons, rho, iter, dtrain))
+      if (this->UpdateOneIter(old_w, d, c, rho, iter, dtrain))
         break;
       iter++;
     }
+    // copy the solution back into the caller's primal buffer
+    for (size_t i = 0; i < num_fea; i++) {
+      primal[i] = new_w[i];
+    }
   }
 
  private:
@@ -51,59 +106,246 @@ class LBFGSSolver : public Learner {
   int max_lbfgs_iter;
   size_t memory_size;
   size_t num_fea;
-
+  int nthread;
+
+  bool debug;
+  int rank;
+
   double old_objval;
   double new_objval;
   double init_objval;
 
   std::vector<float> alpha;
-  std::vector<float *> y;
-  std::vector<float *> s;
+  std::vector<Eigen::VectorXf> y;
+  std::vector<Eigen::VectorXf> s;
 
-  float *z;
-  float *grad;
-  float *old_weight;
+  Eigen::VectorXf z;
+  Eigen::VectorXf grad;
+  Eigen::VectorXf new_w;
+  std::map<std::string, std::string> cfg_;
 
-  void Init() {
-    float *e = new float[num_fea];
-    memset(e, 0.0, sizeof(float) * num_fea);
+ private:
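+  // (re)allocate the L-BFGS working buffers for the configured num_fea and memory_size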
+  inline void MemInit()
+  {
+    Eigen::VectorXf e(num_fea);
     y.resize(memory_size, e);
     s.resize(memory_size, e);
 
     alpha.resize(memory_size, 0.0);
 
+    z = Eigen::VectorXf::Zero(num_fea);
+    grad = Eigen::VectorXf::Zero(num_fea);
+    new_w = Eigen::VectorXf::Zero(num_fea);
+  }
+
+  inline void ParamInit(const Eigen::VectorXf &old_w,
+                        const Eigen::VectorXf &dual,
+                        const Eigen::VectorXf &cons,
+                        const float &rho,
+                        dmlc::RowBlockIter<unsigned> *dtrain) {
+    // init gradient
+    CalGrad(grad, old_w, dual, cons, rho, dtrain);
+    // init obj val
+    init_objval = this->Eval(old_w, dual, cons, rho, dtrain);
+    old_objval = init_objval;
   }
+
   // one update
-  bool UpdateOneIter(float *prima,
-                     float *dual,
-                     float *cons,
+  bool UpdateOneIter(Eigen::VectorXf &w,
+                     const Eigen::VectorXf &d,
+                     const Eigen::VectorXf &c,
                      float rho,
                      int iter,
                      dmlc::RowBlockIter<unsigned> *dtrain)
   {
     bool stop = false;
-    // float vdot = FindChangeDirection();
-    BacktrackLineSearch();
-    UpdateHistInfo();
+    float vdot = FindChangeDirection(iter);
+    int k = BacktrackLineSearch(w, d, c, rho, vdot, iter, dtrain);
+    UpdateHistInfo(w, d, c, rho, iter, dtrain);
     if (old_objval - new_objval < lbfgs_stop_tol * init_objval)
       return true;
+    if (rank == 0 && debug)
+      LOG(INFO) << "[" << iter << "] L-BFGS: linesearch finishes in " << k + 1 << " rounds, new_objval="
+                << new_objval << ", improvement=" << old_objval - new_objval;
+    old_objval = new_objval;
     return stop;
   }
 
-  float FindChangeDirection()
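+  // two-loop recursion: builds z = H_k * grad from the stored (s, y) pairs, so the
+  // search direction is -z; returns the directional derivative grad.dot(-z)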
+  float FindChangeDirection(int iter)
+  {
+    int k = iter;
+    int M = memory_size;
+    int j, bound, end = 0;
+
+    if (k != 0)
+      end = (k - 1) % M;
+    z = grad;
+    bound = (k > M) ? M - 1 : k - 1;
+    j = end;
+    for (int i = 0; i <= bound; ++i) {
+      alpha[j] = s[j].dot(z) / y[j].dot(s[j]);
+      z.noalias() -= alpha[j] * y[j];
+      j = (j - 1 + M) % M;
+    }
+    // init H0 with the scaling s_{k-1}.y_{k-1} / y_{k-1}.y_{k-1}
+    if (k != 0) {
+      int pre_k = (k - 1) % M;
+      z = s[pre_k].dot(y[pre_k]) / y[pre_k].dot(y[pre_k]) * z;
+    }
+    for (int i = 0; i <= bound; ++i) {
+      j = (j + 1) % M;
+      z.noalias() += s[j] * (alpha[j] - y[j].dot(z) / y[j].dot(s[j]));
+    }
+    // SetDirL1Sign(l1_grad, grad, linear.old_weight);
+    return grad.dot(-z);
+  }
+
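+  // backtracking line search under the Armijo condition: accept step alpha_ once
+  // Eval(w - alpha_*z) <= Eval(w) + c1 * alpha_ * dot_dir_grad, otherwise shrink
+  // alpha_ by the backoff factor; returns the number of backtracking rounds used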
+  int BacktrackLineSearch(Eigen::VectorXf &old_w,
+                          const Eigen::VectorXf &d,
+                          const Eigen::VectorXf &c,
+                          float rho,
+                          float dot_dir_grad, int iter,
+                          dmlc::RowBlockIter<unsigned> *dtrain)
+  {
+    int k = 0;
+    float alpha_ = 1.0;
+    float backoff = linesearch_backoff;
+    float c1 = linesearch_c1;
+    float dgtest;
+
+    // dot_dir_grad = grad.dot(-z) must be negative for a descent direction
+    if (dot_dir_grad > 0)
+      LOG(FATAL) << "The search direction is not a descent direction.";
+
+    if (iter == 0) {
+      alpha_ = 1.0f / std::sqrt(-dot_dir_grad);
+      backoff = 0.1f;
+    }
+    dgtest = c1 * dot_dir_grad;
+    while (k < max_linesearch_iter) {
+      new_w = old_w - alpha_ * z;
+      new_objval = this->Eval(new_w, d, c, rho, dtrain);
+      if (new_objval <= old_objval + alpha_ * dgtest)
+        break;
+      else
+        alpha_ *= backoff;
+      k++;
+    }
+    return k;
+  }
+
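+  // record the curvature pair for iteration k in the circular history:
+  // y_k = grad(new_w) - grad(old_w), s_k = new_w - old_w, then advance old_w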
+  void UpdateHistInfo(Eigen::VectorXf &old_w,
+                      const Eigen::VectorXf &d,
+                      const Eigen::VectorXf &c,
+                      const float &rho,
+                      const int &iter,
+                      dmlc::RowBlockIter<unsigned> *dtrain)
+  {
+    int k = iter;
+    y[k % memory_size] = grad;
+    CalGrad(grad, new_w, d, c, rho, dtrain);
+    y[k % memory_size] = grad - y[k % memory_size];
+    s[k % memory_size] = new_w - old_w;
+    old_w = new_w;
+  }
+
+  // grad_i = sum of (pred - label) over instances containing feature i,
+  //          plus d_i + rho * (old_w_i - c_i)
+  void CalGrad(Eigen::VectorXf &grad,
+               const Eigen::VectorXf &old_w,
+               const Eigen::VectorXf &d,
+               const Eigen::VectorXf &c,
+               const float &rho,
+               dmlc::RowBlockIter<unsigned> *dtrain)
   {
-    return 0;
+    if (nthread != 0) omp_set_num_threads(nthread);
+    std::vector<float> grad_;
+    grad.setZero();
+    dtrain->BeforeFirst();
+    while (dtrain->Next()) {
+      const dmlc::RowBlock<unsigned> &batch = dtrain->Value();
+      grad_.resize(batch.size, 0.0f);
+      // per-instance residuals, computed in parallel
+      #pragma omp parallel for schedule(static)
+      for (size_t i = 0; i < batch.size; i++) {
+        dmlc::Row<unsigned> v = batch[i];
+        grad_[i] = PredToGrad(old_w, v);
+      }
+      // scatter the residuals into the feature gradient; kept serial because
+      // concurrent updates to grad[v.index[j]] would race
+      for (size_t i = 0; i < batch.size; i++) {
+        dmlc::Row<unsigned> v = batch[i];
+        for (size_t j = 0; j < v.length; j++) {
+          grad[v.index[j]] += grad_[i];
+        }
+      }
+    }
+    for (size_t i = 0; i < num_fea; ++i)
+      grad[i] += d[i] + rho * (old_w[i] - c[i]);
+  }
+
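+  // objective value: sum of per-instance logistic losses plus the penalty
+  // d.dot(w - c) + (rho / 2) * ||w - c||^2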
+  float Eval(const Eigen::VectorXf &old_w,
+             const Eigen::VectorXf &d,
+             const Eigen::VectorXf &c,
+             const float &rho,
+             dmlc::RowBlockIter<unsigned> *dtrain)
+  {
+    float sum_val = 0.0f;
+    dtrain->BeforeFirst();
+    while (dtrain->Next()) {
+      const dmlc::RowBlock<unsigned> &batch = dtrain->Value();
+      #pragma omp parallel for schedule(static) reduction(+:sum_val)
+      for (size_t i = 0; i < batch.size; i++) {
+        float fv = CalLoss(old_w, batch[i]);
+        sum_val += fv;
+      }
+    }
+    float r = d.dot(old_w - c) + rho / 2 * (old_w - c).dot(old_w - c);
+    sum_val += r;
+    return sum_val;
   }
 
-  void BacktrackLineSearch()
+  inline float Sigmoid(float inx)
   {
+    return 1. / (1. + exp(-inx));
+  }
+
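+  // predicted probability for one instance; feature values are ignored, so the
+  // margin is the sum of the weights at the instance's in-range feature indices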
+  inline float PredIns(const Eigen::VectorXf &old_w,
+                       const dmlc::Row<unsigned> &v)
+  {
+    float inner = 0.0;
+    for (unsigned int i = 0; i < v.length; ++i)
+    {
+      if (v.index[i] >= num_fea)
+        continue;
+      inner += old_w[v.index[i]];
+    }
+    return Sigmoid(inner);
+  }
 
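+  // per-instance gradient factor of the logistic loss: pred - label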
+  inline float PredToGrad(const Eigen::VectorXf &old_w,
+                          const dmlc::Row<unsigned> &v)
+  {
+    return PredIns(old_w, v) - v.get_label();
   }
 
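+  // negative log-likelihood of a single instance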
-  void UpdateHistInfo()
+  inline float CalLoss(const Eigen::VectorXf &old_w,
+                       const dmlc::Row<unsigned> &v)
   {
+    float nlogprob = 0.;
+    float pred = PredIns(old_w, v);
 
+    if (int(v.get_label()) == 0) {
+      nlogprob = std::log(1 - pred);
+    } else {
+      nlogprob = std::log(pred);
+    }
+    return -nlogprob;
   }
+
 };
 
 }