Skip to content

Commit 30adba0

Browse files
author
ling.fang
committed
add lbfgs model parallel algo
1 parent 10d9753 commit 30adba0

File tree

268 files changed

+55006
-0
lines changed

Some content is hidden

Large Commits have some content hidden by default. Use the searchbox below for content that may be hidden.

268 files changed

+55006
-0
lines changed

lbfgs_parallel/Makefile

Lines changed: 25 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,25 @@
1+
# Build script for the model-parallel L-BFGS solver (lbfgs.dmlc).
#
# Build settings are read from config.mk; dmlc.mk is included right after it.
# NOTE(review): DMLC_LDFLAGS is not set in this file and is presumably
# provided by dmlc.mk -- confirm.
config = ./config.mk

include $(config)
include ./dmlc.mk

BIN = lbfgs.dmlc

# common build script for programs

export LDFLAGS= -L../../lib -pthread -lm -lrt -lglog $(DMLC_LDFLAGS)
export CFLAGS = -Wall -msse2 -Wno-unknown-pragmas -fPIC -I./include/rabit/include -I./include/dmlc-core/include -std=c++11 -fopenmp

# headers and libraries installed under ./deps
CFLAGS+= -I./deps/include
LDFLAGS+= -L./deps/lib

# `all` and `clean` are commands, not files: declare them phony so that a
# stray file with one of these names cannot mask the target.
.PHONY: all clean

# Declared before any file rule in this makefile so that a plain `make`
# resolves to `all`.
all: $(BIN)

# Prerequisites of the solver binary; the link recipe is shared via $(BIN).
lbfgs.dmlc : lbfgs.cc ./lib/libdmlc.a ./lib/librabit.a

# Compile and link in one step, feeding only source/object/archive
# prerequisites to the compiler.
$(BIN) :
	$(CXX) $(CFLAGS) -o $@ $(filter %.cpp %.o %.c %.cc %.a, $^) $(LDFLAGS)

# OBJ is not set in this file; it expands to nothing unless an included
# makefile defines it.
clean:
	$(RM) $(OBJ) $(BIN) *~ ../src/*~
25+

lbfgs_parallel/README.md

Lines changed: 45 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,45 @@
1+
Linear and Logistic Regression (Model Parallel L-BFGS Algo)
2+
====
3+
* input format: LibSVM
4+
* Local Example: [run-linear.sh](run-linear.sh)
5+
* Running on YARN: [run-yarn.sh](run-yarn.sh)
6+
7+
Multi-Threading Optimization
8+
====
9+
* The code supports multi-threading, and we encourage you to use it
10+
- Simply add ```nthread=k``` where k is the number of threads you want to use
11+
* If you submit with YARN
12+
- Use ```--vcores``` and ```-mem``` to request CPU and memory resources
13+
- Some schedulers in YARN do not honor CPU requests; you can request more memory to grab working slots
14+
* Multi-threading usually improves speed
15+
- You can use fewer workers and assign more resources to each worker
16+
- This usually means less communication overhead and faster running time
17+
18+
Parameters
19+
====
20+
All the parameters can be set by param=value
21+
22+
#### Important Parameters
23+
* objective [default = logistic]
24+
- can be linear or logistic
25+
* base_score [default = 0.5]
26+
- global bias; it is recommended to set this to the mean value of the label
27+
* reg_L1 [default = 0]
28+
- l1 regularization co-efficient
29+
* reg_L2 [default = 1]
30+
- l2 regularization co-efficient
31+
* lbfgs_stop_tol [default = 1e-5]
32+
- relative tolerance level of loss reduction with respect to initial loss
33+
* max_lbfgs_iter [default = 500]
34+
- maximum number of lbfgs iterations
35+
36+
#### Optimization Related Parameters
37+
* min_lbfgs_iter [default = 5]
38+
- minimum number of lbfgs iterations
39+
* max_linesearch_iter [default = 100]
40+
- maximum number of iterations in linesearch
41+
* linesearch_c1 [default = 1e-4]
42+
- c1 co-efficient in backoff linesearch
43+
* linesearch_backoff [default = 0.5]
44+
- backoff ratio in linesearch
45+

lbfgs_parallel/config.mk

Lines changed: 30 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,30 @@
1+
#-----------------------------------------------------
#  wormhole: the configuration compile script
#
#  This is the default configuration setup for all dmlc projects.
#  If you want to change the configuration, do the following steps:
#
#  - copy this file to the root of the wormhole folder
#  - modify the configuration you want
#  - type make or make -j n in each of the folders
#----------------------------------------------------

# choice of compiler
export CC = gcc
export CXX = g++
export MPICXX = mpicxx

# whether to use Google logging
USE_GLOG = 1

# whether to use AWS S3 support during compile, which depends on libcurl4-openssl-dev
# you can install it on Ubuntu via
# sudo apt-get install libcurl4-openssl-dev

USE_S3 = 0

# whether to use HDFS support during compile. libhdfs is required
USE_HDFS = 1

# path to libjvm.so
LIBJVM=$(JAVA_HOME)/jre/lib/amd64/server
11 KB
Binary file not shown.
Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
#!/bin/bash
2+
3+
# Copyright (c) 2008, Google Inc.
4+
# All rights reserved.
5+
#
6+
# Redistribution and use in source and binary forms, with or without
7+
# modification, are permitted provided that the following conditions are
8+
# met:
9+
#
10+
# * Redistributions of source code must retain the above copyright
11+
# notice, this list of conditions and the following disclaimer.
12+
# * Redistributions in binary form must reproduce the above
13+
# copyright notice, this list of conditions and the following disclaimer
14+
# in the documentation and/or other materials provided with the
15+
# distribution.
16+
# * Neither the name of Google Inc. nor the names of its
17+
# contributors may be used to endorse or promote products derived from
18+
# this software without specific prior written permission.
19+
#
20+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
21+
# "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
22+
# LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
23+
# A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
24+
# OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
25+
# SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
26+
# LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
27+
# DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
28+
# THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
29+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
30+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
31+
#
32+
# ---
33+
# Author: Dave Nicponski
34+
#
35+
# This script is invoked by bash in response to a matching compspec. When
36+
# this happens, bash calls this script using the command shown in the -C
37+
# block of the complete entry, but also appends 3 arguments. They are:
38+
# - The command being used for completion
39+
# - The word being completed
40+
# - The word preceding the completion word.
41+
#
42+
# Here's an example of how you might use this script:
43+
# $ complete -o bashdefault -o default -o nospace -C \
44+
# '/usr/local/bin/gflags_completions.sh --tab_completion_columns $COLUMNS' \
45+
# time env binary_name another_binary [...]
46+
47+
# Bash appends 3 arguments to the -C command line (see the header above).
# With fewer than that the indirect expansions below would use index 0 or a
# negative index, so there is nothing sensible to complete: exit quietly.
if [ "$#" -lt 3 ]; then
  exit 0
fi

# completion_word_index gets the index of the (N-1)th argument for
# this command line.  completion_word gets the actual argument from
# this command line at the (N-1)th position
completion_word_index="$(($# - 1))"
completion_word="${!completion_word_index}"

# TODO(user): Replace this once gflags_completions.cc has
# a bool parameter indicating unambiguously to hijack the process for
# completion purposes.
if [ -z "$completion_word" ]; then
  # Until an empty value for the completion word stops being misunderstood
  # by binaries, don't actually execute the binary or the process
  # won't be hijacked!
  exit 0
fi
62+
63+
# binary_index gets the index of the command being completed (which bash
# places in the (N-2)nd position).  binary gets the actual command from
# this command line at that (N-2)nd position
binary_index="$(($# - 2))"
binary="${!binary_index}"

# For completions to be universal, we may have setup the compspec to
# trigger on 'harmless pass-through' commands, like 'time' or 'env'.
# If the command being completed is one of those two, we'll need to
# identify the actual command being executed.  To do this, we need
# the actual command line that the <TAB> was pressed on.  Bash helpfully
# places this in the $COMP_LINE variable.
if [ "$binary" == "time" ] || [ "$binary" == "env" ]; then
  # we'll assume that the first 'argument' is actually the
  # binary

  # TODO(user): This is not perfect - the 'env' command, for instance,
  #   is allowed to have options between the 'env' and 'the command to
  #   be executed'.  For example, consider:
  #   $ env FOO="bar" bin/do_something --help<TAB>
  #   In this case, we'll mistake the FOO="bar" portion as the binary.
  #   Perhaps we should continue consuming leading words until we
  #   either run out of words, or find a word that is a valid file
  #   marked as executable.  I can't think of any reason this wouldn't
  #   work.

  # Break up the 'original command line' (not this script's command line,
  # rather the one the <TAB> was pressed on) and find the second word.
  # read -a splits on whitespace like an unquoted expansion would, but
  # without pathname expansion, so a '*' or '?' typed on the command line
  # is not replaced by matching file names.
  read -r -a parts <<< "${COMP_LINE}"
  binary=${parts[1]}
fi
95+
96+
# Build the command line to use for completion.  Basically it involves
# passing through all the arguments given to this script (except the 3
# that bash added), and appending a '--tab_completion_word "WORD"' to
# the arguments.
#
# The arguments are collected in an array and the binary is invoked
# directly instead of through eval on a hand-quoted string, so arguments
# containing quotes, '$' or backticks are passed through verbatim rather
# than being re-interpreted by the shell.
params=()
for ((i = 1; i <= $# - 3; ++i)); do
  params+=("${!i}")
done
params+=(--tab_completion_word "$completion_word")

# TODO(user): Perhaps stash the output in a temporary file somewhere
# in /tmp, and only cat it to stdout if the command returned a success
# code, to prevent false positives

# If we think we have a reasonable command to execute, then execute it
# and hope for the best.
candidate=$(type -p "$binary")
if [ -n "$candidate" ]; then
  "$candidate" "${params[@]}" 2>/dev/null
elif [ -f "$binary" ] && [ -x "$binary" ]; then
  "$binary" "${params[@]}" 2>/dev/null
fi

lbfgs_parallel/deps/bin/lz4

109 KB
Binary file not shown.

lbfgs_parallel/deps/bin/lz4c

109 KB
Binary file not shown.

lbfgs_parallel/deps/bin/lz4cat

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
lz4

lbfgs_parallel/deps/bin/protoc

118 KB
Binary file not shown.

lbfgs_parallel/deps/bin/unlz4

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1 @@
1+
lz4

0 commit comments

Comments
 (0)