diff --git a/README.md b/README.md
index a6c4e85d..d2b86026 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,10 @@
# node2vec
+## Notes:
+1. Ignore the `requirements.txt` file; install the dependencies directly with `pip`.
+2. If you hit an error such as `object of type 'map' has no len()` raised from `gensim/models/word2vec.py`, edit that file and change `len(sentence)` to `len(list(sentence))`.
+
+## This repository is a fork of https://github.com/aditya-grover/node2vec
This repository provides a reference implementation of *node2vec* as described in the paper:
> node2vec: Scalable Feature Learning for Networks.
> Aditya Grover and Jure Leskovec.
diff --git a/src/main.py b/src/main.py
index 82ac7357..d3d856ac 100644
--- a/src/main.py
+++ b/src/main.py
@@ -1,8 +1,6 @@
'''
Reference implementation of node2vec.
-
Author: Aditya Grover
-
For more details, refer to the paper:
node2vec: Scalable Feature Learning for Networks
Aditya Grover and Jure Leskovec
@@ -16,89 +14,101 @@
from gensim.models import Word2Vec
def parse_args():
- '''
- Parses the node2vec arguments.
- '''
- parser = argparse.ArgumentParser(description="Run node2vec.")
+ '''
+ Parses the node2vec arguments.
+ '''
+ parser = argparse.ArgumentParser(description="Run node2vec.")
- parser.add_argument('--input', nargs='?', default='graph/karate.edgelist',
- help='Input graph path')
+ parser.add_argument('--input', nargs='?', default='graph/karate.edgelist',
+ help='Input graph path')
- parser.add_argument('--output', nargs='?', default='emb/karate.emb',
- help='Embeddings path')
+ parser.add_argument('--output', nargs='?', default='emb/karate.emb',
+ help='Embeddings path')
- parser.add_argument('--dimensions', type=int, default=128,
- help='Number of dimensions. Default is 128.')
+ parser.add_argument('--dimensions', type=int, default=128,
+ help='Number of dimensions. Default is 128.')
- parser.add_argument('--walk-length', type=int, default=80,
- help='Length of walk per source. Default is 80.')
+ parser.add_argument('--walk-length', type=int, default=80,
+ help='Length of walk per source. Default is 80.')
- parser.add_argument('--num-walks', type=int, default=10,
- help='Number of walks per source. Default is 10.')
+ parser.add_argument('--num-walks', type=int, default=10,
+ help='Number of walks per source. Default is 10.')
- parser.add_argument('--window-size', type=int, default=10,
- help='Context size for optimization. Default is 10.')
+ parser.add_argument('--window-size', type=int, default=10,
+ help='Context size for optimization. Default is 10.')
- parser.add_argument('--iter', default=1, type=int,
- help='Number of epochs in SGD')
+ parser.add_argument('--iter', default=1, type=int,
+ help='Number of epochs in SGD')
- parser.add_argument('--workers', type=int, default=8,
- help='Number of parallel workers. Default is 8.')
+ parser.add_argument('--workers', type=int, default=8,
+ help='Number of parallel workers. Default is 8.')
- parser.add_argument('--p', type=float, default=1,
- help='Return hyperparameter. Default is 1.')
+ parser.add_argument('--p', type=float, default=1,
+ help='Return hyperparameter. Default is 1.')
- parser.add_argument('--q', type=float, default=1,
- help='Inout hyperparameter. Default is 1.')
+ parser.add_argument('--q', type=float, default=1,
+ help='Inout hyperparameter. Default is 1.')
- parser.add_argument('--weighted', dest='weighted', action='store_true',
- help='Boolean specifying (un)weighted. Default is unweighted.')
- parser.add_argument('--unweighted', dest='unweighted', action='store_false')
- parser.set_defaults(weighted=False)
+ parser.add_argument('--weighted', dest='weighted', action='store_true',
+ help='Boolean specifying (un)weighted. Default is unweighted.')
+ parser.add_argument('--unweighted', dest='unweighted', action='store_false')
+ parser.set_defaults(weighted=False)
- parser.add_argument('--directed', dest='directed', action='store_true',
- help='Graph is (un)directed. Default is undirected.')
- parser.add_argument('--undirected', dest='undirected', action='store_false')
- parser.set_defaults(directed=False)
+ parser.add_argument('--directed', dest='directed', action='store_true',
+ help='Graph is (un)directed. Default is undirected.')
+ parser.add_argument('--undirected', dest='undirected', action='store_false')
+ parser.set_defaults(directed=False)
- return parser.parse_args()
+ return parser.parse_args()
def read_graph():
- '''
- Reads the input network in networkx.
- '''
- if args.weighted:
- G = nx.read_edgelist(args.input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph())
- else:
- G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph())
- for edge in G.edges():
- G[edge[0]][edge[1]]['weight'] = 1
+ '''
+ Reads the input network in networkx.
+ '''
+ if args.weighted:
+ G = nx.read_edgelist(args.input, nodetype=int, data=(('weight',float),), create_using=nx.DiGraph())
+ else:
+ G = nx.read_edgelist(args.input, nodetype=int, create_using=nx.DiGraph())
+ for edge in G.edges():
+ G[edge[0]][edge[1]]['weight'] = 1
- if not args.directed:
- G = G.to_undirected()
+ if not args.directed:
+ G = G.to_undirected()
- return G
+ return G
def learn_embeddings(walks):
- '''
- Learn embeddings by optimizing the Skipgram objective using SGD.
- '''
- walks = [map(str, walk) for walk in walks]
- model = Word2Vec(walks, size=args.dimensions, window=args.window_size, min_count=0, sg=1, workers=args.workers, iter=args.iter)
- model.save_word2vec_format(args.output)
-
- return
+    '''
+    Learn embeddings by optimizing the Skipgram objective using SGD.
+    '''
+    # Python 3's map() is lazy and has no len(); gensim needs real lists of tokens.
+    walks = [list(map(str, walk)) for walk in walks]
+    model = Word2Vec(walks, vector_size=args.dimensions, window=args.window_size, min_count=0, sg=1, workers=args.workers, epochs=args.iter)
+    # Vectors are saved through model.wv; reload with KeyedVectors.load_word2vec_format(args.output).
+    model.wv.save_word2vec_format(args.output)
+    return
def main(args):
- '''
- Pipeline for representational learning for all nodes in a graph.
- '''
- nx_G = read_graph()
- G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
- G.preprocess_transition_probs()
- walks = G.simulate_walks(args.num_walks, args.walk_length)
- learn_embeddings(walks)
-
+ '''
+ Pipeline for representational learning for all nodes in a graph.
+ '''
+ nx_G = read_graph()
+ G = node2vec.Graph(nx_G, args.directed, args.p, args.q)
+ G.preprocess_transition_probs()
+ walks = G.simulate_walks(args.num_walks, args.walk_length)
+ learn_embeddings(walks)
+
if __name__ == "__main__":
- args = parse_args()
- main(args)
+    # No sys.argv override: argparse skips argv[0], so the old one-element
+    # list was never parsed. The --input/--output defaults already point at
+    # the karate example, and real command-line arguments are honoured again.
+    args = parse_args()
+    main(args)
\ No newline at end of file
diff --git a/src/node2vec.py b/src/node2vec.py
index 0293411a..1b81b768 100644
--- a/src/node2vec.py
+++ b/src/node2vec.py
@@ -43,9 +43,9 @@ def simulate_walks(self, num_walks, walk_length):
G = self.G
walks = []
nodes = list(G.nodes())
- print 'Walk iteration:'
+ print('Walk iteration:')
for walk_iter in range(num_walks):
- print str(walk_iter+1), '/', str(num_walks)
+ print(str(walk_iter+1), '/', str(num_walks))
random.shuffle(nodes)
for node in nodes:
walks.append(self.node2vec_walk(walk_length=walk_length, start_node=node))
@@ -146,4 +146,4 @@ def alias_draw(J, q):
if np.random.rand() < q[kk]:
return kk
else:
- return J[kk]
\ No newline at end of file
+ return J[kk]