import csv
import os
import re
import time

from Data_process.com_data_extraction import file_name
67
78
# Shared Neo4j connection used by every create_* function in this module.
# The defaults reproduce the original local-development settings; each value
# can be overridden through the environment so that credentials do not have
# to be edited in (or committed with) the source file.
graph = Con_Neo4j(
    http=os.environ.get('NEO4J_HTTP', 'http://127.0.0.1:7474'),
    username=os.environ.get('NEO4J_USERNAME', 'neo4j'),
    password=os.environ.get('NEO4J_PASSWORD', '123456'),
)
@@ -30,6 +31,7 @@ def com_type(code): # 识别公司代码所属类别,包括:深A、沪A、
3031
3132
3233def create_company (): # 在图中创建A股上市公司节点
34+ time1 = time .time ()
3335 with open ('../Data/company.csv' , 'r' , encoding = 'utf-8' , newline = '' ) as csvfile :
3436 rows = csv .reader (csvfile , delimiter = ';' )
3537 count = - 1
@@ -52,10 +54,13 @@ def create_company(): # 在图中创建A股上市公司节点
5254 node ['adv_ser' ] = row [11 ]
5355 node ['com_type' ] = com_type (row [0 ])
5456 graph .create (node )
55- print (count , row )
57+ # print(count, row)
58+ time2 = time .time ()
59+ print ('Over: create_company' , time2 - time1 )
5660
5761
5862def create_industry (): # 在图中的创建行业节点
63+ time1 = time .time ()
5964 with open ('../Data/industry_tags_id.txt' , mode = 'r' , encoding = 'utf-8' , newline = '' ) as txtfile :
6065 rows = txtfile .readlines ()
6166 for row in rows :
@@ -65,10 +70,13 @@ def create_industry(): # 在图中的创建行业节点
6570 node ['ind_name' ] = inds [1 ]
6671 node ['class_system' ] = '申万三级'
6772 graph .create (node )
68- print (inds )
73+ # print(inds)
74+ time2 = time .time ()
75+ print ('Over: create_industry' , time2 - time1 )
6976
7077
7178def create_com_to_ind (): # 在图中创建公司与行业的关系
79+ time1 = time .time ()
7280 with open ('../Data/com_industry_tags.csv' , 'r' , encoding = 'utf-8' , newline = '' ) as csvfile :
7381 rows = csv .reader (csvfile )
7482 k = - 1
@@ -79,21 +87,23 @@ def create_com_to_ind(): # 在图中创建公司与行业的关系
7987 com_node = graph .find_one (label = 'COMPANY' , property_key = 'stock_code' , property_value = row [0 ])
8088 ind_node = graph .find_one (label = 'INDUSTRY' , property_key = 'ind_name' , property_value = row [2 ])
8189 if not com_node :
82- # print(k, row)
8390 continue
8491 if ind_node :
85- com_rel = Relationship (com_node , 'COM_BelongTo_I ' , ind_node )
92+ com_rel = Relationship (com_node , 'COM_BelongTo_IND ' , ind_node )
8693 graph .create (com_rel )
8794 else :
95+ print ('Missing industry:' , k , row )
8896 new_node = Node ('INDUSTRY' )
8997 new_node ['ind_name' ] = row [2 ]
9098 new_node ['class_system' ] = '申万三级'
91- com_rel = Relationship (com_node , 'COM_BelongTo_I ' , new_node )
99+ com_rel = Relationship (com_node , 'COM_BelongTo_IND ' , new_node )
92100 graph .create (new_node | com_rel )
93- print (k , row )
101+ time2 = time .time ()
102+ print ('Over: create_com_to_ind' , time2 - time1 )
94103
95104
96105def create_com_block (): # 在图中创建板块节点,以及A股上市公司与板块的关系
106+ time1 = time .time ()
97107 file_path = '../Data/A股上市公司所属板块/'
98108 files = file_name (file_path )
99109 rel_num = 0
@@ -112,7 +122,7 @@ def create_com_block(): # 在图中创建板块节点,以及A股上市公司
112122 if k == 0 :
113123 continue
114124 rel_num += 1
115- print (rel_num , stock_code , '-->' , row )
125+ # print(rel_num, stock_code, '-->', row)
116126 block_node = graph .find_one (label = 'BLOCK' , property_key = 'block_name' , property_value = row [0 ])
117127 if block_node :
118128 rel = Relationship (node , 'COM_BelongTo_B' , block_node )
@@ -122,9 +132,12 @@ def create_com_block(): # 在图中创建板块节点,以及A股上市公司
122132 nod ['block_name' ] = row [0 ]
123133 rel = Relationship (node , 'COM_BelongTo_B' , nod )
124134 graph .create (nod | rel )
135+ time2 = time .time ()
136+ print ('Over: create_com_block' , time2 - time1 )
125137
126138
127139def create_com_output (): # 在图中创建公司产业输出关系(上下游),如果公司节点不存在则创建
140+ time1 = time .time ()
128141 file_path = '../Data/A股上市公司上下游/'
129142 files = file_name (file_path )
130143 rel_num = 0
@@ -146,7 +159,7 @@ def create_com_output(): # 在图中创建公司产业输出关系(上下游
146159 if k == 0 :
147160 continue
148161 rel_num += 1
149- print (rel_num , row , '-->' , stock_code )
162+ # print(rel_num, row, '-->', stock_code)
150163 if row [3 ] not in ['' , '-' , '--' ]:
151164 row [3 ] = float (row [3 ].replace (',' , '' ))
152165 if row [1 ] != '-' :
@@ -189,7 +202,7 @@ def create_com_output(): # 在图中创建公司产业输出关系(上下游
189202 if k == 0 :
190203 continue
191204 rel_num += 1
192- print (rel_num , stock_code , '-->' , row )
205+ # print(rel_num, stock_code, '-->', row)
193206 if row [3 ] not in ['' , '-' , '--' ]:
194207 row [3 ] = float (row [3 ].replace (',' , '' ))
195208 if row [1 ] != '-' :
@@ -222,9 +235,12 @@ def create_com_output(): # 在图中创建公司产业输出关系(上下游
222235 rel ['report_dt' ] = row [2 ]
223236 rel ['output_funt' ] = row [3 ]
224237 graph .create (rel )
238+ time2 = time .time ()
239+ print ('Over: create_com_output' , time2 - time1 )
225240
226241
227242def create_com_invest (): # 在图中创建公司投资关系,如果公司节点不存在则创建
243+ time1 = time .time ()
228244 file_path = '../Data/A股上市公司投资情况/'
229245 files = file_name (file_path )
230246 rel_num = 0
@@ -243,7 +259,7 @@ def create_com_invest(): # 在图中创建公司投资关系,如果公司节
243259 if k == 0 :
244260 continue
245261 rel_num += 1
246- print (rel_num , stock_code , '-->' , row )
262+ # print(rel_num, stock_code, '-->', row)
247263 if row [3 ] not in ['' , '-' , '--' ]:
248264 row [3 ] = float (row [3 ].replace (',' , '' ))
249265 if row [1 ] != '-' :
@@ -276,24 +292,35 @@ def create_com_invest(): # 在图中创建公司投资关系,如果公司节
276292 rel ['report_dt' ] = row [2 ]
277293 rel ['proportion' ] = row [3 ]
278294 graph .create (rel )
295+ time2 = time .time ()
296+ print ('Over: create_com_invest' , time2 - time1 )
279297
280298
def create_user_to_industry():
    """Create USER nodes and link each one to the industries it follows.

    Reads ``../Data/user_labels.txt`` line by line and extracts every run of
    digits from the line.  The first run becomes the ``user_id`` of a newly
    created USER node; each remaining run is looked up as the ``ind_code`` of
    an INDUSTRY node, and a ``U_FocusOn_IND`` relationship is created from the
    user to that industry.

    Lines that contain no digits are skipped, and codes for which no INDUSTRY
    node is found are reported and skipped rather than being handed to
    ``Relationship``.  The elapsed wall-clock time is printed when done.
    """
    time1 = time.time()
    digit_runs = re.compile(r'\d+')  # compiled once instead of once per line
    with open('../Data/user_labels.txt', mode='r', encoding='utf-8', newline='') as txtfile:
        for row in txtfile:
            res = digit_runs.findall(row)
            if not res:
                # Blank or malformed line: there is no user id to read.
                continue
            user_id, *codes = res
            user_node = Node('USER')
            user_node['user_id'] = user_id
            graph.create(user_node)
            for code in codes:
                ind_node = graph.find_one(label='INDUSTRY', property_key='ind_code', property_value=code)
                if not ind_node:
                    # A failed lookup must not be used as a relationship end
                    # point; report it so the data gap is visible.
                    print('Missing industry:', user_id, code)
                    continue
                graph.create(Relationship(user_node, 'U_FocusOn_IND', ind_node))
    time2 = time.time()
    print('Over: create_user_to_industry', time2 - time1)
296317
297318
if __name__ == '__main__':
    # Full graph build, run step by step in a fixed order.  Company and
    # industry nodes come first because the later steps look them up with
    # graph.find_one before creating relationships.
    build_steps = (
        create_company,
        create_industry,
        create_com_to_ind,
        create_com_block,
        create_com_output,
        create_com_invest,
        create_user_to_industry,
    )
    for build_step in build_steps:
        build_step()
0 commit comments