Skip to content

Commit b066c12

Browse files
committed
add local srna db module
1 parent 692aec2 commit b066c12

File tree

2 files changed

+43
-24
lines changed

2 files changed

+43
-24
lines changed

physpetool/phylotree/retrievessurna.py

Lines changed: 35 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -28,11 +28,12 @@
2828
import os
2929
import time
3030
from physpetool.phylotree.log import getLogging
31+
import shutil
3132

3233
log_retrieve = getLogging('SSU rRNA DB')
3334

3435

35-
def retrieve16srna(spenamelist, outpath):
36+
def retrieve16srna(spenamelist, outpath, local_db):
3637
"""
3738
retrieve 16s rna from bioinfor.scu.edu.cn
3839
:param spenamelist: a list containing species names
@@ -54,21 +55,38 @@ def retrieve16srna(spenamelist, outpath):
5455
dirname = os.path.join(dirname, subdir)
5556
if not os.path.exists(dirname):
5657
os.makedirs(dirname)
57-
# connect database
58-
connect = ftplib.FTP("173.255.208.244")
59-
connect.login('anonymous')
60-
connect.cwd('/pub/database16s')
61-
# connect.dir()
62-
for abb in spelist:
63-
retrievename = abb + '.fasta'
64-
downloadfilname = 'rna_sequence' + '.fasta'
65-
downloadfilnamepath = os.path.join(dirname, downloadfilname)
66-
fw = open(downloadfilnamepath, 'ab')
67-
remoteFileName = 'RETR ' + os.path.basename(retrievename)
68-
connect.retrbinary(remoteFileName, fw.write)
69-
fw.write(b'\n')
70-
fw.close()
71-
log_retrieve.info("Retrieve and download of organism '{0}' SSU rRNA sequence was successful".format(abb))
72-
connect.quit()
7358

59+
# connect ftp database
60+
61+
if local_db == "":
62+
connect = ftplib.FTP("173.255.208.244")
63+
connect.login('anonymous')
64+
connect.cwd('/pub/database16s')
65+
# connect.dir()
66+
for abb in spelist:
67+
retrievename = abb + '.fasta'
68+
downloadfilname = 'rna_sequence' + '.fasta'
69+
downloadfilnamepath = os.path.join(dirname, downloadfilname)
70+
fw = open(downloadfilnamepath, 'ab')
71+
remoteFileName = 'RETR ' + os.path.basename(retrievename)
72+
connect.retrbinary(remoteFileName, fw.write)
73+
fw.write(b'\n')
74+
fw.close()
75+
log_retrieve.info("Retrieve and download of organism '{0}' SSU rRNA sequence was successful".format(abb))
76+
connect.quit()
77+
# local db method
78+
else:
79+
file_name_new = 'rna_sequence' + '.fasta'
80+
abb_data_path_new = os.path.join(dirname, file_name_new)
81+
w_ = open(abb_data_path_new, 'a')
82+
for abb in spelist:
83+
file_name = abb + ".fasta"
84+
85+
abb_data_path = os.path.join(local_db, file_name)
86+
87+
seq = open(abb_data_path, "r").read()
88+
w_.write(seq + "\n")
89+
90+
log_retrieve.info("Retrieve and download of organism '{0}' SSU rRNA sequence was successful".format(abb))
91+
w_.close()
7492
return dirname

physpetool/physpe/autobuild.py

Lines changed: 8 additions & 7 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ def starting(args):
168168
args.raxml, args.raxml_parameter,
169169
args.fasttree, args.fasttree_parameter,
170170
args.iqtree, args.iqtree_parameter,
171-
args.thread)
171+
args.thread,args.db)
172172

173173
# Reconstruct phylogenetic tree by extend highly conserved proteins.
174174
elif args.EHCP:
@@ -197,7 +197,8 @@ def starting(args):
197197
args.raxml, args.raxml_parameter,
198198
args.fasttree, args.fasttree_parameter,
199199
args.iqtree, args.iqtree_parameter,
200-
args.thread, args.extenddata)
200+
args.thread, args.extenddata,
201+
args.db)
201202
# Reconstruct phylogenetic tree by highly conserved proteins.
202203
elif args.HCP:
203204
setlogdir(out_put)
@@ -270,11 +271,11 @@ def starting_srna(in_put, out_put,
270271
args_raxml, args_raxml_p,
271272
args_fasttree, args_fasttree_p,
272273
args_iqtree, args_iqtree_p,
273-
args_thread):
274+
args_thread,args_db):
274275
'''reconstruct phylogenetic tree by ssu rna method'''
275276
ssu_input, recovery_dic = checkSilvaOrganism(in_put)
276277
start = time.time()
277-
out_retrieve = retrieve16srna(ssu_input, out_put)
278+
out_retrieve = retrieve16srna(ssu_input, out_put,args_db)
278279
end = time.time()
279280
auto_build_log.info('Retrieving SSU rRNA sequences used time: {} Seconds'.format(end - start))
280281
if not recovery_dic == []:
@@ -370,7 +371,6 @@ def starting_ehcp(in_put, out_put,
370371
end2 = time.time()
371372
auto_build_log.info('Constructing species tree used time: {} Seconds'.format(end2 - start2))
372373

373-
374374
def starting_esrna(in_put, out_put,
375375
args_muscle, args_muscle_p,
376376
args_clustalw, args_clustalw_p,
@@ -380,12 +380,13 @@ def starting_esrna(in_put, out_put,
380380
args_raxml, args_raxml_p,
381381
args_fasttree, args_fasttree_p,
382382
args_iqtree, args_iqtree_p,
383-
args_thread, args_extenddata):
383+
args_thread, args_extenddata,
384+
args_db):
384385
'''reconstruct phylogenetic tree by ssu rna extend method'''
385386
extend_check = checkFile(args_extenddata)
386387
ssu_input, recovery_dic = checkSilvaOrganism(in_put)
387388
start = time.time()
388-
out_retrieve = retrieve16srna(ssu_input, out_put)
389+
out_retrieve = retrieve16srna(ssu_input, out_put,args_db)
389390
end = time.time()
390391
auto_build_log.info('Retrieving SSU rRNA sequences used time: {} Seconds'.format(end - start))
391392
if not recovery_dic == {}:

0 commit comments

Comments
 (0)