Doiar
Profilo di
Nome | Doiar |
---|---|
Indirizzo email | n/a |
Messaggi | 1 |
-
- 2016-06-08 18:20:50
- Script per interrogazione database BLAST
- Forum >> Programmazione Python >> Files e Directory
- ciao a tutti
ho trovato in un articolo uni script (blast_with_ncbi.py) che permette, fornendo un file di sequenze di geni in formato .fasta, l'interrogazione di un database locale.
aprendo il file con gedit risulta chiaro che devo modificare alcune variabili, in particolare le directory in cui sono contenuti i vari file.
tuttavia non riesco a farlo funzionare. credo che l'errore dipenda dal fatto che non so indicare in modo corretto la "workdir":
# Main directory for work
workdir = os.path.dirname(os.path.realpath(__file__))
qualcuno potrebbe aiutarmi.
riporto di seguito lo script originale e a seguire la parte modificata da me
# # Imports import os import glob from Bio import SeqIO from Bio.Blast.Applications import NcbiblastnCommandline # # Defines # Change these for your specific needs # Main directory for work workdir = os.path.dirname(os.path.realpath(__file__)) # Local BLAST database and GI filter list blast_db = '%s/../blast-db' % workdir # FASTA directory, where to find the sequences fasta_dir = '%s/../fasta' % workdir # Output of our BLAST results outdir = '%s/../blast_results' % workdir # # Given a directory this returns a list of fasta files # Change if 'fa' is not the extension that you want to find. def get_fasta_files(fasta_dir): abs_path = os.path.abspath(fasta_dir) print('Looking for fasta files in: ',abs_path) return glob.glob('%s/*.fasta' % abs_path) # # Returns the output file def outfile(dir,file): return '%s/%s.csv' % (dir,os.path.splitext(os.path.basename(file))0) # # BLASTS the sequence file against the local database def ncbi_blast(in_file,out_file,dbp): cur_dir = os.getcwd() os.chdir(dbp) # Replace the database names with your own local databases # ...plus we are using 8 threads so change according to the resources available cmd_line = NcbiblastnCommandline(query=in_file, db="'nt_ncbi_plants fpuk'", out=out_file, outfmt="'10 std score stitle'", max_target_seqs=20,num_threads=8) cmd_line() os.chdir(cur_dir) # # Each fasta file contains a set of sequences that were # matched for a given tag. They are either reverse or forward. # We blast them to the database. def blast_sequences( fastas, odir, db_dir ): for file in fastas: print( 'Processing: ', file ) ncbi_blast(file,outfile(odir,file),db_dir) # Main print('Running') os.chdir(workdir) fasta_files = get_fasta_files(fasta_dir) blast_sequences(fasta_files,outdir,blast_db) print('Done')
_______________________
# # Imports import os import glob from Bio import SeqIO from Bio.Blast.Applications import NcbiblastnCommandline # # Defines # Change these for your specific needs # Main directory for work workdir = os.path.dirname(os.path.realpath(__file__)) workdir = '/home/doiar/Scrivania/home/doiar/Scrivania/pyscript/BLAST-Tools' ('/home/doiar/Scrivania/home/doiar/Scrivania/pyscript/BLAST-Tools/bin' '%(blast_with_ncbi.py)%') # Local BLAST database and GI filter list blast_db = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/blast-db/' % workdir # FASTA directory, where to find the sequences fasta_dir = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/fasta' % workdir # Output of our BLAST results outdir = '%s/home/doiar/Scrivania/pyscript/BLAST-Tools/blast_results' % workdir # # Given a directory this returns a list of fasta files # Change if 'fa' is not the extension that you want to find. def get_fasta_files(fasta_dir): abs_path = os.path.abspath(fasta_dir) print('Looking for fasta files in: ',abs_path) return glob.glob('%s/*.fasta' % abs_path) # # Returns the output file def outfile(dir,file): return '%s/%s.csv' % (dir,os.path.splitext(os.path.basename(file))0) # # BLASTS the sequence file against the local database def ncbi_blast(in_file,out_file,dbp): cur_dir = os.getcwd() os.chdir(dbp) # Replace the database names with your own local databases # ...plus we are using 8 threads so change according to the resources available cmd_line = NcbiblastnCommandline(query=in_file, db="'ITSNCBIDB'", out=out_file, outfmt="'10 std score stitle'", max_target_seqs=10,num_threads=8) cmd_line() os.chdir(cur_dir) # # Each fasta file contains a set of sequences that were # matched for a given tag. They are either reverse or forward. # We blast them to the database. def blast_sequences( fastas, odir, db_dir ): for file in fastas: print( 'Processing: ', file ) ncbi_blast(file,outfile(odir,file),db_dir) # Main print('Running') os.chdir(workdir) fasta_files = get_fasta_files(fasta_dir) blast_sequences(fasta_files,outdir,blast_db) print('Done')