| Linguaggio di programmazione Python

Profilo di Doiar

Nome	Doiar
Indirizzo email	n/a
Messaggi	1

2016-06-08 18:20:50

Script per interrogazione database BLAST

Forum >> Programmazione Python >> Files e Directory

ciao a tutti
ho trovato in un articolo uno script (blast_with_ncbi.py) che permette, fornendo un file di sequenze di geni in formato .fasta, l'interrogazione di un database locale.

aprendo il file con gedit risulta chiaro che devo modificare alcune variabili, in particolare le directory in cui sono contenuti i vari file.

tuttavia non riesco a farlo funzionare. credo che l'errore dipenda dal fatto che non so indicare in modo corretto la "workdir":
# Main directory for work
workdir = os.path.dirname(os.path.realpath(__file__))

Qualcuno potrebbe aiutarmi?
riporto di seguito lo script originale e a seguire la parte modificata da me

#
# Imports
import os
import glob
from   Bio import SeqIO
from   Bio.Blast.Applications import NcbiblastnCommandline
 
#
# Defines
# Change these for your specific needs
 
# Work directory: the folder that holds this script (symlinks resolved).
_script_path = os.path.realpath(__file__)
workdir   = os.path.dirname(_script_path)

# Every location below is a sibling of the work directory,
# reached through '..'.

# Folder with the local BLAST database and the GI filter list
blast_db  = '%s/../blast-db' % workdir

# Folder that is searched for the input FASTA sequences
fasta_dir = '%s/../fasta' % workdir

# Folder that receives the BLAST result files
outdir    = '%s/../blast_results' % workdir
 
#
# Given a directory this returns a list of fasta files
# Change the '*.fasta' pattern below if your files use a different extension.
def get_fasta_files(fasta_dir):
    """Return the paths of the ``*.fasta`` files directly inside *fasta_dir*.

    The directory is made absolute first, so every returned path is
    absolute as well. Sub-directories are not searched, and the list is
    returned in whatever order ``glob`` produces it.
    """
    search_root = os.path.abspath(fasta_dir)
    print('Looking for fasta files in: ',search_root)
    pattern = '%s/*.fasta' % search_root
    return glob.glob(pattern)
 
#
# Returns the output file
def outfile(dir,file):
    """Return the CSV result path for the input *file* inside *dir*.

    Only the base name of *file* is used; its last extension is replaced
    by ``.csv`` (``/data/seq1.fasta`` -> ``<dir>/seq1.csv``).

    Note: the parameter names shadow the ``dir`` and ``file`` builtins; they
    are kept so that existing keyword callers continue to work.
    """
    # splitext() returns (stem, extension); the stem is element 0. The
    # subscript brackets were missing, which made the line a syntax error.
    stem = os.path.splitext(os.path.basename(file))[0]
    return '%s/%s.csv' % (dir, stem)
 
#
# BLASTS the sequence file against the local database
def ncbi_blast(in_file,out_file,dbp):
    """Run blastn for *in_file* from inside the database directory *dbp*.

    The process changes into *dbp* before the command is executed, so the
    database names given to ``db`` are looked up from that directory, and
    any relative *in_file* / *out_file* path is interpreted relative to
    *dbp* as well. The previous working directory is restored afterwards,
    even when the BLAST command fails.

    in_file  -- FASTA file with the query sequences
    out_file -- file that receives the results (outfmt 10, i.e. CSV)
    dbp      -- directory to run the command from

    Any exception raised by the command wrapper is propagated to the caller.
    """
    cur_dir = os.getcwd()
    os.chdir(dbp)
    try:
        # Replace the database names with your own local databases
        # ...plus we are using 8 threads so change according to the resources available
        cmd_line = NcbiblastnCommandline(query=in_file, db="'nt_ncbi_plants fpuk'", out=out_file, outfmt="'10 std score stitle'", max_target_seqs=20,num_threads=8)
        cmd_line()
    finally:
        # Always go back, otherwise a failed run leaves the whole process
        # stranded in the database directory.
        os.chdir(cur_dir)
 
#
# Each fasta file contains a set of sequences that were
# matched for a given tag. They are either reverse or forward.
# We blast them to the database.
def blast_sequences( fastas, odir, db_dir ):
    """BLAST every FASTA file in *fastas*, one after the other.

    For each input the result path is derived with ``outfile(odir, ...)``
    and the search is delegated to ``ncbi_blast``, run against *db_dir*.
    Returns nothing.
    """
    for fasta_path in fastas:
        print( 'Processing: ', fasta_path )
        result_path = outfile(odir, fasta_path)
        ncbi_blast(fasta_path, result_path, db_dir)
 
# Main
# Announce the start of the run.
print('Running')
# Make the work directory the current directory before the inputs are collected.
os.chdir(workdir)
# Collect the input FASTA files, then BLAST each of them; results go to outdir.
fasta_files = get_fasta_files(fasta_dir)
blast_sequences(fasta_files,outdir,blast_db)
print('Done')

_______________________

#
# Imports
import os
import glob
from   Bio import SeqIO
from   Bio.Blast.Applications import NcbiblastnCommandline
 
#
# Defines
# Change these for your specific needs
 
# Main directory for work: the folder that holds this script.
# The original script computed it with
#     workdir = os.path.dirname(os.path.realpath(__file__))
# which needs no editing as long as the script stays in its folder.
# NOTE(review): the hard-coded value assumes the script lives in
# BLAST-Tools/bin under /home/doiar/Scrivania/pyscript -- confirm.
# It must be ONE plain string: a string literal followed by a parenthesised
# expression is treated by Python as a call and fails with
# "TypeError: 'str' object is not callable".
workdir   = '/home/doiar/Scrivania/pyscript/BLAST-Tools/bin'

# The three folders below are siblings of the work directory. '%s' is
# replaced by workdir, so only the relative part is written here; putting a
# full absolute path after '%s' would duplicate the prefix.

# Local BLAST database and GI filter list
blast_db  = '%s/../blast-db' % workdir

# FASTA directory, where to find the sequences
fasta_dir = '%s/../fasta' % workdir

# Output of our BLAST results
outdir    = '%s/../blast_results' % workdir

 
#
# Given a directory this returns a list of fasta files
# Change the '*.fasta' pattern below if your files use a different extension.
def get_fasta_files(fasta_dir):
    """List the ``*.fasta`` files found directly in *fasta_dir*.

    *fasta_dir* is converted to an absolute path before the search, so the
    returned paths are absolute too. The search does not descend into
    sub-directories; the order of the list is the one ``glob`` yields.
    """
    folder = os.path.abspath(fasta_dir)
    print('Looking for fasta files in: ',folder)
    matches = glob.glob('%s/*.fasta' % folder)
    return matches
 
#
# Returns the output file
def outfile(dir,file):
    """Build the path of the CSV result file for *file* inside *dir*.

    The directory part of *file* is dropped and its last extension is
    replaced by ``.csv`` (``/data/seq1.fasta`` -> ``<dir>/seq1.csv``).

    Note: the parameter names shadow the ``dir`` and ``file`` builtins; they
    are kept so that existing keyword callers continue to work.
    """
    # splitext() returns (stem, extension) and we need the stem, i.e. [0].
    # Without the brackets the line does not even parse.
    base_name = os.path.basename(file)
    stem = os.path.splitext(base_name)[0]
    return '%s/%s.csv' % (dir, stem)
 
#
# BLASTS the sequence file against the local database
def ncbi_blast(in_file,out_file,dbp):
    """Run blastn for *in_file* from inside the database directory *dbp*.

    The process changes into *dbp* before the command is executed, so the
    database name given to ``db`` is looked up from that directory, and any
    relative *in_file* / *out_file* path is interpreted relative to *dbp*
    as well. The previous working directory is restored afterwards, even
    when the BLAST command fails.

    in_file  -- FASTA file with the query sequences
    out_file -- file that receives the results (outfmt 10, i.e. CSV)
    dbp      -- directory to run the command from

    Any exception raised by the command wrapper is propagated to the caller.
    """
    cur_dir = os.getcwd()
    os.chdir(dbp)
    try:
        # Replace the database names with your own local databases
        # ...plus we are using 8 threads so change according to the resources available
        cmd_line = NcbiblastnCommandline(query=in_file, db="'ITSNCBIDB'", out=out_file, outfmt="'10 std score stitle'", max_target_seqs=10,num_threads=8)
        cmd_line()
    finally:
        # Always go back, otherwise a failed run leaves the whole process
        # stranded in the database directory.
        os.chdir(cur_dir)
 
#
# Each fasta file contains a set of sequences that were
# matched for a given tag. They are either reverse or forward.
# We blast them to the database.
def blast_sequences( fastas, odir, db_dir ):
    """Run a BLAST search for each FASTA file listed in *fastas*.

    The result file of every input is placed in *odir* (its name comes from
    ``outfile``) and the search itself is carried out by ``ncbi_blast``,
    run against *db_dir*. Returns nothing.
    """
    for query_path in fastas:
        print( 'Processing: ', query_path )
        csv_path = outfile(odir, query_path)
        ncbi_blast(query_path, csv_path, db_dir)
 
# Main
# Announce the start of the run.
print('Running')
# Make the work directory the current directory before the inputs are collected.
os.chdir(workdir)
# Collect the input FASTA files, then BLAST each of them; results go to outdir.
fasta_files = get_fasta_files(fasta_dir)
blast_sequences(fasta_files,outdir,blast_db)
print('Done')