I'm trying to run a BLAST search with a nucleotide sequence and print the best-matching hit, but I'm not sure which option or command I should use. There are options like `max_hsps` and `best_hit_overhang`. I don't understand the differences between them, and I want to print just one hit (the best-matching one). Should I use `max_hsps 1`?
I wrote this code, but it still doesn't work. If you could tell me where I went wrong and what I should do, I would really appreciate it :) Thank you!
from Bio.Blast import NCBIWWW
from Bio.Blast import NCBIXML
from Bio.Seq import Seq
# Query nucleotide sequence passed to the BLAST search below.
# NOTE(review): the run of dots looks like an elision placeholder, not real
# sequence data -- substitute the full sequence before running (TODO confirm).
seq = Seq("GTTGA......CT")
def best_matching_hit(seq):
    """Run a remote blastn search against ``nt`` and print the top hit.

    The search is submitted with ``hitlist_size=1`` so that the service is
    asked for a single hit only. The title of that hit and the subject
    sequence of its first HSP are printed.

    Args:
        seq: The nucleotide query handed to ``NCBIWWW.qblast``.

    Returns:
        None. Results are written to standard output; ``'BLAST run failed!'``
        is printed if the search request raises.
    """
    try:
        # Limit the result on the server side instead of filtering the
        # parsed record afterwards.
        result_handle = NCBIWWW.qblast("blastn", "nt", seq, hitlist_size=1)
    except Exception:
        # `except Exception` (not a bare `except`) so that Ctrl-C and
        # SystemExit still propagate.
        print('BLAST run failed!')
        return None

    # Close the handle as soon as the XML has been parsed.
    with result_handle:
        blast_record = NCBIXML.read(result_handle)

    if not blast_record.alignments:
        print('No hits found.')
        return None

    # Presumably alignments are listed best-first, so index 0 is the best
    # match -- verify against the BLAST output ordering if this matters.
    best_hit = blast_record.alignments[0]
    print(best_hit.title)
    if best_hit.hsps:
        print(best_hit.hsps[0].sbjct)
    return None
# Script entry point: run the search for the module-level query `seq`.
best_matching_hit(seq)
This returns just one hit (the first one, I suppose), as per "Limiting the number of hits in a Biopython NCBIWWW Search" on Biostars:
#!/usr/bin/env python3
# -*- coding: utf-8 -*-
"""
Created on Mon Jun 7 15:28:11 2021
@author: Pietro
https://stackoverflow.com/questions/67872118/how-to-print-the-best-matching-hit-in-the-blast-search-biopython
"""
from Bio.Blast import NCBIWWW
from Bio.Seq import Seq
# Query nucleotide sequence for the BLAST search. Adjacent string literals
# are joined by the parser, so the chunks below form one continuous string.
seq = Seq(
    "ATGGCGTGGAATGAGCCTGGAAATAACAACGGCAACAATGGCCGCGATAATGACCCTTGGGGTAATAA"
    "TAATCGTGGTGGCCAGCGTCCTGGTGGCCGAGATCAAGGTCCGCCAGATTTAGATGAAGTGTTCAACAA"
    "ACTGAGTCAAAAGCTGGGTGGCAAGTTTGGTAAAAAAGGCGGCGGTGGTTCCTCTATCGGCGGTGGCGG"
    "TGGTGCAATTGGCTTTGGTGTCATTGCGATCATTGCAATTGCGGTGTGGATTTTCGCTGGTTTTTACAC"
    "CATCGGTGAAGCAGAGCGTGGTGTTGTACTGCGTTTAGGTAAATACGATCGTATCGTAGACCCAGGCCT"
    "TAACTGGCGTCCTCGTTTTATTGATGAATACGAAGCGGTTAACGTACAAGCGATTCGCTCACTACGTGC"
    "ATCTGGTCTAATGCTGACGAAAGATGAAAACGTAGTAACGGTTGCAATGGACGTTCAATACCGAGTTGC"
    "TGACCCATACAAATACCTATACCGCGTGACCAATGCAGATGATAGCTTGCGTCAAGCAACAGACTCTGC"
    "GCTACGTGCGGTAATTGGTGATTCACTAATGGATAGCATTCTAACCAGTGGTCGTCAGCAAATTCGTCA"
    "AAGCACTCAAGAAACACTAAACCAAATCATCGATAGCTATGATATGGGTCTGGTGATTGTTGACGTGAA"
    "CTTCCAGTCTGCACGTCCGCCAGAGCAAGTAAAAGATGCGTTTGATGACGCGATTGCTGCGCGTGAGGA"
    "TGAAGAGCGTTTCATCCGTGAAGCAGAAGCTTACAAGAACGAAATCTTGCCGAAGGCAACGGGTCGTGC"
    "TGAACGTTTGAAGAAGGAAGCTCAAGGTTACAACGAGCGTGTAACTAACGAAGCATTAGGTCAAGTAGC"
    "ACAGTTTGAAAAACTACTACCTGAATACCAAGCGGCTCCTGGCGTAACACGTGACCGTCTGTACATTGA"
    "CGCGATGGAAGAGGTTTACACCAACACATCTAAAGTGTTGATTGACTCTGAATCAAGCGGCAACCTTTT"
    "GTACCTACCAATCGATAAATTGGCAGGTCAAGAAGGCCAAACAGACACTAAACGTAAATCGAAATCTTC"
    "TTCAACCTACGATCACATTCAACTAGAGTCTGAGCGTACACAAGAAGAAACATCGAACACGCAGTCTCG"
    "TTCAACAGGTACACGTCAAGGGAGATACTAA"
)
def best_matching_hit(seq):
    """Run a remote blastn search limited to one hit and print the raw result.

    The query is submitted to ``NCBIWWW.qblast`` against the ``nt`` database
    with ``hitlist_size=1``, and whatever the returned handle yields from
    ``read()`` is printed unchanged.

    Args:
        seq: The nucleotide query handed to ``NCBIWWW.qblast``.

    Returns:
        None. Output goes to standard output; ``'BLAST run failed!'`` is
        printed if the search request raises.
    """
    try:
        result_handle = NCBIWWW.qblast("blastn", "nt", seq, hitlist_size=1)
    except Exception:
        # `except Exception` (not a bare `except`) so that Ctrl-C and
        # SystemExit still propagate.
        print('BLAST run failed!')
        return None

    # Release the handle once its contents have been read.
    with result_handle:
        blast_record = result_handle.read()
    print(blast_record)
# Script entry point: run the search for the module-level query `seq`.
best_matching_hit(seq)