I'm trying to download the nr BLAST database from NCBI's FTP site (ftp://ftp.ncbi.nlm.nih.gov/blast/db). One of the files is quite large (16GB) and takes some time to download. At the end of downloading this file the program just hangs; it does not move on to the next file.
The portion of my program related to downloading the files is:
from pathlib import Path
import ftplib
from tqdm import tqdm
def _file_write_progress(block, fh, pbar):
    """Persist one received chunk and advance the progress bar by its size.

    Intended as the per-block callback for ``ftp.retrbinary``.

    Args:
        block (binary): Chunk of data handed over by ftp.retrbinary
        fh (BufferedWriter): Destination file, already opened in wb mode
        pbar (ProgressBar): Progress bar tracking the number of bytes received
    """
    received = len(block)
    fh.write(block)
    pbar.update(received)
def _download_ftp_files(url, remote_path, files_list, db_dir):
    """Download files from an ftp server, showing a progress bar per file.

    Connects with a 3600 second socket timeout, logs in with ftplib's
    default credentials, changes to ``remote_path`` and retrieves every
    entry of ``files_list`` in binary mode into ``db_dir``.

    Args:
        url (str): Url of ftp server to connect to
        remote_path (str): Path to directory containing target files
        files_list (list(str)): List of files to download
        db_dir (Path): Path to local directory to download files to
    """
    ftp = ftplib.FTP(url, timeout=3600)
    # try/finally: the control connection must be released even when
    # login, cwd, size or the transfer itself raises.
    try:
        ftp.login()
        ftp.cwd(remote_path)
        for fn in tqdm(files_list, desc="Downloading file #"):
            with (db_dir / fn).open('wb') as fh:
                # The context manager closes the per-file bar when the
                # transfer ends or fails, so finished bars do not pile up
                # underneath the outer "file #" bar.
                with tqdm(desc=fn, total=ftp.size(fn)) as pbar:
                    ftp.retrbinary(
                        'RETR ' + fn,
                        lambda block: _file_write_progress(block, fh, pbar),
                        1024*1024
                    )
    finally:
        ftp.close()
I think the issue is related to the ftp connection timing out but I can't seem to fix it.
I have tried the solutions from "Python: ftplib hangs at end of transfer" and "Python: File download using ftplib hangs forever after file is successfully downloaded", but I can't seem to get them to work.
Here is the above code, modified based on these answers:
def _background(sock, fh, pbar):
    """Drain a data socket into a file, updating a progress bar as it goes.

    Reads up to 1 MiB at a time until ``recv`` returns an empty result.

    Args:
        sock (socket): Data connection to read from
        fh (BufferedWriter): Open file to write to in wb mode
        pbar (ProgressBar): Progress bar to update with download progress
    """
    chunk = sock.recv(1024*1024)
    while chunk:
        fh.write(chunk)
        pbar.update(len(chunk))
        chunk = sock.recv(1024*1024)
def _download_ftp_files(url, remote_path, files_list, db_dir):
    """Download ftp files in a worker thread while keeping the session alive.

    Each file is read from the data connection by ``_background`` in a
    separate thread; the calling thread sends a NOOP on the control
    connection every 60 seconds for as long as the transfer is running.

    Args:
        url (str): Url of ftp server to connect to
        remote_path (str): Path to directory containing target files
        files_list (list(str)): List of files to download
        db_dir (Path): Path to local directory to download files to
    """
    # Imported locally so this function does not depend on a module-level
    # ``import threading`` being present.
    import threading

    ftp = ftplib.FTP(url)
    try:
        ftp.login()
        ftp.cwd(remote_path)
        # retrbinary() switches to binary (image) mode itself;
        # ntransfercmd() does not, so it has to be requested explicitly.
        ftp.voidcmd('TYPE I')
        for fn in tqdm(files_list, desc="Downloading file #"):
            try:
                sock, size = ftp.ntransfercmd('RETR ' + fn)
                try:
                    with (db_dir / fn).open('wb') as fh:
                        with tqdm(desc=fn, total=size) as pbar:
                            # Pass the callable and its arguments separately:
                            # target=_background(...) would run the whole
                            # download right here and give the thread None.
                            t = threading.Thread(
                                target=_background, args=(sock, fh, pbar))
                            t.start()
                            while True:
                                t.join(60)
                                if not t.is_alive():
                                    break
                                # Transfer still running: ping the control
                                # connection so it is not dropped as idle.
                                ftp.voidcmd('NOOP')
                finally:
                    sock.close()
                # Consume the final reply to RETR ("226 Transfer complete").
                # Left unread, it is taken as the answer to the next
                # command and surfaces as ftplib.error_reply.
                ftp.voidresp()
            except ftplib.error_reply as e:
                print(e)
    finally:
        ftp.close()
This raises an ftplib.error_reply exception with the message "226 Transfer Completed" for some reason. I try to handle it, but the program just freezes.
I can provide more info if needed; any help is appreciated!
Okay, I switched to ftputil which wraps ftplib and seems to work better for now.
The following is the modified code:
def _download_ftp_files(url, remote_path, files_list, db_dir):
    """Download ftp files via ftputil and update a progress bar per file.

    Args:
        url (str): URL of ftp server to connect to
        remote_path (str): Path to directory containing target files
        files_list (list(str)): List of files to download
        db_dir (Path): Path to local directory to download files to
    """
    with ftputil.FTPHost(url, user='anonymous', passwd='@anonymous') as ftp_host:
        ftp_host.chdir(remote_path)
        for fn in tqdm(files_list, desc="Downloading file #"):
            # The context manager closes the per-file bar once the download
            # returns or raises, so finished bars do not accumulate under
            # the outer "file #" bar.
            with tqdm(desc=fn, total=ftp_host.path.getsize(fn)) as pbar:
                ftp_host.download(
                    fn, str(db_dir / fn),
                    lambda block: pbar.update(len(block)))