I am trying to download YouTube videos / audio files and upload them to Azure Blob Storage without saving them locally. I am using yt-dlp, as it seems to be the most robust tool for this task.
I've been trying an approach like below:
import io
from yt_dlp import YoutubeDL
from azure.storage.blob import BlobServiceClient
# Attempt to send a yt-dlp download straight to Azure Blob Storage.
# Replace <video> with the ID of the video to fetch.
video_url = 'https://www.youtube.com/watch?v=<video>'
connect_str = 'DefaultEndpointsProtocol=https;AccountName=accountname;AccountKey=accountkey;EndpointSuffix=core.windows.net'
container_name = 'container_name'
blob_name = 'video_name.mp4'

# Client bound to the destination blob.
blob_service_client = BlobServiceClient.from_connection_string(connect_str)
blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)

ydl_opts = {
    'format': 'best',
    'quiet': True,
    'outtmpl': '-',  # Output to stdout
}

# NOTE: nothing here connects byte_stream to yt-dlp. With 'outtmpl': '-'
# the downloader writes the raw media bytes to the process's stdout, so
# byte_stream is still empty when it is uploaded. Under a Jupyter kernel
# stdout is a text-only stream, which is why download() fails with
# "write() argument must be str, not <class 'bytes'>".
with YoutubeDL(ydl_opts) as ydl, io.BytesIO() as byte_stream:
    ydl.download([video_url])
    byte_stream.seek(0)
    blob_client.upload_blob(byte_stream, overwrite=True)

print(f'Uploaded {blob_name} to Azure Blob Storage.')
but it throws
TypeError: write() argument must be str, not <class 'bytes'>
I would appreciate any help.
EDIT: Complete traceback:
---------------------------------------------------------------------------
TypeError Traceback (most recent call last)
Cell In[4], line 27
24 # Download video and upload to Azure Blob Storage
25 with YoutubeDL(ydl_opts) as ydl, io.BytesIO() as byte_stream:
26 # Redirect yt-dlp's stdout to the byte stream
---> 27 ydl.download([video_url])
28 byte_stream.seek(0)
29 blob_client.upload_blob(byte_stream, overwrite=True)
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:3618, in YoutubeDL.download(self, url_list)
3615 raise SameFileError(outtmpl)
3617 for url in url_list:
-> 3618 self.__download_wrapper(self.extract_info)(
3619 url, force_generic_extractor=self.params.get('force_generic_extractor', False))
3621 return self._download_retcode
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:3591, in YoutubeDL.__download_wrapper.<locals>.wrapper(*args, **kwargs)
3588 @functools.wraps(func)
3589 def wrapper(*args, **kwargs):
3590 try:
-> 3591 res = func(*args, **kwargs)
3592 except CookieLoadError:
3593 raise
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1626, in YoutubeDL.extract_info(self, url, download, ie_key, extra_info, process, force_generic_extractor)
1624 raise ExistingVideoReached
1625 break
-> 1626 return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
1627 else:
1628 extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1637, in YoutubeDL._handle_extraction_exceptions.<locals>.wrapper(self, *args, **kwargs)
1635 while True:
1636 try:
-> 1637 return func(self, *args, **kwargs)
1638 except (CookieLoadError, DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
1639 raise
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1793, in YoutubeDL.__extract_info(self, url, ie, download, extra_info, process)
1791 if process:
1792 self._wait_for_video(ie_result)
-> 1793 return self.process_ie_result(ie_result, download, extra_info)
1794 else:
1795 return ie_result
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1852, in YoutubeDL.process_ie_result(self, ie_result, download, extra_info)
1850 if result_type == 'video':
1851 self.add_extra_info(ie_result, extra_info)
-> 1852 ie_result = self.process_video_result(ie_result, download=download)
1853 self._raise_pending_errors(ie_result)
1854 additional_urls = (ie_result or {}).get('additional_urls')
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:3024, in YoutubeDL.process_video_result(self, info_dict, download)
3022 downloaded_formats.append(new_info)
3023 try:
-> 3024 self.process_info(new_info)
3025 except MaxDownloadsReached:
3026 max_downloads_reached = True
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:177, in _catch_unsafe_extension_error.<locals>.wrapper(self, *args, **kwargs)
174 @functools.wraps(func)
175 def wrapper(self, *args, **kwargs):
176 try:
--> 177 return func(self, *args, **kwargs)
178 except _UnsafeExtensionError as error:
179 self.report_error(
180 f'The extracted extension ({error.extension!r}) is unusual '
181 'and will be skipped for safety reasons. '
182 f'If you believe this is an error{bug_reports_message(",")}')
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:3492, in YoutubeDL.process_info(self, info_dict)
3488 dl_filename = existing_video_file(full_filename, temp_filename)
3489 if dl_filename is None or dl_filename == temp_filename:
3490 # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
3491 # So we should try to resume the download
-> 3492 success, real_download = self.dl(temp_filename, info_dict)
3493 info_dict['__real_download'] = real_download
3494 else:
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:3212, in YoutubeDL.dl(self, name, info, subtitle, test)
3210 if new_info.get('http_headers') is None:
3211 new_info['http_headers'] = self._calc_headers(new_info)
-> 3212 return fd.download(name, new_info, subtitle)
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/downloader/common.py:464, in FileDownloader.download(self, filename, info_dict, subtitle)
461 self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
462 time.sleep(sleep_interval)
--> 464 ret = self.real_download(filename, info_dict)
465 self._finish_multiline_status()
466 return ret, True
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/downloader/http.py:368, in HttpFD.real_download(self, filename, info_dict)
366 try:
367 establish_connection()
--> 368 return download()
369 except RetryDownload as err:
370 retry.error = err.source_error
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/downloader/http.py:279, in HttpFD.real_download.<locals>.download()
276 self.report_error(f'unable to set filesize xattr: {err}')
278 try:
--> 279 ctx.stream.write(data_block)
280 except OSError as err:
281 self.to_stderr('\n')
File ~/.virtualenvs/myenv/lib/python3.12/site-packages/ipykernel/iostream.py:668, in OutStream.write(self, string)
666 if not isinstance(string, str):
667 msg = f"write() argument must be str, not {type(string)}" # type:ignore[unreachable]
--> 668 raise TypeError(msg)
670 if self.echo is not None:
671 try:
TypeError: write() argument must be str, not <class 'bytes'>
Downloading content of Youtube Audio file directly to Azure Blob Storage.
You can use the code below to stream the content of a YouTube video
directly to Azure Blob Storage using the Azure Python SDK.
Code:
import subprocess
from azure.storage.blob import BlobServiceClient
import uuid
# Stream a YouTube download from the yt-dlp CLI into an Azure block blob
# without writing the media to local disk.

# Video URL
video_url = "https://www.youtube.com/watch?xxx"
connect_str = "xxxxx"
container_name = "test"
blob_name = "sample.mp4"
blob_client = BlobServiceClient.from_connection_string(connect_str).get_blob_client(
    container=container_name, blob=blob_name
)

# Download video as stream: "-o -" makes yt-dlp write the media to stdout.
yt_dlp_cmd = ["yt-dlp", "-f", "best", "-o", "-", video_url]
chunk_size = 4 * 1024 * 1024  # Read 4MB chunks
block_list = []

# stderr is intentionally NOT piped: a PIPE that is never read fills up and
# blocks yt-dlp, which in turn stalls the stdout read loop (deadlock).
# Leaving it inherited also keeps yt-dlp's own error messages visible.
with subprocess.Popen(yt_dlp_cmd, stdout=subprocess.PIPE) as process:
    # read() returns b"" at end of stream, which ends the iteration.
    for chunk in iter(lambda: process.stdout.read(chunk_size), b""):
        block_id = uuid.uuid4().hex  # Generate unique block ID
        block_list.append(block_id)
        blob_client.stage_block(block_id=block_id, data=chunk)
    return_code = process.wait()

# Only commit when yt-dlp succeeded; otherwise a truncated or empty
# download would be published as if it were a complete file.
if return_code != 0:
    raise subprocess.CalledProcessError(return_code, yt_dlp_cmd)

blob_client.commit_block_list(block_list)
print(f"Upload complete! File stored as: {blob_name}")
The above code streams the YouTube video directly from yt-dlp to Azure Blob Storage without saving it locally.
It reads the video data in 4MB chunks from stdout, uploads each chunk as a block using stage_block,
and then finalizes the upload by committing all blocks with commit_block_list.
Output:
Upload complete! File stored as: sample.mp4
Portal:
Reference: yt-dlp/yt-dlp: A feature-rich command-line audio/video downloader