python python-3.x azure-blob-storage youtube-dl yt-dlp

Downloading content of Youtube Audio file directly to Azure Blob Storage


I am trying to download youtube videos / audio files and upload them to Azure Blob Storage without downloading them locally. I am using yt-dlp as it seems to be the most robust tool for my task.

I've been trying an approach like below:

import io
from yt_dlp import YoutubeDL
from azure.storage.blob import BlobServiceClient

# Script from the question: tries to download a video with yt-dlp and upload
# it to Azure Blob Storage without writing a local file.
# NOTE(review): the URL literal below is missing its closing quote as posted.
video_url = 'https://www.youtube.com/watch?v=<video>

# Placeholder storage settings; the blob client targets one blob in one container.
connect_str = 'DefaultEndpointsProtocol=https;AccountName=accountname;AccountKey=accountkey;EndpointSuffix=core.windows.net'
container_name = 'container_name'
blob_name = 'video_name.mp4'

blob_service_client = BlobServiceClient.from_connection_string(connect_str)
blob_client = blob_service_client.get_blob_client(container=container_name, blob=blob_name)

ydl_opts = {
    'format': 'best',
    'quiet': True,
    # '-' sends the downloaded bytes to the process's stdout stream, not to
    # any Python object created in this script.
    'outtmpl': '-',  # Output to stdout
}

# NOTE: byte_stream is created here but never handed to yt-dlp, so nothing in
# this script writes into it. The download goes to stdout instead; in the
# traceback below that stream is ipykernel's OutStream, whose write() only
# accepts str, which is where the TypeError is raised.
with YoutubeDL(ydl_opts) as ydl, io.BytesIO() as byte_stream:
    ydl.download([video_url])
    byte_stream.seek(0)
    blob_client.upload_blob(byte_stream, overwrite=True)

print(f'Uploaded {blob_name} to Azure Blob Storage.')

but it throws

TypeError: write() argument must be str, not <class 'bytes'>

I would appreciate any help.

EDIT: Complete traceback:

---------------------------------------------------------------------------
TypeError                                 Traceback (most recent call last)
Cell In[4], line 27
     24 # Download video and upload to Azure Blob Storage
     25 with YoutubeDL(ydl_opts) as ydl, io.BytesIO() as byte_stream:
     26     # Redirect yt-dlp's stdout to the byte stream
---> 27     ydl.download([video_url])
     28     byte_stream.seek(0)
     29     blob_client.upload_blob(byte_stream, overwrite=True)

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:3618, in YoutubeDL.download(self, url_list)
   3615     raise SameFileError(outtmpl)
   3617 for url in url_list:
-> 3618     self.__download_wrapper(self.extract_info)(
   3619         url, force_generic_extractor=self.params.get('force_generic_extractor', False))
   3621 return self._download_retcode

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:3591, in YoutubeDL.__download_wrapper.<locals>.wrapper(*args, **kwargs)
   3588 @functools.wraps(func)
   3589 def wrapper(*args, **kwargs):
   3590     try:
-> 3591         res = func(*args, **kwargs)
   3592     except CookieLoadError:
   3593         raise

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1626, in YoutubeDL.extract_info(self, url, download, ie_key, extra_info, process, force_generic_extractor)
   1624             raise ExistingVideoReached
   1625         break
-> 1626     return self.__extract_info(url, self.get_info_extractor(key), download, extra_info, process)
   1627 else:
   1628     extractors_restricted = self.params.get('allowed_extractors') not in (None, ['default'])

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1637, in YoutubeDL._handle_extraction_exceptions.<locals>.wrapper(self, *args, **kwargs)
   1635 while True:
   1636     try:
-> 1637         return func(self, *args, **kwargs)
   1638     except (CookieLoadError, DownloadCancelled, LazyList.IndexError, PagedList.IndexError):
   1639         raise

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1793, in YoutubeDL.__extract_info(self, url, ie, download, extra_info, process)
   1791 if process:
   1792     self._wait_for_video(ie_result)
-> 1793     return self.process_ie_result(ie_result, download, extra_info)
   1794 else:
   1795     return ie_result

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:1852, in YoutubeDL.process_ie_result(self, ie_result, download, extra_info)
   1850 if result_type == 'video':
   1851     self.add_extra_info(ie_result, extra_info)
-> 1852     ie_result = self.process_video_result(ie_result, download=download)
   1853     self._raise_pending_errors(ie_result)
   1854     additional_urls = (ie_result or {}).get('additional_urls')

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:3024, in YoutubeDL.process_video_result(self, info_dict, download)
   3022 downloaded_formats.append(new_info)
   3023 try:
-> 3024     self.process_info(new_info)
   3025 except MaxDownloadsReached:
   3026     max_downloads_reached = True

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:177, in _catch_unsafe_extension_error.<locals>.wrapper(self, *args, **kwargs)
    174 @functools.wraps(func)
    175 def wrapper(self, *args, **kwargs):
    176     try:
--> 177         return func(self, *args, **kwargs)
    178     except _UnsafeExtensionError as error:
    179         self.report_error(
    180             f'The extracted extension ({error.extension!r}) is unusual '
    181             'and will be skipped for safety reasons. '
    182             f'If you believe this is an error{bug_reports_message(",")}')

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:3492, in YoutubeDL.process_info(self, info_dict)
   3488 dl_filename = existing_video_file(full_filename, temp_filename)
   3489 if dl_filename is None or dl_filename == temp_filename:
   3490     # dl_filename == temp_filename could mean that the file was partially downloaded with --no-part.
   3491     # So we should try to resume the download
-> 3492     success, real_download = self.dl(temp_filename, info_dict)
   3493     info_dict['__real_download'] = real_download
   3494 else:

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/YoutubeDL.py:3212, in YoutubeDL.dl(self, name, info, subtitle, test)
   3210 if new_info.get('http_headers') is None:
   3211     new_info['http_headers'] = self._calc_headers(new_info)
-> 3212 return fd.download(name, new_info, subtitle)

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/downloader/common.py:464, in FileDownloader.download(self, filename, info_dict, subtitle)
    461     self.to_screen(f'[download] Sleeping {sleep_interval:.2f} seconds ...')
    462     time.sleep(sleep_interval)
--> 464 ret = self.real_download(filename, info_dict)
    465 self._finish_multiline_status()
    466 return ret, True

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/downloader/http.py:368, in HttpFD.real_download(self, filename, info_dict)
    366 try:
    367     establish_connection()
--> 368     return download()
    369 except RetryDownload as err:
    370     retry.error = err.source_error

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/yt_dlp/downloader/http.py:279, in HttpFD.real_download.<locals>.download()
    276             self.report_error(f'unable to set filesize xattr: {err}')
    278 try:
--> 279     ctx.stream.write(data_block)
    280 except OSError as err:
    281     self.to_stderr('\n')

File ~/.virtualenvs/myenv/lib/python3.12/site-packages/ipykernel/iostream.py:668, in OutStream.write(self, string)
    666 if not isinstance(string, str):
    667     msg = f"write() argument must be str, not {type(string)}"  # type:ignore[unreachable]
--> 668     raise TypeError(msg)
    670 if self.echo is not None:
    671     try:

TypeError: write() argument must be str, not <class 'bytes'>

Solution

  • Downloading content of Youtube Audio file directly to Azure Blob Storage.

    You can use the code below to download the content of a sample YouTube video directly to Azure Blob Storage using the Python SDK.

    Code:

    import subprocess
    from azure.storage.blob import BlobServiceClient
    import uuid
    
    # Stream a YouTube download from the yt-dlp command-line tool straight into
    # an Azure block blob, without writing the media to local disk.

    # Video URL
    video_url = "https://www.youtube.com/watch?xxx"

    connect_str = "xxxxx"
    container_name = "test"
    blob_name = "sample.mp4"

    blob_client = BlobServiceClient.from_connection_string(connect_str).get_blob_client(
        container=container_name, blob=blob_name
    )

    # "-o -" makes yt-dlp write the media bytes to its stdout, which is read
    # through a pipe below.
    yt_dlp_cmd = ["yt-dlp", "-f", "best", "-o", "-", video_url]

    chunk_size = 4 * 1024 * 1024  # Read and upload in 4MB pieces
    block_list = []  # Block IDs in upload order; the commit assembles them in this order

    # stderr is deliberately NOT piped: a piped stderr that nobody reads fills
    # the OS pipe buffer, yt-dlp then blocks on its next log/progress write and
    # the stdout loop below waits forever. Leaving it inherited also keeps
    # yt-dlp's error messages visible.
    with subprocess.Popen(yt_dlp_cmd, stdout=subprocess.PIPE) as process:
        while chunk := process.stdout.read(chunk_size):
            # 32 hex characters: unique, and the same length for every block.
            block_id = uuid.uuid4().hex
            blob_client.stage_block(block_id=block_id, data=chunk)
            block_list.append(block_id)

    # Leaving the "with" block waits for yt-dlp to exit, so returncode is set.
    # Staged blocks are not part of the blob until committed, so failing here
    # avoids publishing an empty or truncated file as if it were complete.
    if process.returncode != 0:
        raise subprocess.CalledProcessError(process.returncode, yt_dlp_cmd)

    blob_client.commit_block_list(block_list)
    print(f"Upload complete! File stored as: {blob_name}")
    

    The above code streams the YouTube video directly from yt-dlp to Azure Blob Storage without saving it locally. It reads the video data in 4MB chunks from stdout, uploads each chunk as a block using stage_block, and then finalizes the upload by committing all blocks with commit_block_list.

    Output:

    Upload complete! File stored as: sample.mp4
    

    enter image description here

    Portal: enter image description here

    Reference: yt-dlp/yt-dlp: A feature-rich command-line audio/video downloader