We are trying to get all videos in a channel, like this. This list has 291k videos. We figured out the channel ID of this channel (and replaced the second letter, "C", in the ID with "U" to get its uploads playlist ID), and are trying the code below, iterating over 50 videos at a time. We are getting only up to about 20k videos, no more than that. Any idea how to fix this and get all 291k videos in this channel? We checked this for a variety of channels with a large number of videos; all have the same problem.
from googleapiclient.discovery import build

# Placeholder value: substitute a real YouTube Data API v3 key here.
api_key = "my Google YouTube API V3 key"

# Client object for the 'youtube' service, version 'v3', authenticated
# with the developer key above.
youtube = build('youtube', 'v3', developerKey=api_key)
def get_channel_videos(playlist_id="UU...", client=None):
    """Collect every playlist item the API returns for a playlist.

    Pages through ``playlistItems().list`` 50 items at a time, following
    ``nextPageToken`` until the response no longer carries one.

    NOTE: as reported alongside this code, for very large channels the
    pagination was observed to stop after roughly 20k items, so the result
    may be shorter than the channel's advertised video count.

    Args:
        playlist_id: ID of the playlist to enumerate. The default is the
            placeholder the script was originally hard-coded with.
        client: API client exposing ``playlistItems().list(...).execute()``.
            When omitted, the module-level ``youtube`` client is used.

    Returns:
        A list with the ``items`` entries of every page, in page order.
    """
    if client is None:
        client = youtube

    videos = []
    next_page_token = None
    while True:
        res = client.playlistItems().list(
            playlistId=playlist_id,
            part='snippet',
            maxResults=50,
            pageToken=next_page_token,
        ).execute()
        videos.extend(res['items'])
        # A response without 'nextPageToken' marks the last page.
        next_page_token = res.get('nextPageToken')
        if next_page_token is None:
            break
    return videos
videos = get_channel_videos()

# Video titles routinely contain non-ASCII characters; pin the encoding so
# the write does not depend on the platform's default locale encoding.
# Mode "a" appends, so repeated runs accumulate lines in the same file.
with open("video.txt", "a", encoding="utf-8") as myfile:
    for video in videos:
        myfile.write(f"{video['snippet']['resourceId']['videoId']} => {video['snippet']['title']}\n")

print(f"Total video count => {len(videos)}")
I investigated many different approaches, and the only one that seems to work perfectly is the following, which is based on web-scraping the Videos tab of the specified channel:
import json
import time

import requests
from lxml import html
CHANNEL_HANDLE = '@MLB'

# Download the channel's /videos page and parse it as HTML.
response = requests.get(f'https://www.youtube.com/{CHANNEL_HANDLE}/videos')
# Fail with a clear HTTP error instead of trying to parse an error page.
response.raise_for_status()
text = response.text
tree = html.fromstring(text)

# The page embeds its initial data as a JavaScript assignment of the form
# `ytInitialData = {...}` inside one of its <script> elements.
ytVariableName = 'ytInitialData'
ytVariableDeclaration = ytVariableName + ' = '
for script in tree.xpath('//script'):
    scriptContent = script.text_content()
    if ytVariableDeclaration in scriptContent:
        # Take what follows the declaration and drop its last character
        # (presumably the statement-terminating `;` - TODO confirm) so the
        # remainder parses as JSON.
        ytVariableData = json.loads(scriptContent.split(ytVariableDeclaration)[1][:-1])
        break
else:
    # Without this guard the lookup below would fail with a NameError on
    # `ytVariableData`, hiding the real cause.
    raise RuntimeError(f'{ytVariableName} not found in the page of {CHANNEL_HANDLE}')

# First page of grid entries.
# NOTE(review): tabs[1] is assumed to be the Videos tab - confirm.
contents = ytVariableData['contents']['twoColumnBrowseResultsRenderer']['tabs'][1]['tabRenderer']['content']['richGridRenderer']['contents']
# Every distinct video ID seen so far, across all pages.
videoIds = set()


def treatContents(contents):
    """Record the video IDs of one page and return its continuation token.

    Walks `contents` in order, adding each video's ID to the module-level
    `videoIds` set, and stops at the first entry that is not a
    'richItemRenderer'. Prints the running count of unique IDs once the
    page has been processed.

    Args:
        contents: list of grid entries (dicts) for one page of results.

    Returns:
        Whatever `getContinuationToken(contents)` returns for this page.
    """
    for content in contents:
        if 'richItemRenderer' not in content:
            break
        # NOTE(review): a rich item without a 'videoRenderer' carries no
        # video ID; skip it rather than abort the whole crawl with a
        # KeyError. Confirm whether such items actually occur on this tab.
        videoRenderer = content['richItemRenderer']['content'].get('videoRenderer')
        if videoRenderer is None:
            continue
        videoIds.add(videoRenderer['videoId'])
    print(len(videoIds))
    return getContinuationToken(contents)
def getContinuationToken(contents):
    """Return the continuation token carried by the last entry of a page.

    Args:
        contents: list of entries (dicts) for one page of results.

    Returns:
        The token string found under the last entry's
        'continuationItemRenderer', or None when the page is empty or its
        last entry has no 'continuationItemRenderer'.
    """
    # An empty page has no last entry to inspect.
    if not contents:
        return None
    # Sometimes have 29 actual results instead of 30, so look at the last
    # entry rather than at a fixed position.
    lastContent = contents[-1]
    if 'continuationItemRenderer' not in lastContent:
        return None
    return lastContent['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token']
# Process the first page; the returned token (if any) drives the loop below.
continuationToken = treatContents(contents)

url = 'https://www.youtube.com/youtubei/v1/browse'
headers = {
    'Content-Type': 'application/json'
}
requestData = {
    'context': {
        'client': {
            'clientName': 'WEB',
            'clientVersion': '2.20240313.05.00'
        }
    }
}

# A response sometimes lacks 'onResponseReceivedActions'; the same request
# is then retried. Bound the consecutive retries and pause between them so
# a persistent failure cannot spin forever against the server.
MAX_CONSECUTIVE_RETRIES = 10
RETRY_DELAY_SECONDS = 1
consecutiveRetries = 0

# Keep requesting pages until one carries no continuation token.
while continuationToken is not None:
    requestData['continuation'] = continuationToken
    data = requests.post(url, headers=headers, json=requestData).json()
    # Happens not deterministically sometimes.
    if 'onResponseReceivedActions' not in data:
        consecutiveRetries += 1
        if consecutiveRetries > MAX_CONSECUTIVE_RETRIES:
            raise RuntimeError(
                f'No onResponseReceivedActions after {MAX_CONSECUTIVE_RETRIES} consecutive retries'
            )
        print('Retrying')
        time.sleep(RETRY_DELAY_SECONDS)
        continue
    consecutiveRetries = 0
    continuationItems = data['onResponseReceivedActions'][0]['appendContinuationItemsAction']['continuationItems']
    continuationToken = treatContents(continuationItems)
While the @MLB About page claims 291,597 videos, my method finds 289,814 unique videos. It is unknown where the difference in count comes from; possibly from Lives and unlisted videos.