php, regex, youtube, text-extraction, urlparse

Parse video id and start time from differently formatted youtube URL strings


I needed to extract the video Id and the start time from any kind of youtube url that the users can input. I have a working solution but it is not right.

Questions:

UPDATE 2024/01/16: It has to work with playlists too.

I have checked this stackoverflow page to build my own youtube url parser.

This preg_match can extract the video Id and the start time but cannot handle the many different youtube url formats:

// Group 1 = video id, group 2 = digits after an optional "t=" (empty when absent).
// Only the "youtube.com/watch?v=" and "youtu.be/" URL shapes are recognised by this pattern.
preg_match("/[a-zA-Z\/\/:\.]*youtu(?:be.com\/watch\?v=|.be\/)([a-zA-Z0-9\-_]+)(?:[&?\/]t=)?(\d*)(?:[a-zA-Z0-9\/\*\-\_\?\&\;\%\=\.]*)/i", $url, $matches);

This preg_match handles many different youtube urls (maybe all kinds?) but doesn't extract the start time:

// Group 1 = video id (everything up to the next ?, &, quote or >). There is no group for the start time.
// Accepts an optional scheme, optional "www." / "m." prefixes, and the youtu.be, watch?v=/vi=, embed, v, vi, user and shorts shapes.
preg_match("/^(?:http(?:s)?:\/\/)?(?:www\.)?(?:m\.)?(?:youtu\.be\/|youtube\.com\/(?:(?:watch)?\?(?:.*&)?v(?:i)?=|(?:embed|v|vi|user|shorts)\/))([^\?&\"'>]+)/", $url, $matches);

I have changed it and it works for me, but I know that my change is not right because I don't parse the end of the url properly:

// Same id pattern as above, followed by an optional "t=" marker and a digits group (group 2).
// The time is only captured when "t=" comes directly after the id; the rest of the URL is not inspected.
preg_match("/^(?:http(?:s)?:\/\/)?(?:www\.)?(?:m\.)?(?:youtu\.be\/|youtube\.com\/(?:(?:watch)?\?(?:.*&)?v(?:i)?=|(?:embed|v|vi|user|shorts)\/))([^\?&\"'>]+)(?:[&?\/]t=)?(\d*)/", $url, $matches);

The code

<?php
declare(strict_types=1);

namespace AppBundle\Value;

/**
 * Value object holding the video id and optional start time parsed from a YouTube URL.
 *
 * Instances are created through {@see YoutubeVideoData::fromUrl()}; both properties are
 * null when the URL is not recognised as a YouTube video URL.
 */
class YoutubeVideoData
{
    private function __construct(public ?string $videoId = null, public ?int $time = null)
    {
    }

    /**
     * Extracts the video id and the start time (whole seconds) from a YouTube URL.
     *
     * Supported shapes: youtu.be/<id>, youtube.com/watch?v=<id> (also `vi=` and with other
     * parameters before it), and youtube.com/(embed|v|vi|user|shorts)/<id>, each with an
     * optional scheme and optional "www." / "m." prefix.
     *
     * @param string $url URL as entered by the user.
     *
     * @return self Video id and time; both null when the URL does not match.
     */
    public static function fromUrl(string $url): self
    {
        // The id stops at the first query/fragment delimiter, quote, ">" or whitespace, so a
        // trailing "#action=share " or "?feature=..." never ends up inside the id.
        $idPattern = '~^(?:https?://)?(?:www\.)?(?:m\.)?'
            . '(?:youtu\.be/|youtube\.com/(?:(?:watch)?\?(?:.*&)?vi?=|(?:embed|v|vi|user|shorts)/))'
            . '([^?&#"\'>\s]+)~';

        if (preg_match($idPattern, $url, $idMatch) !== 1) {
            return new self(null, null);
        }

        // Look for the start time anywhere in the URL rather than only directly after the id,
        // so "?v=<id>&list=...&t=30" keeps its time.
        $time = null;
        if (preg_match('~[?&#]t=(\d+)~', $url, $timeMatch) === 1) {
            $time = (int) $timeMatch[1];
        }

        return new self($idMatch[1], $time);
    }

}

The tests:

<?php

namespace Justimmo\Tests\Value;

use AppBundle\Value\YoutubeVideoData;
use PHPUnit\Framework\Attributes\DataProvider;
use PHPUnit\Framework\TestCase;

/**
 * Data-driven tests for YoutubeVideoData::fromUrl().
 *
 * Every dataset is [url, expected video id, expected start time]; null means
 * "not present / not a YouTube URL".
 */
#[\PHPUnit\Framework\Attributes\CoversClass(YoutubeVideoData::class)]
class YoutubeVideoDataTest extends TestCase
{
    /**
     * Parses one URL and checks both extracted values with strict comparison.
     */
    #[DataProvider('urlProvider')]
    public function testUrls(string $url, ?string $expectedVideoId, ?int $expectedTime): void
    {
        $videoData = YoutubeVideoData::fromUrl($url);

        $this->assertSame($expectedVideoId, $videoData->videoId);
        $this->assertSame($expectedTime, $videoData->time);
    }

    /**
     * @return iterable<string, array{0: string, 1: ?string, 2: ?int}>
     */
    public static function urlProvider(): iterable
    {
        // vimeo
        yield 'vimeo' => ['https://vimeo.com/1016625668', null, null];

        // playlist
        yield 'youtube_link_pl1' => ['https://www.youtube.com/watch?v=YjdIF7PuUug&list=PLiIQbaWYR99iZFpLIJ5SImA2y8DaDTv9G&index=21', 'YjdIF7PuUug', null];
        yield 'youtube_link_pl2' => ['https://www.youtube.com/watch?list=PLiIQbaWYR99iZFpLIJ5SImA2y8DaDTv9G&v=YjdIF7PuUug&index=21', 'YjdIF7PuUug', null];
        yield 'youtube_link_pl3' => ['https://youtu.be/YjdIF7PuUug?list=PLiIQbaWYR99iZFpLIJ5SImA2y8DaDTv9G', 'YjdIF7PuUug', null];

        // without https://www
        yield 'youtube_link_1' => ['youtube.com/v/dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_2' => ['youtube.com/v/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_3' => ['youtube.com/vi/dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_4' => ['youtube.com/?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_5' => ['youtube.com/?vi=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_6' => ['youtube.com/watch?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_7' => ['youtube.com/watch?vi=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_8' => ['youtu.be/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_9' => ['youtube.com/embed/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_10' => ['youtube.com/shorts/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_11' => ['m.youtube.com/watch?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        // without https://
        yield 'youtube_link_12' => ['www.youtube.com/v/dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_13' => ['www.youtube.com/v/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_14' => ['www.youtube.com/vi/dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_15' => ['www.youtube.com/?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_16' => ['www.youtube.com/?vi=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_17' => ['www.youtube.com/watch?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_18' => ['www.youtube.com/watch?vi=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_19' => ['www.youtu.be/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_20' => ['www.youtube.com/embed/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_21' => ['www.youtube.com/shorts/dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        // http
        yield 'youtube_link_22' => ['http://youtube.com/v/dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_23' => ['http://youtube.com/v/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_24' => ['http://youtube.com/vi/dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_25' => ['http://www.youtube.com/?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_26' => ['http://www.youtube.com/?vi=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_27' => ['http://www.youtube.com/watch?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_28' => ['http://www.youtube.com/watch?vi=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_29' => ['http://www.youtu.be/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_30' => ['http://youtube.com/embed/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_31' => ['http://www.youtube.com/shorts/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_32' => ['http://m.youtube.com/watch?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        // https
        yield 'youtube_link_33' => ['https://youtube.com/v/dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_34' => ['https://youtube.com/v/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_35' => ['https://youtube.com/vi/dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_36' => ['https://www.youtube.com/?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_37' => ['https://www.youtube.com/?vi=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_38' => ['https://www.youtube.com/watch?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_39' => ['https://www.youtube.com/watch?vi=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        yield 'youtube_link_40' => ['https://www.youtu.be/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_41' => ['https://youtube.com/embed/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_42' => ['https://www.youtube.com/shorts/dE5jPNvLvOk', 'dE5jPNvLvOk', null];
        yield 'youtube_link_43' => ['https://m.youtube.com/watch?v=dE5jPNvLvOk', 'dE5jPNvLvOk', null];

        // with start time
        yield 'youtube_link_44' => ['https://youtube.com/v/dE5jPNvLvOk?t=30', 'dE5jPNvLvOk', 30];

        yield 'youtube_link_45' => ['https://youtube.com/v/dE5jPNvLvOk?t=30', 'dE5jPNvLvOk', 30];
        yield 'youtube_link_46' => ['https://youtube.com/vi/dE5jPNvLvOk?t=30', 'dE5jPNvLvOk', 30];

        yield 'youtube_link_47' => ['https://www.youtube.com/?v=dE5jPNvLvOk&t=30', 'dE5jPNvLvOk', 30];
        yield 'youtube_link_48' => ['https://www.youtube.com/?vi=dE5jPNvLvOk&t=30', 'dE5jPNvLvOk', 30];

        yield 'youtube_link_49' => ['https://www.youtube.com/watch?v=dE5jPNvLvOk&t=30', 'dE5jPNvLvOk', 30];
        yield 'youtube_link_50' => ['https://www.youtube.com/watch?vi=dE5jPNvLvOk&t=30', 'dE5jPNvLvOk', 30];

        yield 'youtube_link_51' => ['https://www.youtu.be/dE5jPNvLvOk?t=30', 'dE5jPNvLvOk', 30];
        yield 'youtube_link_52' => ['https://youtube.com/embed/dE5jPNvLvOk?t=30', 'dE5jPNvLvOk', 30];
        yield 'youtube_link_53' => ['https://www.youtube.com/shorts/dE5jPNvLvOk?t=30', 'dE5jPNvLvOk', 30];
        yield 'youtube_link_54' => ['https://m.youtube.com/watch?v=dE5jPNvLvOk&t=30', 'dE5jPNvLvOk', 30];

        // with feature
        yield 'youtube_link_55' => ['https://www.youtube.com/watch?dev=inprogress&v=7HCZvhRAk-M&feature=related', '7HCZvhRAk-M', null];

        yield 'youtube_link_56' => ['https://youtube.com/v/dE5jPNvLvOk?feature=youtube_gdata_player', 'dE5jPNvLvOk', null];

        yield 'youtube_link_57' => ['https://youtube.com/v/dE5jPNvLvOk?feature=youtube_gdata_player', 'dE5jPNvLvOk', null];
        yield 'youtube_link_58' => ['https://youtube.com/vi/dE5jPNvLvOk?feature=youtube_gdata_player', 'dE5jPNvLvOk', null];

        yield 'youtube_link_59' => ['https://www.youtube.com/?v=dE5jPNvLvOk&feature=youtube_gdata_player', 'dE5jPNvLvOk', null];
        yield 'youtube_link_60' => ['https://www.youtube.com/?vi=dE5jPNvLvOk&feature=youtube_gdata_player', 'dE5jPNvLvOk', null];

        yield 'youtube_link_61' => ['https://www.youtube.com/watch?v=dE5jPNvLvOk&feature=youtube_gdata_player', 'dE5jPNvLvOk', null];
        yield 'youtube_link_62' => ['https://www.youtube.com/watch?vi=dE5jPNvLvOk&feature=youtube_gdata_player', 'dE5jPNvLvOk', null];

        yield 'youtube_link_63' => ['https://www.youtu.be/dE5jPNvLvOk?feature=youtube_gdata_player', 'dE5jPNvLvOk', null];
        yield 'youtube_link_64' => ['https://youtube.com/embed/dE5jPNvLvOk?feature=youtube_gdata_player', 'dE5jPNvLvOk', null];
        yield 'youtube_link_65' => ['https://www.youtube.com/shorts/dE5jPNvLvOk?feature=youtube_gdata_player', 'dE5jPNvLvOk', null];
        yield 'youtube_link_66' => ['https://m.youtube.com/watch?v=dE5jPNvLvOk&feature=youtube_gdata_player', 'dE5jPNvLvOk', null];

        // with #action=share
        yield 'youtube_link_67' => ['https://youtube.com/v/dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];

        yield 'youtube_link_68' => ['https://youtube.com/v/dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];
        yield 'youtube_link_69' => ['https://youtube.com/vi/dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];

        yield 'youtube_link_70' => ['https://www.youtube.com/?v=dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];
        yield 'youtube_link_71' => ['https://www.youtube.com/?vi=dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];

        yield 'youtube_link_72' => ['https://www.youtube.com/watch?v=dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];
        yield 'youtube_link_73' => ['https://www.youtube.com/watch?vi=dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];

        yield 'youtube_link_74' => ['https://www.youtu.be/dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];
        yield 'youtube_link_75' => ['https://youtube.com/embed/dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];
        yield 'youtube_link_76' => ['https://www.youtube.com/shorts/dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];
        yield 'youtube_link_77' => ['https://m.youtube.com/watch?v=dE5jPNvLvOk#action=share ', 'dE5jPNvLvOk', null];
    }

}

Working and PHPStan safe code - 2025/07/21

Based on mickmackusa and Rob Eyre's code.

<?php
declare(strict_types=1);

namespace AppBundle\Value;

/**
 * Value object holding the video id and optional start time parsed from a YouTube URL.
 *
 * Built on parse_url()/parse_str() instead of a hand-written regex. Both properties are
 * null when the URL does not point at a known YouTube host.
 */
class YoutubeVideoData
{
    /** Hosts accepted as YouTube; the parsed host is lower-cased before comparison. */
    private const YOUTUBE_HOSTS = ['www.youtube.com', 'youtube.com', 'www.youtu.be', 'youtu.be', 'm.youtube.com'];

    /** Last path segments that name a page rather than a video, so they are never an id. */
    private const NON_VIDEO_PATHS = ['watch', 'playlist'];

    private function __construct(public ?string $videoId = null, public ?int $time = null)
    {
    }

    /**
     * Extracts the video id and the start time (in seconds) from a YouTube URL.
     *
     * The scheme is optional ("youtu.be/<id>" works). The id is taken from the `vi` query
     * parameter, then `v`, then the last path segment. The start time is read from `t` and
     * may be plain seconds ("30") or the "1h2m3s" form; anything else yields null.
     *
     * @param string $url URL as entered by the user.
     *
     * @return self Video id and time; both null for non-YouTube or unparseable URLs.
     */
    public static function fromUrl(string $url): self
    {
        $url = trim($url);

        // parse_url() only reports a host when a scheme is present, so add one if missing.
        if (preg_match('~^https?://~i', $url) !== 1) {
            $url = 'https://' . $url;
        }

        $urlParts = parse_url($url);

        // parse_url() returns false on seriously malformed URLs.
        if ($urlParts === false || !isset($urlParts['host'])) {
            return new self(null, null);
        }

        // Host names are case-insensitive; compare strictly against the lower-cased value.
        if (!in_array(strtolower($urlParts['host']), self::YOUTUBE_HOSTS, true)) {
            return new self(null, null);
        }

        $queryParams = [];
        if (isset($urlParts['query'])) {
            parse_str($urlParts['query'], $queryParams);
        }

        return new self(
            self::extractVideoId($queryParams, $urlParts['path'] ?? ''),
            self::extractStartTime($queryParams['t'] ?? null),
        );
    }

    /**
     * Picks the video id from the query parameters (`vi`, then `v`) or the last path segment.
     *
     * @param array<int|string, mixed> $queryParams Result of parse_str().
     */
    private static function extractVideoId(array $queryParams, string $path): ?string
    {
        foreach (['vi', 'v'] as $key) {
            $value = $queryParams[$key] ?? null;
            // parse_str() yields arrays for "v[]=..."; only a non-empty string is an id.
            if (is_string($value) && $value !== '') {
                return $value;
            }
        }

        $segment = basename($path);
        if ($segment === '' || in_array($segment, self::NON_VIDEO_PATHS, true)) {
            return null;
        }

        return $segment;
    }

    /**
     * Converts the raw `t` parameter to seconds; accepts "90", "90s" and "1h2m3s" style values.
     */
    private static function extractStartTime(mixed $raw): ?int
    {
        if (!is_string($raw) || $raw === '') {
            return null;
        }

        if (preg_match('~^(?:(\d+)h)?(?:(\d+)m)?(?:(\d+)s?)?$~iD', $raw, $parts) !== 1) {
            return null;
        }

        // Groups that did not participate are either '' or missing; both count as zero.
        return (int) ($parts[1] ?? 0) * 3600
            + (int) ($parts[2] ?? 0) * 60
            + (int) ($parts[3] ?? 0);
    }

}

Solution

  • My opinion is that your codebase will smell of "updoc" if you use regex or other string surgery tools to parse something that PHP already has a native parser for.

    Also, your task seems to be more about text extraction than text validation, so I'll assume that we are always working with legitimate youtube formatted strings.

    Parse the url, then parse the querystring if it exists. Then null coalesce while you attempt to extract the desired values from the generated arrays. This is going to save you LOOOOOOADS of headaches versus maintaining a cumbersome regex. Demo

    // Demo: for each dataset from urlProvider(), dump the id and start time found in its URL.
    foreach (urlProvider() as $case) {
        // Drop the previous iteration's query parameters so they cannot leak into this one.
        unset($params);

        $components = parse_url($case[0]);
        if (isset($components['query'])) {
            parse_str($components['query'], $params);
        }

        // Prefer `vi`, then `v`, then the last path segment; a falsy result becomes null.
        $id = ($params['vi'] ?? $params['v'] ?? basename($components['path'] ?? '')) ?: null;
        $t = $params['t'] ?? null;

        var_export(['id' => $id, 't' => $t]);
    }
    

    This approach appears to work well for all of your provided test cases.

    Now that I am reviewing other answers, my answer seems to be a more solid version of Rob's answer (their answer doesn't sufficiently leverage null coalescing).