javascript audio synchronization srt

How to sync timestamped animation frames to audio file?


I have the following code parsing a demo SRT file and printing out the text at the appropriate time, like this:

const { default: srtParser2 } = require('srt-parser-2')
const fs = require('fs')

// Read the demo subtitle file and parse it into cue objects.
const parser = new srtParser2()
const srtText = fs.readFileSync('ex.srt', 'utf-8')
const cues = parser.fromSrt(srtText)

// Reduce every cue to { startms, endms, text } before scheduling it.
const chunks = cues.map(simplify)

// Schedule the console output for every cue.
animate(chunks)

/**
 * Schedule each chunk's text to be printed once its start offset has elapsed.
 *
 * One timer is registered per chunk, all measured from the moment of the
 * call; when a timer fires the console is cleared and the chunk text logged.
 *
 * @param {Array<{startms: number, endms: number, text: string}>} chunks
 */
function animate(chunks) {
  const print = (line) => {
    console.clear()
    console.log(line)
  }

  chunks.forEach((chunk) => {
    const { startms, text } = chunk
    setTimeout(() => print(text), startms)
  })
}

/**
 * Reduce a parsed SRT cue to the three fields the animation needs.
 *
 * @param {{startTime: string, endTime: string, text: string}} cue
 *   Cue whose startTime/endTime are passed to toInterval.
 * @returns {{startms: number, endms: number, text: string}}
 */
function simplify({ startTime, endTime, text }) {
  return {
    startms: toInterval(startTime),
    endms: toInterval(endTime),
    text,
  }
}

/**
 * Convert a timestamp of the form "HH:MM:SS,mmm" into milliseconds.
 *
 * The part before the comma is split on ':' into hours, minutes and seconds;
 * the part after the comma is taken as milliseconds. Any piece that cannot
 * be parsed makes the result NaN.
 *
 * @param {string} string Timestamp such as "00:00:02,000".
 * @returns {number} Offset in milliseconds.
 */
function toInterval(string) {
  const MS_PER_SECOND = 1000
  const MS_PER_MINUTE = 60 * MS_PER_SECOND
  const MS_PER_HOUR = 60 * MS_PER_MINUTE

  const [clock, fraction] = string.split(',')
  const [hours, minutes, seconds] = clock
    .split(':')
    .map((part) => parseInt(part, 10))

  return (
    hours * MS_PER_HOUR +
    minutes * MS_PER_MINUTE +
    seconds * MS_PER_SECOND +
    parseInt(fraction, 10)
  )
}

This works fine when there is no associated audio. But how do I properly associate this with an audio file in the browser?

Precompiling some of the stuff, essentially what I have now boils down to this (so you can run in the browser):

animate([
  {
    "startms": 50,
    "endms": 2000,
    "text": "- [Adam] Hello, my name is Adam Wilbert,"
  },
  {
    "startms": 2000,
    "endms": 3010,
    "text": "and I'd like to welcome you"
  },
  {
    "startms": 3010,
    "endms": 5040,
    "text": "to Learning Relational Databases."
  },
  {
    "startms": 5040,
    "endms": 7000,
    "text": "In this course, I'm going\nto give you an overview"
  },
  {
    "startms": 7000,
    "endms": 8090,
    "text": "of the planning steps that\nyou should move through"
  },
  {
    "startms": 8090,
    "endms": 11000,
    "text": "before you start development\nin order to ensure"
  },
  {
    "startms": 11000,
    "endms": 13020,
    "text": "that your system works as expected."
  },
  {
    "startms": 13020,
    "endms": 15000,
    "text": "I'll start with an\noverview of what exactly"
  },
  {
    "startms": 15000,
    "endms": 17090,
    "text": "a relational database is and\nhow its structure differs"
  },
  {
    "startms": 17090,
    "endms": 18070,
    "text": "from the spreadsheets"
  },
  {
    "startms": 18070,
    "endms": 20030,
    "text": "that you might be used to working with."
  },
  {
    "startms": 20030,
    "endms": 22030,
    "text": "And I'll outline some of\nthe hidden difficulties"
  },
  {
    "startms": 22030,
    "endms": 24010,
    "text": "that can arise if the\nstructure of your data"
  },
  {
    "startms": 24010,
    "endms": 27020,
    "text": "isn't fully considered\nbefore development begins."
  },
  {
    "startms": 27020,
    "endms": 30000,
    "text": "Then we'll discover the\ndatabase development lifecycle"
  },
  {
    "startms": 30000,
    "endms": 32050,
    "text": "and use it as a guide for\nmoving through the process"
  },
  {
    "startms": 32050,
    "endms": 35040,
    "text": "of thinking about our\nspecific data storage needs."
  },
  {
    "startms": 35040,
    "endms": 36090,
    "text": "Finally, we'll talk about all of the rules"
  },
  {
    "startms": 36090,
    "endms": 38060,
    "text": "that we've identified\nabout how the database"
  },
  {
    "startms": 38060,
    "endms": 40060,
    "text": "needs to function and\nstart translating them"
  },
  {
    "startms": 40060,
    "endms": 41090,
    "text": "into the components that will make up"
  },
  {
    "startms": 41090,
    "endms": 44020,
    "text": "the actual relational database."
  },
  {
    "startms": 44020,
    "endms": 46050,
    "text": "And along the way, we'll\ndiscuss design considerations"
  },
  {
    "startms": 46050,
    "endms": 48030,
    "text": "that'll make the database\neasier to construct"
  },
  {
    "startms": 48030,
    "endms": 50050,
    "text": "and easier to maintain."
  },
  {
    "startms": 50050,
    "endms": 52000,
    "text": "So I'd like to thank you for joining me"
  },
  {
    "startms": 52000,
    "endms": 53070,
    "text": "in learning relational databases."
  },
  {
    "startms": 53070,
    "endms": 55040,
    "text": "Now let's get started."
  }
]
)

/**
 * Print every chunk's text at its start offset.
 *
 * A separate timer is created for each chunk, counted from the moment
 * animate is called. When it fires, the console is cleared and the text is
 * logged.
 *
 * @param {Array<{startms: number, endms: number, text: string}>} chunks
 */
function animate(chunks) {
  function schedule(chunk) {
    const delay = chunk.startms
    const line = chunk.text
    setTimeout(function printLine() {
      console.clear()
      console.log(line)
    }, delay)
  }

  chunks.forEach((chunk) => schedule(chunk))
}

All I am doing is showing the text approximately when it is time to. But that is the problem: it is only approximate. If there is any lag in the audio, everything will be thrown off.

How can I make this animate the text properly when the corresponding audio plays? Say I just do:

var audio = new Audio('audio_file.mp3')
audio.play()

How do I sync the SRT with such an audio file?

It's not as simple as embedding the SRT file directly into the audio file or showing it as ordinary subtitles. I need to run custom animations on the text and allow user interaction with the text while it is being spoken.


Solution

  • I just did this:

    <!doctype html>
    <html lang='en'>
      <head>
        <meta charset='utf-8'>
        <style>
          html, body {
            margin: 0;
            padding: 0;
            height: 800px;
            width: 800px;
          }
    
          @font-face {
            font-family: Tone;
            src: url('font.otf');
          }
    
          #content {
            display: flex;
            align-items: center;
            justify-content: center;
            height: 100%;
            background-color: #000;
          }
    
          #row {
            font-size: 92px;
            font-family: Tone;
            width: 600px;
            text-align: center;
          }
    
          #row * {
            text-align: center;
            position: relative;
            color: #9A7FAE;
          }
    
          #row .highlight {
            color: #BEE5B0;
          }
        </style>
      </head>
      <body>
        <div id='content'><div id='row'><span>`ԀŰ ĀӢŀÐА ÐàӢðԀ ŰԀ 0ѰВÐàԀ$ àԀ`Őϰŀ`Őђŀ`Ӡ ѰŰ ÐƐВŰӠ!</span></div></div>
        <script>
          const TIMED = [
      {
        "startms": 599,
        "endms": 2980,
        "text": "`ԀŰ ĀӢŀÐА ÐàӢðԀ ŰԀ 0ѰВÐàԀ$"
      },
      {
        "startms": 2980,
        "endms": 4848,
        "text": "àԀ`Őϰŀ`Őђŀ`Ӡ ѰŰ ÐƐВŰӠ!"
      },
      {
        "startms": 5563,
        "endms": 7932,
        "text": "Ѱŀ ŀВpϰŀ ŰϰӢİӠ pϲŰԀ ÐВààԀ"
      },
      {
        "startms": 7932,
        "endms": 12480,
        "text": "pђŀÐѰ `ВŰŰԀ ŀԀÐϲàԀ ѰŰ 0ӠÐВàА 0ԀƠԀŰŰВŰӠ!"
      },
      {
        "startms": 12480,
        "endms": 14974,
        "text": "PӠŀ ѰŰ ÐƐВŰӠ PӠѲ ðѰÐƐѲŀӠ$"
      },
      {
        "startms": 14974,
        "endms": 16152,
        "text": "0ϲӠѰ ðА`ВàА$"
      },
      {
        "startms": 16152,
        "endms": 17644,
        "text": "ÐӠPPԂàА$"
      },
      {
        "startms": 17644,
        "endms": 18658,
        "text": "pѐŀÐѲàА!"
      },
      {
        "startms": 19461,
        "endms": 23907,
        "text": "ŰԀ ÐϰԀ АŀѐƠ`ŐѲԀ ԂŰŰԀ 0ѰВÐàԀ Ѱŀ ÀàВÐÐԀ `ђðѰ ӠÀàѲàА!"
      },
      {
        "startms": 23907,
        "endms": 25125,
        "text": "ŰԀ ĀӠŀÐԂŀƀԀ"
      },
      {
        "startms": 25125,
        "endms": 27500,
        "text": "PӠŀpђàðԀ ŰԀ pϲԀ ÀӢàĀԀ$"
      },
      {
        "startms": 27500,
        "endms": 30118,
        "text": "PӢĀА ϲŀԀ 0ԂŰŰԀ pАİàВÐԀ!"
      },
      {
        "startms": 30448,
        "endms": 33103,
        "text": "ӠàԀ Ðϰ PӠŀÐàӢŰŰѰ ŰԀ 0ѰВÐàԀ$"
      },
      {
        "startms": 33103,
        "endms": 34930,
        "text": "àАԀŰѲАԀ$"
      },
      {
        "startms": 34930,
        "endms": 36543,
        "text": "ӠppВàðԀ!"
      },
      {
        "startms": 36543,
        "endms": 38522,
        "text": "0ӠàÐԂŀ`ӠÐѰ ŀАŰ ÐƐВŰӠ$"
      },
      {
        "startms": 38522,
        "endms": 40656,
        "text": "ðђàpӠ ŰԀ İàԀŀ`ВАԀ"
      },
      {
        "startms": 40656,
        "endms": 42565,
        "text": "ŰԀ pÐàԂ`Ԁ ВàÐԀ!"
      },
      {
        "startms": 42565,
        "endms": 45582,
        "text": "PӠpÐàϰѲƐѰ ÐƐѰӢ PА ðϲӠѰ$"
      },
      {
        "startms": 45582,
        "endms": 47607,
        "text": "PӠѲ PА ѰŰ ĀӢŀ`Ӡ ðВ`Ԁ!"
      },
      {
        "startms": 47607,
        "endms": 49113,
        "text": "Ӡ ŀВŰŰԀ ÐϲԀ ĀԂŀӠ$"
      },
      {
        "startms": 49113,
        "endms": 51704,
        "text": "0ѐƠ pѰPϰàВАԀ àѰpѰВ`Ԁ!"
      },
      {
        "startms": 52817,
        "endms": 54553,
        "text": "ŰԀ 0ѰВÐàԀ Ѱ ŰԀ PѰԂðА$"
      },
      {
        "startms": 54553,
        "endms": 56769,
        "text": "PА Ԃ0àА ӢİŀѰ PӢԀ!"
      }
    ]
    
    const TEXT = [
      '`ԀŰ ĀӢŀÐА ÐàӢðԀ ŰԀ 0ѰВÐàԀ$ àԀ`Őϰŀ`Őђŀ`Ӡ ѰŰ ÐƐВŰӠ!',
      'Ѱŀ ŀВpϰŀ ŰϰӢİӠ pϲŰԀ ÐВààԀ pђŀÐѰ `ВŰŰԀ ŀԀÐϲàԀ ѰŰ 0ӠÐВàА 0ԀƠԀŰŰВŰӠ!',
      'PӠŀ ѰŰ ÐƐВŰӠ PӠѲ ðѰÐƐѲŀӠ$ 0ϲӠѰ ðА`ВàА$ ÐӠPPԂàА$ pѐŀÐѲàА!',
      'ŰԀ ÐϰԀ АŀѐƠ`ŐѲԀ ԂŰŰԀ 0ѰВÐàԀ Ѱŀ ÀàВÐÐԀ `ђðѰ ӠÀàѲàА!',
      'ŰԀ ĀӠŀÐԂŀƀԀ PӠŀpђàðԀ ŰԀ pϲԀ ÀӢàĀԀ$ PӢĀА ϲŀԀ 0ԂŰŰԀ pАİàВÐԀ!',
      '',
      'ӠàԀ Ðϰ PӠŀÐàӢŰŰѰ ŰԀ 0ѰВÐàԀ$ àАԀŰѲАԀ$ ӠppВàðԀ!',
      '0ӠàÐԂŀ`ӠÐѰ ŀАŰ ÐƐВŰӠ$ ðђàpӠ ŰԀ İàԀŀ`ВАԀ ŰԀ pÐàԂ`Ԁ ВàÐԀ!',
      'PӠpÐàϰѲƐѰ ÐƐѰӢ PА ðϲӠѰ$ PӠѲ PА ѰŰ ĀӢŀ`Ӡ ðВ`Ԁ!',
      'Ӡ ŀВŰŰԀ ÐϲԀ ĀԂŀӠ$ 0ѐƠ pѰPϰàВАԀ àѰpѰВ`Ԁ!',
      'ŰԀ 0ѰВÐàԀ Ѱ ŰԀ PѰԂðА$ PА Ԃ0àА ӢİŀѰ PӢԀ!',
    ]
    
        // Start playback on the first click only; `once` removes the listener
        // afterwards so later clicks cannot start a second, overlapping run.
        window.addEventListener('click', start, { once: true })
    
        /**
         * Click handler: plays 'it.wav' and keeps the highlighted phrase in
         * #row in step with the audio.
         *
         * Cue timing is read from the audio element's own playback position
         * (`audio.currentTime`) rather than from the wall clock, so a delayed
         * start, buffering or a stall in playback cannot push the text out of
         * sync. The cue list TIMED is walked with an index and never modified,
         * and the animation loop stops once every cue has been shown or the
         * audio has ended.
         */
        function start() {
          // One playback per page: later clicks must not stack a second Audio
          // element and a second animation loop on top of the first.
          window.removeEventListener('click', start)

          const audio = new Audio('it.wav')
          const container = document.querySelector('#row')
          let cursor = 0 // index of the next cue in TIMED that has not been shown
          let failed = false // set when the browser refuses to start playback

          // play() returns a promise in current browsers; report a rejection
          // instead of leaving it unhandled, and stop polling the clock.
          const playback = audio.play()
          if (playback && typeof playback.catch === 'function') {
            playback.catch((error) => {
              failed = true
              console.error('Unable to start audio playback', error)
            })
          }
          update()

          // Runs once per animation frame: shows the latest cue whose start
          // time has been reached by the audio clock.
          function update() {
            if (failed) return

            const elapsedMs = audio.currentTime * 1000
            let due = null
            // If several cues became due since the last frame, skip straight
            // to the most recent one.
            while (cursor < TIMED.length && TIMED[cursor].startms <= elapsedMs) {
              due = TIMED[cursor]
              cursor += 1
            }
            if (due !== null) show(due.text)

            if (cursor < TIMED.length && !audio.ended) {
              requestAnimationFrame(update)
            }
          }

          // Locates `text` inside one of the TEXT lines and splits that line
          // into the part before, the match itself, and the part after.
          // Returns null when the text is empty or occurs in no line.
          function find(text) {
            if (!text) return null
            for (const line of TEXT) {
              const index = line.indexOf(text)
              if (index > -1) {
                const end = index + text.length
                return {
                  left: line.slice(0, index),
                  center: line.slice(index, end),
                  right: line.slice(end),
                }
              }
            }
            return null
          }

          // Re-renders #row with the line containing `text`, wrapping the
          // spoken phrase in a .highlight span. Does nothing when the phrase
          // is not part of any line.
          function show(text) {
            const match = find(text)
            if (match === null) return

            const { left, center, right } = match
            const l = `<span>${left}</span>`
            const c = `<span class='highlight'>${center}</span>`
            const r = `<span>${right}</span>`

            // TIMED and TEXT are constants written into this page, so the
            // interpolated markup is trusted.
            container.innerHTML = `${l}${c}${r}`
          }
        }
        </script>
      </body>
    </html>