javascript regex srt

Javascript .replace and cleaning an .srt


I have a .srt file with text like this:

19
00:01:05,100 --> 00:01:08,820
countries such as Spain. Another factor to

20
00:01:08,820 --> 00:01:11,850
consider is the southern tip of Spain's coast

21
00:01:11,850 --> 00:01:15,060
being so close to northern Africa could have


I've found this code, which works pretty well at cleaning the file, but it leaves in the initial cue numbers (these can be from one to four digits long).

The result:

19countries such as Spain. Another factor to 20consider is the southern tip of Spain's coast 21being so close to northern Africa could have

Any idea how to remove the digits?

This is my code:

 <script>
            // When files are chosen in the #files input, read them one after another,
            // strip the WebVTT/SRT scaffolding from each with the regex pipeline below,
            // and append a heading, a "Copy transcript" button and the cleaned text to
            // the page for every file.
            document.querySelector('#files').addEventListener('change', (e) => {
                const files = e.target.files;

                // Nothing selected (e.g. the selection was cleared): there is no
                // files[0] to pass to readAsText, so stop instead of throwing.
                if (!files || files.length === 0) {
                    return;
                }

                // A single FileReader is reused; `i` tracks the file currently being read.
                const reader = new FileReader();
                let i = 0;

                reader.onload = (loadEvent) => {
                    const fileName = files[i].name;
                    let text = loadEvent.target.result;

                    // The replacements are order-dependent: header/NOTE lines first,
                    // then cue identifiers and timing lines, then line joining.
                    text = text.replace(/WEBVTT[\r\n]/,"");
                    text = text.replace(/NOTE duration:.*[\r\n]/,"");
                    text = text.replace(/NOTE language:.*[\r\n]/,"");
                    text = text.replace(/NOTE Confidence:.+\d/g,"");
                    text = text.replace(/NOTE recognizability.+\d/g,"");
                    // Lines containing four or more hyphen-separated parts
                    // (presumably UUID-style cue ids -- verify against real input).
                    text = text.replace(/[\r\n].+-.+-.+-.+-.+/g,"");
                    // Timing lines ("start --> end") together with the line breaks
                    // around them. A numeric cue index on the preceding line is NOT
                    // matched here, so it stays glued to the following caption text.
                    text = text.replace(/[\r\n].+ --> .+[\r\n]/g,"");
                    text = text.replace(/.[\r\n]. --> .+[\r\n]/g,"");
                    // Join the remaining lines: a newline followed by a character
                    // becomes a space plus that character...
                    text = text.replace(/[\n](.)/g," $1");
                    // ...then drop whatever line breaks are left and one leading space.
                    text = text.replace(/[\r\n]+/g,"");
                    text = text.replace(/^ /,"");

                    // File names and file contents are untrusted input, so they are
                    // inserted as text (textContent), never parsed as HTML.
                    const heading = document.createElement('h3');
                    document.body.appendChild(heading);
                    heading.textContent = `Transcript for '${fileName}'`;

                    const copyButton = document.createElement('button');
                    document.body.appendChild(copyButton);
                    copyButton.onclick = () => { copyToClip(text, fileName); };
                    copyButton.textContent = "Copy transcript";
                    copyButton.className = "copyButton";

                    const div = document.createElement('div');
                    document.body.appendChild(div);
                    div.className = "cleanVTTText";
                    div.textContent = text;

                    console.log(fileName);

                    // Start reading the next file, if any; onload fires again for it.
                    if (i++ < files.length - 1) {
                        reader.readAsText(files[i]);
                    } else {
                        console.log('done');
                    }
                };

                reader.readAsText(files[i]);
            }, false);
            
            /**
             * Copies a transcript to the clipboard and tells the user the outcome.
             *
             * A temporary "copy" listener supplies `str` as both the text/html and
             * text/plain clipboard payloads, then document.execCommand("copy") is
             * used to trigger it.
             *
             * @param {string} str - Transcript text to place on the clipboard.
             * @param {string} fileName - Name of the source file, shown in the alert.
             */
            function copyToClip(str, fileName) {
                function listener(e) {
                    e.clipboardData.setData("text/html", str);
                    e.clipboardData.setData("text/plain", str);
                    // Stop the browser from overwriting the data set above with
                    // the current selection.
                    e.preventDefault();
                }

                let copied = false;
                document.addEventListener("copy", listener);
                try {
                    copied = document.execCommand("copy");
                } finally {
                    // Always unregister, even if execCommand throws; otherwise every
                    // later copy on the page would be replaced with this transcript.
                    document.removeEventListener("copy", listener);
                }

                if (copied) {
                    alert("Copied transcript to clipboard:\n'"+fileName+"'");
                } else {
                    alert("Could not copy transcript to clipboard:\n'"+fileName+"'");
                }
            }
            </script>

Solution

  • For this problem, adding this line of code worked:

    // Removes every timing line ("... --> NN:...") together with an optional
    // run of digits (the cue number) on the line before it and the line breaks
    // around that number. The `m` flag lets ^ and $ match at each line, and
    // `g` applies the replacement to every cue.
    // NOTE(review): presumably this must run before the existing " --> "
    // replacements, which would otherwise consume the timing lines first -- confirm.
    text = text.replace(/\n?\d*?\n?^.* --> [012345]{2}:.*$/mg ,"");