Friday, 31 December 2021

Subtitles

 I was watching a French movie today and realised that the subtitles were coming up too early - about 1 second too early. It's a bit annoying. So I decided to fix it with a simple JavaScript script - it runs under Node.js.


//srt_update.js

/**
 * Matches the timing line of an SRT cue, e.g. "00:01:02,345 --> 00:01:04,000".
 *
 * Deliberately has no `g` flag: a global regex remembers `lastIndex` between
 * exec() calls, so reusing it across different lines can skip valid matches.
 */
const TIMING_RE = /(?<shour>\d+):(?<sminute>\d+):(?<ssecond>\d+),(?<smilli>\d+)\s+-->\s+(?<ehour>\d+):(?<eminute>\d+):(?<esecond>\d+),(?<emilli>\d+)/;

/**
 * Shifts an hour/minute/second triple by a whole number of seconds,
 * carrying into minutes and hours as needed.
 *
 * @param {string} hour - hour digits as they appear in the file.
 * @param {string} minute - minute digits.
 * @param {string} second - second digits.
 * @param {number} offsetSeconds - whole seconds to add (may be negative).
 * @returns {string} the shifted time as "HH:MM:SS"; the hour keeps at least
 *     its original width. A result before zero is clamped to 00:00:00.
 */
function shiftClock(hour, minute, second, offsetSeconds) {
    const total = Math.max(
        0,
        Number.parseInt(hour, 10) * 3600 +
            Number.parseInt(minute, 10) * 60 +
            Number.parseInt(second, 10) +
            offsetSeconds
    );
    const hh = String(Math.floor(total / 3600)).padStart(hour.length, '0');
    const mm = String(Math.floor(total / 60) % 60).padStart(2, '0');
    const ss = String(total % 60).padStart(2, '0');
    return `${hh}:${mm}:${ss}`;
}

/**
 * Delays the start and end timestamps of an SRT timing line.
 * Any other line (cue number, subtitle text, blank line) is returned unchanged.
 *
 * @param {string} line - one line of the subtitle file, without line ending.
 * @param {number} [offsetSeconds=1] - whole seconds to add to both timestamps.
 * @returns {string} the line with shifted timestamps; milliseconds and any
 *     text around the timing (e.g. position hints) are preserved.
 */
function shiftTimingLine(line, offsetSeconds = 1) {
    const match = TIMING_RE.exec(line);
    if (match === null) {
        return line;
    }
    const g = match.groups;
    const start = `${shiftClock(g.shour, g.sminute, g.ssecond, offsetSeconds)},${g.smilli}`;
    const end = `${shiftClock(g.ehour, g.eminute, g.esecond, offsetSeconds)},${g.emilli}`;
    const before = line.slice(0, match.index);
    const after = line.slice(match.index + match[0].length);
    return `${before}${start} --> ${end}${after}`;
}

/**
 * Entry point: reads the SRT file named on the command line and prints it
 * to stdout with every cue delayed by one second.
 *
 * Syntax: node srt_update.js filename
 */
function main() {
    // argv[0] is node and argv[1] is this script, so the file name is argv[2].
    const [filename] = process.argv.slice(2);
    if (filename == null) {
        // stderr, because stdout is normally redirected into the new .srt file.
        console.error("please specify file name.");
        process.exitCode = 1;
        return;
    }

    const readLine = require('readline');
    const fs = require('fs');

    const input = fs.createReadStream(filename);
    input.on('error', (err) => {
        // Missing or unreadable file: report it instead of crashing with a stack trace.
        console.error(`cannot read ${filename}: ${err.message}`);
        process.exitCode = 1;
    });

    const rl = readLine.createInterface({ input, crlfDelay: Infinity });
    rl.on('line', (line) => {
        console.log(shiftTimingLine(line));
    });
}

main();
To run it in a command shell:

$ time node srt_update.js input.srt > output.srt

real    0m1.321s
user    0m0.701s
sys     0m0.062s

An alternative version that does not use regular expressions:

//srt_update2.js

/** Separator between the start and end timestamps of an SRT timing line. */
const ARROW = ' --> ';

/**
 * Tells whether a string is non-empty and made only of ASCII digits.
 *
 * @param {string} text - candidate field.
 * @returns {boolean} true when every character is 0-9 and there is at least one.
 */
function isDigits(text) {
    if (text.length === 0) {
        return false;
    }
    for (const ch of text) {
        if (ch < '0' || ch > '9') {
            return false;
        }
    }
    return true;
}

/**
 * Shifts one "HH:MM:SS,mmm" timestamp by a whole number of seconds, carrying
 * into minutes and hours as needed. Everything from the comma onwards
 * (milliseconds and any trailing text) is copied through unchanged.
 *
 * @param {string} stamp - text on one side of the arrow.
 * @param {number} offsetSeconds - whole seconds to add (may be negative).
 * @returns {string|null} the shifted timestamp, or null when `stamp` does not
 *     start with a digits:digits:digits, pattern. A result before zero is
 *     clamped to 00:00:00.
 */
function shiftTimestamp(stamp, offsetSeconds) {
    const firstColon = stamp.indexOf(':');
    const secondColon = stamp.indexOf(':', firstColon + 1);
    const comma = stamp.indexOf(',', secondColon + 1);
    if (firstColon < 0 || secondColon < 0 || comma < 0) {
        return null;
    }

    const hour = stamp.slice(0, firstColon);
    const minute = stamp.slice(firstColon + 1, secondColon);
    const second = stamp.slice(secondColon + 1, comma);
    if (!isDigits(hour) || !isDigits(minute) || !isDigits(second)) {
        return null;
    }

    const total = Math.max(
        0,
        Number.parseInt(hour, 10) * 3600 +
            Number.parseInt(minute, 10) * 60 +
            Number.parseInt(second, 10) +
            offsetSeconds
    );
    const hh = String(Math.floor(total / 3600)).padStart(hour.length, '0');
    const mm = String(Math.floor(total / 60) % 60).padStart(2, '0');
    const ss = String(total % 60).padStart(2, '0');
    return `${hh}:${mm}:${ss}${stamp.slice(comma)}`;
}

/**
 * Delays the start and end timestamps of an SRT timing line.
 * Any line that is not a well-formed timing line (cue number, subtitle text
 * that happens to contain an arrow, blank line) is returned unchanged.
 *
 * @param {string} line - one line of the subtitle file, without line ending.
 * @param {number} [offsetSeconds=1] - whole seconds to add to both timestamps.
 * @returns {string} the line with shifted timestamps.
 */
function shiftTimingLine(line, offsetSeconds = 1) {
    const arrowAt = line.indexOf(ARROW);
    if (arrowAt < 0) {
        return line;
    }
    const start = shiftTimestamp(line.slice(0, arrowAt), offsetSeconds);
    const end = shiftTimestamp(line.slice(arrowAt + ARROW.length), offsetSeconds);
    if (start === null || end === null) {
        return line;
    }
    return `${start}${ARROW}${end}`;
}

/**
 * Entry point: reads the SRT file named on the command line and prints it
 * to stdout with every cue delayed by one second.
 *
 * Syntax: node srt_update2.js filename
 */
function main() {
    // argv[0] is node and argv[1] is this script, so the file name is argv[2].
    const [filename] = process.argv.slice(2);
    if (filename == null) {
        // stderr, because stdout is normally redirected into the new .srt file.
        console.error("please specify file name.");
        process.exitCode = 1;
        return;
    }

    const readLine = require('readline');
    const fs = require('fs');

    const input = fs.createReadStream(filename);
    input.on('error', (err) => {
        // Missing or unreadable file: report it instead of crashing with a stack trace.
        console.error(`cannot read ${filename}: ${err.message}`);
        process.exitCode = 1;
    });

    const rl = readLine.createInterface({ input, crlfDelay: Infinity });
    rl.on('line', (line) => {
        console.log(shiftTimingLine(line));
    });
}

main();

Running result:


$ time node srt_update2.js input.srt > output.srt

real    0m1.304s
user    0m0.701s
sys     0m0.049s

The performance of the two versions seems to be very similar.

No comments: