extractchessdata-extractioninformation-extractionlichess

How do convert/parse/extract data from a PGN into a spreadsheet/google sheet/excel file?


Sequel to this question: Live statistics chess960 from chess.com?

So suppose I go to like

https://api.chess.com/pub/player/gmwso/games/2020/12

or

https://api.chess.com/pub/player/gmwso/games/2020/12/pgn

there's gonna be a bunch of stuff like say

[UTCDate "2018.01.03"]
[WhiteElo "2706"]
[BlackElo "2940"]

How do I get this data into a spreadsheet like column 1 is all the dates, column 2 is the corresponding white elo, column 3 black elo, col4 white username and col5 black username?


Update 2: Fixed now. see the 'json' vs the 'preformed'. WOW.

Update 1: It appears Mike Steelson has an answer here, where the code is given as

=arrayformula( regexextract(split( substitute(substitute(substitute(getDataJSON(A1;"/games";"/pgn");"[";"");"]";"");"""";"") ;char(10));"\s.*") )

with an example given here

https://docs.google.com/spreadsheets/d/1MX1o5qdy0K3gTMzbimUV3SmFf-0XPCSJ8Vz4IjI-8Ak/copy

It appears there's a problem when it gets to the case of chess960 only. Consider for example this player: Replacing 'gmwso' with the player's username will yield a weird output. i imagine the output will be messier for mixed chess960 and chess.


Solution

  • From lichess.org, assuming the pgn file has been loaded into your drive (if I catch info directly from lichess.org, google alerts 'Exceeded maximum execution time')

    // Mike Steelson
    
    function onOpen() {
      var ui = SpreadsheetApp.getUi();
      ui.createMenu('↓ lichess.org ↓')
        .addItem('parse PGN file from Drive', 'readPGNFromDrive')
        .addToUi();
    }
    
    function readPGNFromDrive() {
    
      var sh = SpreadsheetApp.getActiveSpreadsheet().getSheetByName('from drive')
      var id = sh.getRange('B1').getValue()
      var list = ['Event', 'Site', 'Date', 'White', 'Black', 'Result', 'UTCDate', 'UTCTime', 'WhiteElo', 'BlackElo', 'WhiteRatingDiff', 'BlackRatingDiff', 'Variant', 'TimeControl', 'ECO', 'Termination', 'FEN', 'SetUp', 'sequence']
    
      sh.deleteRows(2,sh.getLastRow()+2)
      var result=[]
      result.push(list)
      var data = DriveApp.getFileById(id).getBlob().getDataAsString().split(String.fromCharCode(10,10,10))
      for (var i=0; i<data.length; i++) {
        var prov=[]
        var parts = data[i].split(String.fromCharCode(10,10))
        var pparts = parts[0].split(String.fromCharCode(10))
        for (var x=0; x<pparts.length; x++){
          pparts[x]=pparts[x].replace('[','"').replace(']','').replace(' "','":"') // "key":"value"
        }
        var donnees = JSON.parse('{'+pparts.join(',')+'}') // mise au format json
        list.forEach(function(path){
          if (path == 'sequence'){prov.push(parts[1])}
          else {prov.push(donnees.item(path))}
        })
        result.push(prov)
      }
      sh.getRange(2,1,result.length,result[0].length).setValues(result)
    }
    
    Object.prototype.item=function(i){return this[i]};
    

    https://docs.google.com/spreadsheets/d/1ZX-YefSfeNx0z6mo7Bo3EY_tOkYcVLT7Ke0fqQUiHNI/copy