ocaml

How to efficiently read a line of integers in ocaml


I'd like to efficiently read a large line (~4000 characters) from stdin. I'll have to read ~4000 lines as well.

The line is formatted as follows:

INTEGERwhitespaceINTEGERwhitespace.... For example, 100 33 22 19 485 3601...

Afterwards, the data needs to be processed, so the initial solution I used with read_line() |> String.split_on_char ' ' |> ... was too slow O(3n).

I want to use something like Scanf:

bscanf ic "%d" int_of_string

But I'm not sure how to account for the whitespaces, or if it's fast enough. Are there any solutions for this?


Solution

  • I created a file with 10000 lines of 4000 random integers.

    I then wrote these 4 main functions (read_int is an auxiliary one) that have the same output:

    let read_int ic =
      let rec aux acc =
        match input_char ic with
        | ' ' | '\n' -> acc
        | c -> aux ((10 * acc) + (Char.code c - 48))
      in
      aux 0
    
    let read_test_int () =
      let ic = open_in "test" in
      let max = ref 0 in
      try
        while true do
          read_int ic |> fun e -> if e > !max then max := e
        done
      with End_of_file ->
        close_in ic;
        Format.eprintf "%d@." !max
    
    let read_test_line () =
      let ic = open_in "test" in
      let max = ref 0 in
      try
        while true do
          input_line ic |> String.split_on_char ' '
          |> List.iter (fun e ->
                 let e = int_of_string e in
                 if e > !max then max := e)
        done
      with End_of_file ->
        close_in ic;
        Format.eprintf "%d@." !max
    
    let read_test_line_map () =
      let ic = open_in "test" in
      let max = ref 0 in
      try
        while true do
          input_line ic |> String.split_on_char ' ' |> List.map int_of_string
          |> List.iter (fun e -> if e > !max then max := e)
        done
      with End_of_file ->
        close_in ic;
        Format.eprintf "%d@." !max
    
    let read_test_scanf () =
      let ic = Scanf.Scanning.open_in "test" in
      let max = ref 0 in
      try
        while true do
          Scanf.bscanf ic "%d " (fun i -> i) |> fun e -> if e > !max then max := e
        done
      with End_of_file ->
        Scanf.Scanning.close_in ic;
        Format.eprintf "%d@." !max
    
    1. read_test_int creates an integer by reading characters one by one
    2. read_test_line is your initial solution
    3. read_test_line_map is your initial solution with a mapping from string to int
    4. read_test_scanf is the solution you'd like to test

    I then tested the four of them with hyperfine and here are the outputs:

    hyperfine --warmup 3 -P arg 1 4 'dune exec program -- {arg}'

    1. read_int
    Benchmark #1: dune exec program -- 1
      Time (mean ± σ):      1.509 s ±  0.072 s    [User: 1.460 s, System: 0.049 s]
      Range (min … max):    1.436 s …  1.618 s    10 runs
    
    1. read_line
    Benchmark #2: dune exec program -- 2
      Time (mean ± σ):      1.818 s ±  0.016 s    [User: 1.717 s, System: 0.100 s]
      Range (min … max):    1.794 s …  1.853 s    10 runs
    
    1. read_line_map
    Benchmark #4: dune exec program -- 4
      Time (mean ± σ):      2.158 s ±  0.127 s    [User: 2.108 s, System: 0.050 s]
      Range (min … max):    2.054 s …  2.482 s    10 runs
    
    1. read_scanf
    Benchmark #3: dune exec program -- 3
      Time (mean ± σ):      5.017 s ±  0.103 s    [User: 4.957 s, System: 0.060 s]
      Range (min … max):    4.893 s …  5.199 s    10 runs
    

    It looks like my own implementation of read_int is the better one and input_line is just slightly worse since you first create a string then go through it once to split it then go through the list to read the integers. scanf is sadly always the worst. The difference starts to be visible with these kind of values (10000 lines, 4000 integers), for 4000 lines of 4000 characters I couldn't find any real difference.

    Hyperline gives the following summary:

    Summary
      'dune exec program -- 1' ran
        1.20 ± 0.06 times faster than 'dune exec program -- 2'
        1.43 ± 0.11 times faster than 'dune exec program -- 4'
        3.33 ± 0.17 times faster than 'dune exec program -- 3'
    

    [EDIT]

    I created two new benchs using OCamllex:

    1. lexer.mll
    let digit = ['0'-'9']
    
    rule integers = parse
      | ' ' | '\n'        { integers lexbuf }
      | digit+ as inum { int_of_string inum }
      | _           { failwith "not a digit or a space" }
      | eof         { raise End_of_file }
    

    and

    1. lexer_list.mll
    { let l = ref [] }
    
    let digit = ['0'-'9']
    
    rule integers = parse
      | ' ' | '\n'        { integers lexbuf }
      | digit+ as inum { l := int_of_string inum :: !l; integers lexbuf }
      | _           { failwith "not a digit or a space" }
      | eof         { !l }
    

    Rerunning the benchmarks here are the results:

    ❯ hyperfine --warmup 3 -P arg 1 6 'dune exec program -- {arg}'
    Benchmark #1: dune exec program -- 1
      Time (mean ± σ):      1.394 s ±  0.044 s    [User: 1.358 s, System: 0.036 s]
      Range (min … max):    1.360 s …  1.483 s    10 runs
    
    Benchmark #2: dune exec program -- 2
      Time (mean ± σ):      1.674 s ±  0.011 s    [User: 1.590 s, System: 0.084 s]
      Range (min … max):    1.657 s …  1.692 s    10 runs
    
    Benchmark #3: dune exec program -- 3
      Time (mean ± σ):      4.886 s ±  0.304 s    [User: 4.847 s, System: 0.037 s]
      Range (min … max):    4.627 s …  5.460 s    10 runs
    
    Benchmark #4: dune exec program -- 4
      Time (mean ± σ):      1.949 s ±  0.023 s    [User: 1.908 s, System: 0.041 s]
      Range (min … max):    1.925 s …  1.984 s    10 runs
    
    Benchmark #5: dune exec program -- 5
      Time (mean ± σ):      2.824 s ±  0.013 s    [User: 2.784 s, System: 0.039 s]
      Range (min … max):    2.798 s …  2.843 s    10 runs
    
    Benchmark #6: dune exec program -- 6
      Time (mean ± σ):      5.832 s ±  0.074 s    [User: 5.493 s, System: 0.333 s]
      Range (min … max):    5.742 s …  5.981 s    10 runs
    
    Summary
      'dune exec program -- 1' ran
        1.20 ± 0.04 times faster than 'dune exec program -- 2'
        1.40 ± 0.05 times faster than 'dune exec program -- 4'
        2.03 ± 0.07 times faster than 'dune exec program -- 5'
        3.51 ± 0.24 times faster than 'dune exec program -- 3'
        4.18 ± 0.14 times faster than 'dune exec program -- 6'
    

    Creating a list before iterating over it is the worst possible solution (even worse than scanf, imagine!) but lexing is not that bad (but not that good either)

    So, to summarise, the solutions from best to worst are:

    1. custom read int
    2. read line
    3. lexing int by int
    4. read line with a mapping
    5. scanf
    6. lexing the whole file to a list of int

    [Benching with memtrace]

    This made me realise something, by the way, in case you ever read this:

    ❯ hyperfine --warmup 3 -P arg 1 6 'dune exec program -- {arg}'
    Benchmark #1: dune exec program -- 1
      Time (mean ± σ):      7.003 s ±  0.201 s    [User: 6.959 s, System: 0.043 s]
      Range (min … max):    6.833 s …  7.420 s    10 runs
    
    Benchmark #2: dune exec program -- 2
      Time (mean ± σ):      1.801 s ±  0.060 s    [User: 1.697 s, System: 0.104 s]
      Range (min … max):    1.729 s …  1.883 s    10 runs
    
    Benchmark #3: dune exec program -- 3
      Time (mean ± σ):      4.817 s ±  0.120 s    [User: 4.757 s, System: 0.058 s]
      Range (min … max):    4.679 s …  5.068 s    10 runs
    
    Benchmark #4: dune exec program -- 4
      Time (mean ± σ):      2.028 s ±  0.023 s    [User: 1.994 s, System: 0.032 s]
      Range (min … max):    1.993 s …  2.071 s    10 runs
    
    Benchmark #5: dune exec program -- 5
      Time (mean ± σ):      2.997 s ±  0.108 s    [User: 2.948 s, System: 0.046 s]
      Range (min … max):    2.889 s …  3.191 s    10 runs
    
    Benchmark #6: dune exec program -- 6
      Time (mean ± σ):      6.109 s ±  0.161 s    [User: 5.753 s, System: 0.349 s]
      Range (min … max):    5.859 s …  6.322 s    10 runs
    
    Summary
      'dune exec program -- 2' ran
        1.13 ± 0.04 times faster than 'dune exec program -- 4'
        1.66 ± 0.08 times faster than 'dune exec program -- 5'
        2.67 ± 0.11 times faster than 'dune exec program -- 3'
        3.39 ± 0.14 times faster than 'dune exec program -- 6'
        3.89 ± 0.17 times faster than 'dune exec program -- 1'
    

    My understanding is that memtrace is able to do a lot of work on my custom solution since the whole code is directly available whereas for the rest it can just scratch the surface (I may be completely wrong but it took me some time to figure out that memtrace was spoiling my benchmarks)


    [Following @ivg's comment]

    1. lexer_parser.mll
    {
       open Parser
    }
    
    let digit = ['0'-'9']
    
    rule integers = parse
      | ' ' | '\n'        { integers lexbuf }
      | digit+ as inum    { INT (int_of_string inum) }
      | _                 { failwith "not a digit or a space" }
      | eof               { raise End_of_file }
    

    and parser.mly

    %token <int> INT
    %start main             /* the entry point */
    %type <int> main
    %%
    main:
    | INT { $1 }
    ;
    

    and in main.ml

    let read_test_lexer_parser () =
      let ic = open_in "test" in
      let lexbuf = Lexing.from_channel ic in
      let max = ref 0 in
      try
        while true do
          let result = Parser.main Lexer_parser.integers lexbuf in
          if result > !max then max := result
        done
      with End_of_file ->
        close_in ic;
        Format.eprintf "%d@." !max
    

    (I cut some benchs)

    ❯ hyperfine --warmup 3 -P arg 1 7 'dune exec program -- {arg}'
    Benchmark #1: dune exec program -- 1
      Time (mean ± σ):      1.357 s ±  0.030 s    [User: 1.316 s, System: 0.041 s]
      Range (min … max):    1.333 s …  1.431 s    10 runs
    
    Benchmark #6: dune exec program -- 6
      Time (mean ± σ):      5.745 s ±  0.289 s    [User: 5.230 s, System: 0.513 s]
      Range (min … max):    5.549 s …  6.374 s    10 runs
    
    Benchmark #7: dune exec program -- 7
      Time (mean ± σ):      7.195 s ±  0.049 s    [User: 7.161 s, System: 0.034 s]
      Range (min … max):    7.148 s …  7.300 s    10 runs
    
    Summary
      'dune exec program -- 1' ran
        4.23 ± 0.23 times faster than 'dune exec program -- 6'
        5.30 ± 0.12 times faster than 'dune exec program -- 7'
    

    I may have not done it properly hence the poor result but this doesn't seem promising. The way I'm doing it is that I want to get the value as soon as it's read to handle it otherwise I'll have to create a list of values and this will be even worse (believe me, I tried, it took 30 seconds to find the max value).

    My dune file, in case you're wondering, looks like this (I have an empty program.opam file to please dune):

    (executable
     (name main)
     (public_name program)
    )
    
    (ocamllex lexer)
    (ocamllex lexer_list)
    (ocamllex lexer_parser)
    (ocamlyacc parser)