This question is a continuation of a previous question. I wrote the following piece of code to determine whether `File.openRead()` creates a Stream that can be consumed line by line. It turns out that the answer is no: the entire file is read and then passed to the next transform. My question is then: how do you stream a file line by line in Dart?
import 'dart:async';
import 'dart:convert';
import 'dart:io';
/// Streams `Data.txt` through the intercepting UTF-8 decoder and line splitter
/// so that their logging reveals how the file is handed to each stage.
///
/// Every line is received and dropped; a stream error is printed.
void main(List<String> arguments) {
  final Stream<List<int>> bytes = new File('Data.txt').openRead();
  final decoded = bytes.transform(const Utf8InterceptDecoder());
  final lines = decoded.transform(const LineSplitterIntercept());

  final subscription = lines.listen((line) {
    // stdout.writeln(line);
  });

  // Turn the subscription into a future so that a stream error can be reported.
  subscription.asFuture().catchError((error) => print(error));
}
/// Number of times [LineSplitterIntercept.startChunkedConversion] has run.
int lineSplitCount = 0;

/// A [LineSplitter] that logs to stdout whenever one of its conversion entry
/// points is invoked and then defers to the inherited behaviour.
class LineSplitterIntercept extends LineSplitter {
  const LineSplitterIntercept() : super();

  /// Logs [data] and returns the lines produced by the inherited `convert`.
  ///
  /// Never gets called when the splitter is used through `Stream.transform`;
  /// that path goes through [startChunkedConversion] instead.
  @override
  List<String> convert(String data) {
    stdout.writeln("LineSplitterIntercept.convert : Data:$data");
    return super.convert(data);
  }

  /// Logs the current [lineSplitCount] and the target [sink], bumps the
  /// counter, and returns the input sink created by the inherited method.
  ///
  /// The parameter is typed `Sink<String>` so that the override accepts
  /// everything the overridden method accepts.
  @override
  StringConversionSink startChunkedConversion(Sink<String> sink) {
    // The count is printed before it is incremented, so the first call logs 0.
    stdout.writeln("LineSplitterIntercept.startChunkedConversion "
        "Count:$lineSplitCount Sink: $sink");
    lineSplitCount++;
    return super.startChunkedConversion(sink);
  }
}
/// Number of times [Utf8InterceptDecoder.startChunkedConversion] has run.
int utfCount = 0;

/// A [Utf8Decoder] that logs to stdout whenever one of its conversion entry
/// points is invoked and then defers to the inherited behaviour.
class Utf8InterceptDecoder extends Utf8Decoder {
  const Utf8InterceptDecoder() : super();

  /// Logs the length of [codeUnits] and returns the inherited decoding.
  ///
  /// Never gets called when the decoder is used through `Stream.transform`;
  /// that path goes through [startChunkedConversion] instead.
  @override
  String convert(List<int> codeUnits) {
    stdout.writeln(
        "Utf8InterceptDecoder.convert : codeUnits.length:${codeUnits.length}");
    return super.convert(codeUnits);
  }

  /// Logs the current [utfCount] and the target [sink], bumps the counter, and
  /// returns the byte sink created by the inherited method.
  ///
  /// The parameter is typed `Sink<String>` so that the override accepts
  /// everything the overridden method accepts.
  @override
  ByteConversionSink startChunkedConversion(Sink<String> sink) {
    // The count is printed before it is incremented, so the first call logs 0.
    stdout.writeln("Utf8InterceptDecoder.startChunkedConversion "
        "Count:$utfCount Sink: $sink");
    utfCount++;
    return super.startChunkedConversion(sink);
  }
}
The converter's `startChunkedConversion` is only called once, when the transformation is started. However, the returned sink's `add` method is invoked multiple times, each time with a part of the file.
It's up to the source to decide how big the chunks are, but a 37MB file (as mentioned in your previous question) will definitely be sent in smaller chunks.
If you want to see the chunks, you can either intercept `startChunkedConversion` and return a wrapped sink, or you can put yourself between the `openRead` call and the transformer.
Intercept:
/// A pass-through sink that logs each call to [add] before forwarding the
/// chunk to the wrapped sink.
///
/// Declares `implements Sink` so that an instance can be returned wherever a
/// sink is expected, such as from a converter's `startChunkedConversion`.
class InterceptSink implements Sink {
  /// Total number of [add] calls, shared by all instances.
  static int lineSplitCount = 0;

  /// The wrapped sink; left untyped so any object with `add`/`close` works.
  final _sink;

  /// Wraps [_sink], which receives every chunk and the final `close`.
  InterceptSink(this._sink);

  /// Logs the running call count, increments it, and forwards [x].
  @override
  void add(x) {
    // The count is printed before it is incremented, so the first call logs 0.
    print("InterceptSink.add Count: $lineSplitCount");
    lineSplitCount++;
    _sink.add(x);
  }

  /// Closes the wrapped sink.
  @override
  void close() {
    _sink.close();
  }
}
/// A converter that splits chunked input into lines like [LineSplitter], but
/// wraps the splitter's input sink in an [InterceptSink] so that each incoming
/// chunk is logged.
class LineSplitterIntercept extends Converter {
  /// One-shot conversion is not supported; always throws.
  @override
  convert(x) => throw "unimplemented";

  /// Returns an [InterceptSink] that feeds a fresh [LineSplitter] whose output
  /// goes to [outSink].
  @override
  startChunkedConversion(outSink) =>
      new InterceptSink(new LineSplitter().startChunkedConversion(outSink));
}
After `openRead`:
file.openRead()
    // `UTF8` is spelled `utf8` in Dart 2 and later.
    .transform(UTF8.decoder)
    // Log the size of each decoded chunk, then pass it on unchanged.
    .map((x) {
      print("chunk size: ${x.length}");
      return x;
    })
    .transform(new LineSplitter())
...