multithreading, rust, rayon

How to parallelize processing of Oxc's AST?


Currently I'm trying to process huge TypeScript files with oxc_parser in Rust:

let output: Vec<&oxc_allocator::Box<'_, oxc_ast::ast::TSModuleDeclaration<'_>>> =
    parsed_source
        .program
        .body
        .iter()
        .filter_map(|each| {
            // Complex algorithm here...
        })
        .collect::<Vec<_>>();

It works, but pegging a single core at 100% is not good enough for this case. So I tried to use rayon to distribute the load across CPU cores with these changes:

use rayon::iter::IntoParallelRefIterator; // new line

// ...

let output: Vec<&oxc_allocator::Box<'_, oxc_ast::ast::TSModuleDeclaration<'_>>> =
    parsed_source
        .program
        .body
        .par_iter() // changed from .iter()
        .filter_map(|each| {
            // Complex algorithm here...
        })
        .collect::<Vec<_>>();

However, I'm stuck on this error:

error[E0599]: the method `par_iter` exists for struct `Vec<'_, Statement<'_>>`, but its trait bounds were not satisfied
   --> src/lib/parse.rs:27:5
    |
24  | /         parsed_source
25  | |             .program
26  | |             .body
27  | |             .par_iter()
    | |_____________-^^^^^^^^
    |
   ::: /home/athaariq/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/oxc_allocator-0.61.2/src/vec.rs:37:1
    |
37  |   pub struct Vec<'alloc, T>(InnerVec<'alloc, T>);
    |   ------------------------- doesn't satisfy `_: IntoParallelRefIterator<'_>`
    |
   ::: /home/athaariq/.cargo/registry/src/index.crates.io-1949cf8c6b5b557f/oxc_allocator-0.61.2/src/vec2/mod.rs:544:1
    |
544 |   pub struct Vec<'bump, T: 'bump> {
    |   ------------------------------- doesn't satisfy `_: IntoParallelRefIterator<'_>`
    |
    = note: the following trait bounds were not satisfied:
            `&oxc_allocator::Vec<'_, Statement<'_>>: IntoParallelIterator`
            which is required by `oxc_allocator::Vec<'_, Statement<'_>>: IntoParallelRefIterator<'_>`
            `&oxc_allocator::vec2::Vec<'_, Statement<'_>>: IntoParallelIterator`
            which is required by `oxc_allocator::vec2::Vec<'_, Statement<'_>>: IntoParallelRefIterator<'_>`
            `&[Statement<'_>]: IntoParallelIterator`
            which is required by `[Statement<'_>]: IntoParallelRefIterator<'_>`

I have no idea why this error happens. Have I missed something? Or are there extra steps needed to solve this?


Solution

  • As mentioned in the comments, Oxc has no multi-threading capability: its arena-backed oxc_allocator::Vec does not implement rayon's IntoParallelRefIterator, so there is nothing I can do on my side.

    The only solution is to use a different parser like swc_ecma_parser.

    use rayon::iter::{IntoParallelRefIterator, ParallelIterator};
    
    // ...
    
    // swc tracks loaded files and their spans in a SourceMap, not an arena allocator.
    let source_map: swc_common::sync::Lrc<swc_common::SourceMap> = Default::default();
    
    let file_path: &str = "my_huge_typescript_file.ts";
    let relative_path: &std::path::Path = std::path::Path::new(file_path);
    
    let file_source_result: Result<std::sync::Arc<swc_common::SourceFile>, std::io::Error> =
        source_map.load_file(relative_path);
    if file_source_result.is_err() {
        return Err(file_source_result.unwrap_err().to_string());
    }
    
    let source: std::sync::Arc<swc_common::SourceFile> = file_source_result.unwrap();
    
    let lexer: swc_ecma_parser::lexer::Lexer<'_> = swc_ecma_parser::lexer::Lexer::new(
        swc_ecma_parser::Syntax::Typescript(Default::default()),
        Default::default(),
        swc_ecma_parser::StringInput::from(&*source),
        None,
    );
    
    let mut parser: swc_ecma_parser::Parser<swc_ecma_parser::lexer::Lexer<'_>> =
        swc_ecma_parser::Parser::new_from(lexer);
    
    let parser_errors: Vec<swc_ecma_parser::error::Error> = parser.take_errors();
    let parser_error_messages = parser_errors
        .par_iter() // it works!
        .map(|each| each.kind().msg().to_string())
        .collect::<Vec<_>>();
    
    // More code here...
    

    From here, I can see the load evenly distributed across CPU cores. However, I wouldn't recommend this for small files, since it's overkill there. A sketch of how the parallel walk over the parsed AST itself could look follows below.
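
    For completeness, here is a minimal sketch of how the elided part could continue, assuming the goal is still the original one: collecting TypeScript module/namespace declarations from the AST in parallel. It is illustrative rather than taken from my real code; it uses swc_ecma_parser's parse_module() plus the swc_ecma_ast crate (which has to be added as a dependency), and it assumes the surrounding function returns Result<_, String>, like the error handling above. Because swc's AST is built from plain owned types rather than Oxc's arena-backed nodes, references into it can be handed to rayon's worker threads.

    // Hypothetical continuation, not part of the code above:
    // parse the file as a module (the syntax was already set to TypeScript).
    let module: swc_ecma_ast::Module = parser
        .parse_module()
        .map_err(|each| each.kind().msg().to_string())?; // `?` assumes a Result<_, String> return type

    // Walk the module body in parallel, mirroring the original oxc-based filter_map:
    // keep only references to `module` / `namespace` declarations.
    let ts_modules: Vec<&swc_ecma_ast::TsModuleDecl> = module
        .body
        .par_iter()
        .filter_map(|item| match item {
            swc_ecma_ast::ModuleItem::Stmt(swc_ecma_ast::Stmt::Decl(
                swc_ecma_ast::Decl::TsModule(declaration),
            )) => Some(&**declaration),
            _ => None,
        })
        .collect::<Vec<_>>();

    The heavy per-statement work from the oxc version can then live inside that parallel filter_map closure.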