rustnom

Parse hex and decimal numbers


I have just started using the nom crate and tried the simplest example:

use nom::{
    bytes::complete::tag,
    character::complete::{hex_digit1, space0},
    combinator::{map, opt},
    error::Error,
    sequence::{delimited, pair},
    IResult,
};

/// Value produced by the number parser.
#[derive(Debug, PartialEq)]
pub enum Expression {
    /// An unsigned 16-bit number.
    Number(u16),
}

/// Converts matched digit text into an [`Expression::Number`].
///
/// When `prefix` is `Some`, `input` is read as base 16; when it is `None`,
/// `input` is read as base 10.
///
/// # Panics
///
/// Panics if `input` is not a valid `u16` in the selected base (for example
/// `"abc"` without a prefix, or a value above `u16::MAX`).
fn evaluate_number(prefix: Option<&str>, input: &str) -> Expression {
    println!("### evaluate_number input = {}", input);
    // TODO: convert negative number => u16!!! (hex case)
    let radix = if prefix.is_some() { 16 } else { 10 };
    let value = u16::from_str_radix(input, radix).unwrap();
    Expression::Number(value)
}

fn hex_digit_with_prefix(input: &str) -> IResult<&str, (Option<&str>, &str), Error<&str>> {
    pair(opt(tag("#")), hex_digit1)(input)
}

/// Parses a number surrounded by optional spaces/tabs into an `Expression`.
///
/// The digits (and optional `#` prefix) come from `hex_digit_with_prefix`
/// and are converted by `evaluate_number`.
fn parse_number(input: &str) -> IResult<&str, Expression> {
    let padded_number = delimited(space0, hex_digit_with_prefix, space0);
    map(padded_number, |(prefix, digits)| {
        evaluate_number(prefix, digits)
    })(input)
}

// cargo watch -- cargo test expression -- --nocapture
#[cfg(test)]
mod tests {
    use super::*;

    /// Checks one `#`-prefixed hex input and one plain decimal input,
    /// each padded with spaces.
    #[test]
    fn test_parse_number() {
        let (_, hex) = parse_number("  #abc  ").unwrap();
        assert_eq!(hex, Expression::Number(0xabc));

        let (_, dec) = parse_number("  987  ").unwrap();
        assert_eq!(dec, Expression::Number(987));
    }
}

Additionally, if I include a sign, things get more complicated:

fn parse_hex_digit(input: &str) -> IResult<&str, (Option<Option<&str>, &str>, &str), Error<&str>> {
    pair(pair(opt(tag("#")), opt(tag("#"))), hex_digit1)(input)
}

It looks like the approach given by the example may be more suitable. Could you please advise which direction to take in order to keep the code simple and maintainable?


Solution

  • I think your example looks good. I can't say whether it is much more readable and maintainable, but here is a slightly modified and probably slightly more straightforward syntax that also handles a sign. I was not sure why you need Expression, so I took the liberty of using plain integers. I hope that is OK; you can change it back to Expression if needed.

    use nom::{
        bytes::complete::tag,
        character::complete::{hex_digit1, space0},
        combinator::{map, opt},
        error::Error,
        sequence::{delimited, pair},
        IResult,
        branch::alt,
        multi::{many0, many1},
        combinator::{map_res, recognize},
        sequence::{preceded, terminated},
        character::complete::{char, one_of},
        sequence::tuple,
    };
    
    /// Parses an optionally negative decimal integer into an `i64`.
    ///
    /// The sign and digits are captured as one slice and handed to
    /// `str::parse`, so the full `i64` range is accepted, including
    /// `i64::MIN`, whose magnitude alone does not fit in an `i64`.
    /// Text that does not fit in an `i64` makes the parser return an error.
    fn decimal(input: &str) -> IResult<&str, i64> {
        map_res(
            recognize(tuple((opt(char('-')), many1(one_of("0123456789"))))),
            |text: &str| text.parse::<i64>(),
        )(input)
    }
    
    /// Parses an optionally negative hexadecimal integer into an `i64`.
    ///
    /// Accepted shape: an optional `-`, then one of the prefixes `#`, `0x`
    /// or `0X`, then one or more hex digits (either case). Text that does
    /// not fit in an `i64` makes the parser return an error.
    fn hexadecimal_value(input: &str) -> IResult<&str, i64> {
        map_res(
            tuple((
                opt(char('-')),
                alt((tag("#"), tag("0x"), tag("0X"))),
                recognize(many1(one_of("0123456789abcdefABCDEF"))),
            )),
            |(sign, _, digits): (Option<char>, &str, &str)| match sign {
                // Re-attach the sign before converting: negating afterwards
                // would reject `i64::MIN`, whose magnitude overflows `i64`.
                Some(_) => i64::from_str_radix(&format!("-{}", digits), 16),
                None => i64::from_str_radix(digits, 16),
            },
        )(input)
    }
    
    /// Parses a decimal or hexadecimal integer surrounded by optional
    /// spaces/tabs.
    ///
    /// The hexadecimal branch is tried first. With the decimal branch first,
    /// an input such as `0x1F` would be accepted as the decimal `0` (leaving
    /// `x1F` unconsumed) and the hexadecimal branch would never run.
    fn parse_number(input: &str) -> IResult<&str, i64> {
        delimited(
            space0,
            alt((hexadecimal_value, decimal)),
            space0,
        )(input)
    }
    
    // cargo watch -- cargo test expression -- --nocapture
    #[cfg(test)]
    mod tests {
        use super::*;

        /// Checks one negative `#`-prefixed hex input and one negative
        /// decimal input, each padded with spaces.
        #[test]
        fn test_parse_number() {
            let (_, hex) = parse_number("  -#abc  ").unwrap();
            assert_eq!(hex, -0xabc);

            let (_, dec) = parse_number("  -987  ").unwrap();
            assert_eq!(dec, -987);
        }
    }
    

    More examples are available here: https://github.com/Geal/nom/blob/main/doc/nom_recipes.md