Combine (revisited)

I wanted to revisit combine. While the original implementation works, I was able to create one that is actually faster than nom's. Doing so required quite a bit of additional work to understand how the "zero-copy" tools in combine work. In truth, I still haven't been able to figure out how the type annotations need to be set for this implementation to work; instead, each parser is assigned to a variable inside of a function body. With the following implementation of the combine parser, the whole set of benchmarks is:

  • Learning Curve gets steeper
  • Performance gets better
crate parse 1 (+/-) parse 1000 (+/-) build time bin size
combine 893.00ns (37.00ns) 1.17s (73.91ms) 25.41s 723.99 kb
nom 1.33ms (69.00ns) 833.53ms (37.55ms) 10.66s 727.08 kb
pest 3.66ms (342.00ns) 3.89s (279.92ms) 31.31s 767.86 kb
hand_rolled 694.00ns (93.00ns) 551.86ms (77.66ms) 10.81s 718.87 kb

# #![allow(unused_variables)]
#fn main() {
extern crate duration;
use duration::{Duration, DurationPart};
extern crate combine;
use combine::{
    optional,
    Parser,
    range::recognize,
    parser::{
        item::item,
        byte::digit,
        repeat::{
            skip_many,
            skip_many1
        },
        repeat::skip_count,
    },
    error::UnexpectedParse,
};
/// Parses a duration string of the form `P<date parts>T<time parts>` into a
/// [`Duration`].
///
/// Each part is a decimal number (digits with an optional fractional part)
/// followed by a one-byte designator. At most four date parts (`Y`, `M`, `W`,
/// `D`) may follow `P`, and at most three time parts (`H`, `M`, `S`) may follow
/// `T`. `M` means months in the date section and minutes in the time section.
///
/// # Errors
///
/// Returns `Err` with a formatted message when the combine parser rejects the
/// input (including a designator that is not valid for the section it appears
/// in), or when unparsed bytes remain after the duration.
pub fn parse(s: &str) -> Result<Duration, String> {
    // Numeric value: one or more digits, optionally followed by `.` and more
    // digits. `recognize` yields the matched byte slice without copying; it is
    // then converted to `f32`.
    let value = || {
        recognize((
            skip_many1(digit()),
            optional((
                item(b'.'),
                skip_many(digit())
            )),
        ))
        .and_then(|bs: &[u8]| {
            let s = ::std::str::from_utf8(bs).map_err(|_| UnexpectedParse::Unexpected)?;
            s.parse::<f32>().map_err(|_| UnexpectedParse::Unexpected)
        })
    };
    // A value followed by its designator byte. `time` says which section the
    // pair belongs to; a designator is only accepted in its own section, so
    // e.g. `H` before `T` or `Y` after `T` is rejected rather than silently
    // accepted.
    let pair = |time: bool| {
        (
            value(),
            combine::parser::item::any()
        ).and_then(move |(v, c): (f32, u8)| {
            match (c, time) {
                (b'Y', false) => Ok(DurationPart::Years(v)),
                (b'M', false) => Ok(DurationPart::Months(v)),
                (b'W', false) => Ok(DurationPart::Weeks(v)),
                (b'D', false) => Ok(DurationPart::Days(v)),
                (b'H', true) => Ok(DurationPart::Hours(v)),
                (b'M', true) => Ok(DurationPart::Minutes(v)),
                (b'S', true) => Ok(DurationPart::Seconds(v)),
                _ => Err(UnexpectedParse::Unexpected),
            }
        })
    };
    // The identity `map` calls below exist only to carry explicit type
    // annotations for the parser outputs.
    // NOTE(review): per the combine docs `skip_count` matches from zero up to
    // `count` occurrences, so the `P` and `T` markers look optional here —
    // confirm whether that leniency is intended.
    let date_part = combine::count(4, pair(false)).map(|p: Vec<DurationPart>| p);
    let time_part = skip_count(1, item(b'T')).and(combine::count(3, pair(true))).map(|(_, p): (_, Vec<DurationPart>)| p);
    let mut duration = skip_count(1, item(b'P')).and(date_part).and(time_part).map(|((_, d), t): ((_, std::vec::Vec<DurationPart>), std::vec::Vec<DurationPart>)| (d, t));
    let ((date_parts, time_parts), rem): ((Vec<DurationPart>, Vec<DurationPart>), &[u8]) = duration.parse(s.as_bytes()).map_err(|e| e.to_string())?;
    // Anything left over means the input was not a complete duration.
    if !rem.is_empty() {
        return Err(format!("did not parse full string provided {}", String::from_utf8_lossy(rem)));
    }
    let mut ret = Duration::new();
    // Apply every parsed part, date parts first, consuming the vectors.
    for part in date_parts.into_iter().chain(time_parts) {
        match part {
            DurationPart::Years(v) => ret.set_years(v),
            DurationPart::Months(v) => ret.set_months(v),
            DurationPart::Weeks(v) => ret.set_weeks(v),
            DurationPart::Days(v) => ret.set_days(v),
            DurationPart::Hours(v) => ret.set_hours(v),
            DurationPart::Minutes(v) => ret.set_minutes(v),
            DurationPart::Seconds(v) => ret.set_seconds(v),
        }
    }
    Ok(ret)
}
#}