Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf(parser): cache regex predicates with rc using router attribute #251

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
30 changes: 19 additions & 11 deletions src/ast.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::schema::Schema;
use cidr::IpCidr;
use regex::Regex;
use std::net::IpAddr;
use std::{net::IpAddr, rc::Rc};

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -53,7 +53,7 @@ pub enum Value {
IpAddr(IpAddr),
Int(i64),
#[cfg_attr(feature = "serde", serde(with = "serde_regex"))]
Regex(Regex),
Regex(Rc<Regex>),
}

impl PartialEq for Value {
Expand Down Expand Up @@ -137,7 +137,7 @@ pub struct Predicate {
mod tests {
use super::*;
use crate::parser::parse;
use std::fmt;
use std::{collections::HashMap, fmt};

impl fmt::Display for Expression {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Expand Down Expand Up @@ -240,6 +240,7 @@ mod tests {

#[test]
fn expr_op_and_prec() {
let mut regex_cache = HashMap::new();
let tests = vec![
("a > 0", "(a > 0)"),
("a in \"abc\"", "(a in \"abc\")"),
Expand Down Expand Up @@ -271,13 +272,14 @@ mod tests {
),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn expr_var_name_and_ip() {
let mut regex_cache = HashMap::new();
let tests = vec![
// ipv4_literal
("kong.foo in 1.1.1.1", "(kong.foo in 1.1.1.1)"),
Expand All @@ -298,13 +300,14 @@ mod tests {
),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn expr_regex() {
let mut regex_cache = HashMap::new();
let tests = vec![
// regex_literal
(
Expand All @@ -318,13 +321,14 @@ mod tests {
),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn expr_digits() {
let mut regex_cache = HashMap::new();
let tests = vec![
// dec literal
("kong.foo.foo7 == 123", "(kong.foo.foo7 == 123)"),
Expand All @@ -340,13 +344,14 @@ mod tests {
("kong.foo.foo12 == -0123", "(kong.foo.foo12 == -83)"),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn expr_transformations() {
let mut regex_cache = HashMap::new();
let tests = vec![
// lower
(
Expand All @@ -360,13 +365,14 @@ mod tests {
),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn expr_transformations_nested() {
let mut regex_cache = HashMap::new();
let tests = vec![
// lower + lower
(
Expand All @@ -390,35 +396,37 @@ mod tests {
),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn str_unicode_test() {
let mut regex_cache = HashMap::new();
let tests = vec![
// cjk chars
("t_msg in \"你好\"", "(t_msg in \"你好\")"),
// 0xXXX unicode
("t_msg in \"\u{4f60}\u{597d}\"", "(t_msg in \"你好\")"),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn rawstr_test() {
let mut regex_cache = HashMap::new();
let tests = vec![
// invalid escape sequence
(r##"a == r#"/path/to/\d+"#"##, r#"(a == "/path/to/\d+")"#),
// valid escape sequence
(r##"a == r#"/path/to/\n+"#"##, r#"(a == "/path/to/\n+")"#),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}
Expand Down
3 changes: 2 additions & 1 deletion src/cir.rs
Original file line number Diff line number Diff line change
Expand Up @@ -298,7 +298,8 @@ mod tests {
context.add_value("a", Value::Int(3 as i64));

for source in sources {
let ast = crate::parser::parse(source)
let mut regex_cache = HashMap::new();
let ast = crate::parser::parse(source, &mut regex_cache)
.map_err(|e| e.to_string())
.unwrap();
let mut mat = Match::new();
Expand Down
3 changes: 2 additions & 1 deletion src/ffi/expression.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,6 +3,7 @@ use crate::ffi::ERR_BUF_MAX_LEN;
use crate::schema::Schema;
use bitflags::bitflags;
use std::cmp::min;
use std::collections::HashMap;
use std::ffi;
use std::os::raw::c_char;
use std::slice::from_raw_parts_mut;
Expand Down Expand Up @@ -164,7 +165,7 @@ pub unsafe extern "C" fn expression_validate(
let errbuf = from_raw_parts_mut(errbuf, ERR_BUF_MAX_LEN);

// Parse the expression
let result = parse(atc).map_err(|e| e.to_string());
let result = parse(atc, &mut HashMap::new()).map_err(|e| e.to_string());
if let Err(e) = result {
let errlen = min(e.len(), *errbuf_len);
errbuf[..errlen].copy_from_slice(&e.as_bytes()[..errlen]);
Expand Down
90 changes: 59 additions & 31 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ use pest::pratt_parser::Assoc as AssocNew;
use pest::pratt_parser::{Op, PrattParser};
use pest::Parser;
use regex::Regex;
use std::collections::HashMap;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::rc::Rc;

type ParseResult<T> = Result<T, ParseError<Rule>>;
/// cbindgen:ignore
Expand Down Expand Up @@ -61,12 +63,16 @@ impl ATCParser {
}
// matcher = { SOI ~ expression ~ EOI }
#[allow(clippy::result_large_err)] // it's fine as parsing is not the hot path
fn parse_matcher(&mut self, source: &str) -> ParseResult<Expression> {
fn parse_matcher(
&mut self,
source: &str,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Expression> {
let pairs = ATCParser::parse(Rule::matcher, source)?;
let expr_pair = pairs.peek().unwrap().into_inner().peek().unwrap();
let rule = expr_pair.as_rule();
match rule {
Rule::expression => parse_expression(expr_pair, &self.pratt_parser),
Rule::expression => parse_expression(expr_pair, &self.pratt_parser, regex_cache),
_ => unreachable!(),
}
}
Expand Down Expand Up @@ -204,7 +210,10 @@ fn parse_int_literal(pair: Pair<Rule>) -> ParseResult<i64> {

// predicate = { lhs ~ binary_operator ~ rhs }
#[allow(clippy::result_large_err)] // it's fine as parsing is not the hot path
fn parse_predicate(pair: Pair<Rule>) -> ParseResult<Predicate> {
fn parse_predicate(
pair: Pair<Rule>,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Predicate> {
let mut pairs = pair.into_inner();
let lhs = parse_lhs(pairs.next().unwrap())?;
let op = parse_binary_operator(pairs.next().unwrap());
Expand All @@ -214,23 +223,22 @@ fn parse_predicate(pair: Pair<Rule>) -> ParseResult<Predicate> {
lhs,
rhs: if op == BinaryOperator::Regex {
if let Value::String(s) = rhs {
let r = Regex::new(&s).map_err(|e| {
ParseError::new_from_span(
ErrorVariant::CustomError {
message: e.to_string(),
},
rhs_pair.as_span(),
)
})?;

Value::Regex(r)
let regex_rc = match regex_cache.get(&s) {
Some(stored_regex_rc) => stored_regex_rc.clone(),
None => {
let r = Regex::new(&s).into_parse_result(&rhs_pair)?;

let rc = Rc::new(r);

regex_cache.insert(s, rc.clone());
rc
}
};

Value::Regex(regex_rc)
} else {
return Err(ParseError::new_from_span(
ErrorVariant::CustomError {
message: "regex operator can only be used with String operands".to_string(),
},
rhs_pair.as_span(),
));
return Err("regex operator can only be used with String operands")
.into_parse_result(&rhs_pair);
}
} else {
rhs
Expand Down Expand Up @@ -289,39 +297,53 @@ fn parse_binary_operator(pair: Pair<Rule>) -> BinaryOperator {
fn parse_parenthesised_expression(
pair: Pair<Rule>,
pratt: &PrattParser<Rule>,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Expression> {
let mut pairs = pair.into_inner();
let pair = pairs.next().unwrap();
let rule = pair.as_rule();
match rule {
Rule::expression => parse_expression(pair, pratt),
Rule::expression => parse_expression(pair, pratt, regex_cache),
Rule::not_op => Ok(Expression::Logical(Box::new(LogicalExpression::Not(
parse_expression(pairs.next().unwrap(), pratt)?,
parse_expression(pairs.next().unwrap(), pratt, regex_cache)?,
)))),
_ => unreachable!(),
}
}

// term = { predicate | parenthesised_expression }
#[allow(clippy::result_large_err)] // it's fine as parsing is not the hot path
fn parse_term(pair: Pair<Rule>, pratt: &PrattParser<Rule>) -> ParseResult<Expression> {
fn parse_term(
pair: Pair<Rule>,
pratt: &PrattParser<Rule>,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Expression> {
let pairs = pair.into_inner();
let inner_rule = pairs.peek().unwrap();
let rule = inner_rule.as_rule();
match rule {
Rule::predicate => Ok(Expression::Predicate(parse_predicate(inner_rule)?)),
Rule::parenthesised_expression => parse_parenthesised_expression(inner_rule, pratt),
Rule::predicate => Ok(Expression::Predicate(parse_predicate(
inner_rule,
regex_cache,
)?)),
Rule::parenthesised_expression => {
parse_parenthesised_expression(inner_rule, pratt, regex_cache)
}
_ => unreachable!(),
}
}

// expression = { term ~ ( logical_operator ~ term )* }
#[allow(clippy::result_large_err)] // it's fine as parsing is not the hot path
fn parse_expression(pair: Pair<Rule>, pratt: &PrattParser<Rule>) -> ParseResult<Expression> {
fn parse_expression(
pair: Pair<Rule>,
pratt: &PrattParser<Rule>,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Expression> {
let pairs = pair.into_inner();
pratt
.map_primary(|operand| match operand.as_rule() {
Rule::term => parse_term(operand, pratt),
Rule::term => parse_term(operand, pratt, regex_cache),
_ => unreachable!(),
})
.map_infix(|lhs, op, rhs| {
Expand All @@ -335,8 +357,11 @@ fn parse_expression(pair: Pair<Rule>, pratt: &PrattParser<Rule>) -> ParseResult<
}

#[allow(clippy::result_large_err)] // it's fine as parsing is not the hot path
pub fn parse(source: &str) -> ParseResult<Expression> {
ATCParser::new().parse_matcher(source)
pub fn parse(
source: &str,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Expression> {
ATCParser::new().parse_matcher(source, regex_cache)
}

#[cfg(test)]
Expand All @@ -345,16 +370,19 @@ mod tests {

#[test]
fn test_bad_syntax() {
let mut regex_cache = HashMap::new();
assert_eq!(
parse("! a == 1").unwrap_err().to_string(),
parse("! a == 1", &mut regex_cache).unwrap_err().to_string(),
" --> 1:1\n |\n1 | ! a == 1\n | ^---\n |\n = expected term"
);
assert_eq!(
parse("a == 1 || ! b == 2").unwrap_err().to_string(),
parse("a == 1 || ! b == 2", &mut regex_cache)
.unwrap_err()
.to_string(),
" --> 1:11\n |\n1 | a == 1 || ! b == 2\n | ^---\n |\n = expected term"
);
assert_eq!(
parse("(a == 1 || b == 2) && ! c == 3")
parse("(a == 1 || b == 2) && ! c == 3", &mut regex_cache)
.unwrap_err()
.to_string(),
" --> 1:23\n |\n1 | (a == 1 || b == 2) && ! c == 3\n | ^---\n |\n = expected term"
Expand Down
Loading
Loading