Skip to content

Commit

Permalink
perf(*): cache regex predicates with rc using router attribute
Browse files Browse the repository at this point in the history
KAG-3182
  • Loading branch information
nowNick committed Sep 19, 2024
1 parent 3f8f648 commit 2e13e8f
Show file tree
Hide file tree
Showing 4 changed files with 98 additions and 43 deletions.
30 changes: 19 additions & 11 deletions src/ast.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::schema::Schema;
use cidr::IpCidr;
use regex::Regex;
use std::net::IpAddr;
use std::{net::IpAddr, rc::Rc};

#[cfg(feature = "serde")]
use serde::{Deserialize, Serialize};
Expand Down Expand Up @@ -53,7 +53,7 @@ pub enum Value {
IpAddr(IpAddr),
Int(i64),
#[cfg_attr(feature = "serde", serde(with = "serde_regex"))]
Regex(Regex),
Regex(Rc<Regex>),
}

impl PartialEq for Value {
Expand Down Expand Up @@ -137,7 +137,7 @@ pub struct Predicate {
mod tests {
use super::*;
use crate::parser::parse;
use std::fmt;
use std::{collections::HashMap, fmt};

impl fmt::Display for Expression {
fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
Expand Down Expand Up @@ -240,6 +240,7 @@ mod tests {

#[test]
fn expr_op_and_prec() {
let mut regex_cache = HashMap::new();
let tests = vec![
("a > 0", "(a > 0)"),
("a in \"abc\"", "(a in \"abc\")"),
Expand Down Expand Up @@ -271,13 +272,14 @@ mod tests {
),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn expr_var_name_and_ip() {
let mut regex_cache = HashMap::new();
let tests = vec![
// ipv4_literal
("kong.foo in 1.1.1.1", "(kong.foo in 1.1.1.1)"),
Expand All @@ -298,13 +300,14 @@ mod tests {
),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn expr_regex() {
let mut regex_cache = HashMap::new();
let tests = vec![
// regex_literal
(
Expand All @@ -318,13 +321,14 @@ mod tests {
),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn expr_digits() {
let mut regex_cache = HashMap::new();
let tests = vec![
// dec literal
("kong.foo.foo7 == 123", "(kong.foo.foo7 == 123)"),
Expand All @@ -340,13 +344,14 @@ mod tests {
("kong.foo.foo12 == -0123", "(kong.foo.foo12 == -83)"),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn expr_transformations() {
let mut regex_cache = HashMap::new();
let tests = vec![
// lower
(
Expand All @@ -360,13 +365,14 @@ mod tests {
),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn expr_transformations_nested() {
let mut regex_cache = HashMap::new();
let tests = vec![
// lower + lower
(
Expand All @@ -390,35 +396,37 @@ mod tests {
),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn str_unicode_test() {
let mut regex_cache = HashMap::new();
let tests = vec![
// cjk chars
("t_msg in \"你好\"", "(t_msg in \"你好\")"),
// 0xXXX unicode
("t_msg in \"\u{4f60}\u{597d}\"", "(t_msg in \"你好\")"),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}

#[test]
fn rawstr_test() {
let mut regex_cache = HashMap::new();
let tests = vec![
// invalid escape sequence
(r##"a == r#"/path/to/\d+"#"##, r#"(a == "/path/to/\d+")"#),
// valid escape sequence
(r##"a == r#"/path/to/\n+"#"##, r#"(a == "/path/to/\n+")"#),
];
for (input, expected) in tests {
let result = parse(input).unwrap();
let result = parse(input, &mut regex_cache).unwrap();
assert_eq!(result.to_string(), expected);
}
}
Expand Down
87 changes: 63 additions & 24 deletions src/parser.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,9 @@ use pest::pratt_parser::Assoc as AssocNew;
use pest::pratt_parser::{Op, PrattParser};
use pest::Parser;
use regex::Regex;
use std::collections::HashMap;
use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
use std::rc::Rc;

type ParseResult<T> = Result<T, ParseError<Rule>>;
/// cbindgen:ignore
Expand Down Expand Up @@ -59,12 +61,16 @@ impl ATCParser {
}
}
// matcher = { SOI ~ expression ~ EOI }
fn parse_matcher(&mut self, source: &str) -> ParseResult<Expression> {
fn parse_matcher(
&mut self,
source: &str,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Expression> {

Check warning on line 68 in src/parser.rs

View workflow job for this annotation

GitHub Actions / Rust Clippy Report

the `Err`-variant returned from this function is very large

warning: the `Err`-variant returned from this function is very large
  --> src/parser.rs:68:10
   |
68 |     ) -> ParseResult<Expression> {
   |          ^^^^^^^^^^^^^^^^^^^^^^^ the `Err`-variant is at least 264 bytes
   |
   = help: try reducing the size of `parser::pest::error::Error<parser::Rule>`, for example by boxing large elements or replacing it with `Box<parser::pest::error::Error<parser::Rule>>`
   = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#result_large_err
let pairs = ATCParser::parse(Rule::matcher, source)?;
let expr_pair = pairs.peek().unwrap().into_inner().peek().unwrap();
let rule = expr_pair.as_rule();
match rule {
Rule::expression => parse_expression(expr_pair, &self.pratt_parser),
Rule::expression => parse_expression(expr_pair, &self.pratt_parser, regex_cache),
_ => unreachable!(),
}
}
Expand Down Expand Up @@ -185,7 +191,10 @@ fn parse_int_literal(pair: Pair<Rule>) -> ParseResult<i64> {
}

// predicate = { lhs ~ binary_operator ~ rhs }
fn parse_predicate(pair: Pair<Rule>) -> ParseResult<Predicate> {
fn parse_predicate(
pair: Pair<Rule>,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Predicate> {

Check warning on line 197 in src/parser.rs

View workflow job for this annotation

GitHub Actions / Rust Clippy Report

the `Err`-variant returned from this function is very large

warning: the `Err`-variant returned from this function is very large
   --> src/parser.rs:197:6
    |
197 | ) -> ParseResult<Predicate> {
    |      ^^^^^^^^^^^^^^^^^^^^^^ the `Err`-variant is at least 264 bytes
    |
    = help: try reducing the size of `parser::pest::error::Error<parser::Rule>`, for example by boxing large elements or replacing it with `Box<parser::pest::error::Error<parser::Rule>>`
    = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#result_large_err
let mut pairs = pair.into_inner();
let lhs = parse_lhs(pairs.next().unwrap())?;
let op = parse_binary_operator(pairs.next().unwrap());
Expand All @@ -195,16 +204,26 @@ fn parse_predicate(pair: Pair<Rule>) -> ParseResult<Predicate> {
lhs,
rhs: if op == BinaryOperator::Regex {
if let Value::String(s) = rhs {
let r = Regex::new(&s).map_err(|e| {
ParseError::new_from_span(
ErrorVariant::CustomError {
message: e.to_string(),
},
rhs_pair.as_span(),
)
})?;
let regex_rc = match regex_cache.get(&s) {
Some(stored_regex_rc) => stored_regex_rc.clone(),
_ => {
let r = Regex::new(&s).map_err(|e| {
ParseError::new_from_span(
ErrorVariant::CustomError {
message: e.to_string(),
},
rhs_pair.as_span(),
)
})?;

let rc = Rc::new(r);

Value::Regex(r)
regex_cache.insert(s, rc.clone());
rc
}
};

Value::Regex(regex_rc)
} else {
return Err(ParseError::new_from_span(
ErrorVariant::CustomError {
Expand Down Expand Up @@ -267,37 +286,51 @@ fn parse_binary_operator(pair: Pair<Rule>) -> BinaryOperator {
fn parse_parenthesised_expression(
pair: Pair<Rule>,
pratt: &PrattParser<Rule>,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Expression> {

Check warning on line 290 in src/parser.rs

View workflow job for this annotation

GitHub Actions / Rust Clippy Report

the `Err`-variant returned from this function is very large

warning: the `Err`-variant returned from this function is very large
   --> src/parser.rs:290:6
    |
290 | ) -> ParseResult<Expression> {
    |      ^^^^^^^^^^^^^^^^^^^^^^^ the `Err`-variant is at least 264 bytes
    |
    = help: try reducing the size of `parser::pest::error::Error<parser::Rule>`, for example by boxing large elements or replacing it with `Box<parser::pest::error::Error<parser::Rule>>`
    = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#result_large_err
let mut pairs = pair.into_inner();
let pair = pairs.next().unwrap();
let rule = pair.as_rule();
match rule {
Rule::expression => parse_expression(pair, pratt),
Rule::expression => parse_expression(pair, pratt, regex_cache),
Rule::not_op => Ok(Expression::Logical(Box::new(LogicalExpression::Not(
parse_expression(pairs.next().unwrap(), pratt)?,
parse_expression(pairs.next().unwrap(), pratt, regex_cache)?,
)))),
_ => unreachable!(),
}
}

// term = { predicate | parenthesised_expression }
fn parse_term(pair: Pair<Rule>, pratt: &PrattParser<Rule>) -> ParseResult<Expression> {
fn parse_term(
pair: Pair<Rule>,
pratt: &PrattParser<Rule>,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Expression> {

Check warning on line 308 in src/parser.rs

View workflow job for this annotation

GitHub Actions / Rust Clippy Report

the `Err`-variant returned from this function is very large

warning: the `Err`-variant returned from this function is very large
   --> src/parser.rs:308:6
    |
308 | ) -> ParseResult<Expression> {
    |      ^^^^^^^^^^^^^^^^^^^^^^^ the `Err`-variant is at least 264 bytes
    |
    = help: try reducing the size of `parser::pest::error::Error<parser::Rule>`, for example by boxing large elements or replacing it with `Box<parser::pest::error::Error<parser::Rule>>`
    = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#result_large_err
let pairs = pair.into_inner();
let inner_rule = pairs.peek().unwrap();
let rule = inner_rule.as_rule();
match rule {
Rule::predicate => Ok(Expression::Predicate(parse_predicate(inner_rule)?)),
Rule::parenthesised_expression => parse_parenthesised_expression(inner_rule, pratt),
Rule::predicate => Ok(Expression::Predicate(parse_predicate(
inner_rule,
regex_cache,
)?)),
Rule::parenthesised_expression => {
parse_parenthesised_expression(inner_rule, pratt, regex_cache)
}
_ => unreachable!(),
}
}

// expression = { term ~ ( logical_operator ~ term )* }
fn parse_expression(pair: Pair<Rule>, pratt: &PrattParser<Rule>) -> ParseResult<Expression> {
fn parse_expression(
pair: Pair<Rule>,
pratt: &PrattParser<Rule>,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Expression> {

Check warning on line 329 in src/parser.rs

View workflow job for this annotation

GitHub Actions / Rust Clippy Report

the `Err`-variant returned from this function is very large

warning: the `Err`-variant returned from this function is very large
   --> src/parser.rs:329:6
    |
329 | ) -> ParseResult<Expression> {
    |      ^^^^^^^^^^^^^^^^^^^^^^^ the `Err`-variant is at least 264 bytes
    |
    = help: try reducing the size of `parser::pest::error::Error<parser::Rule>`, for example by boxing large elements or replacing it with `Box<parser::pest::error::Error<parser::Rule>>`
    = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#result_large_err
let pairs = pair.into_inner();
pratt
.map_primary(|operand| match operand.as_rule() {
Rule::term => parse_term(operand, pratt),
Rule::term => parse_term(operand, pratt, regex_cache),
_ => unreachable!(),
})
.map_infix(|lhs, op, rhs| {
Expand All @@ -310,8 +343,11 @@ fn parse_expression(pair: Pair<Rule>, pratt: &PrattParser<Rule>) -> ParseResult<
.parse(pairs)
}

pub fn parse(source: &str) -> ParseResult<Expression> {
ATCParser::new().parse_matcher(source)
pub fn parse(
source: &str,
regex_cache: &mut HashMap<String, Rc<Regex>>,
) -> ParseResult<Expression> {

Check warning on line 349 in src/parser.rs

View workflow job for this annotation

GitHub Actions / Rust Clippy Report

the `Err`-variant returned from this function is very large

warning: the `Err`-variant returned from this function is very large
   --> src/parser.rs:349:6
    |
349 | ) -> ParseResult<Expression> {
    |      ^^^^^^^^^^^^^^^^^^^^^^^ the `Err`-variant is at least 264 bytes
    |
    = help: try reducing the size of `parser::pest::error::Error<parser::Rule>`, for example by boxing large elements or replacing it with `Box<parser::pest::error::Error<parser::Rule>>`
    = help: for further information visit https://rust-lang.github.io/rust-clippy/master/index.html#result_large_err
ATCParser::new().parse_matcher(source, regex_cache)
}

#[cfg(test)]
Expand All @@ -320,16 +356,19 @@ mod tests {

#[test]
fn test_bad_syntax() {
let mut regex_cache = HashMap::new();
assert_eq!(
parse("! a == 1").unwrap_err().to_string(),
parse("! a == 1", &mut regex_cache).unwrap_err().to_string(),
" --> 1:1\n |\n1 | ! a == 1\n | ^---\n |\n = expected term"
);
assert_eq!(
parse("a == 1 || ! b == 2").unwrap_err().to_string(),
parse("a == 1 || ! b == 2", &mut regex_cache)
.unwrap_err()
.to_string(),
" --> 1:11\n |\n1 | a == 1 || ! b == 2\n | ^---\n |\n = expected term"
);
assert_eq!(
parse("(a == 1 || b == 2) && ! c == 3")
parse("(a == 1 || b == 2) && ! c == 3", &mut regex_cache)
.unwrap_err()
.to_string(),
" --> 1:23\n |\n1 | (a == 1 || b == 2) && ! c == 3\n | ^---\n |\n = expected term"
Expand Down
6 changes: 5 additions & 1 deletion src/router.rs
Original file line number Diff line number Diff line change
Expand Up @@ -4,7 +4,9 @@ use crate::interpreter::Execute;
use crate::parser::parse;
use crate::schema::Schema;
use crate::semantics::{FieldCounter, Validate};
use regex::Regex;
use std::collections::{BTreeMap, HashMap};
use std::rc::Rc;
use uuid::Uuid;

#[derive(PartialEq, Eq, PartialOrd, Ord)]
Expand All @@ -14,6 +16,7 @@ pub struct Router<'a> {
schema: &'a Schema,
matchers: BTreeMap<MatcherKey, Expression>,
pub fields: HashMap<String, usize>,
regex_cache: HashMap<String, Rc<Regex>>,
}

impl<'a> Router<'a> {
Expand All @@ -22,6 +25,7 @@ impl<'a> Router<'a> {
schema,
matchers: BTreeMap::new(),
fields: HashMap::new(),
regex_cache: HashMap::new(),
}
}

Expand All @@ -32,7 +36,7 @@ impl<'a> Router<'a> {
return Err("UUID already exists".to_string());
}

let ast = parse(atc).map_err(|e| e.to_string())?;
let ast = parse(atc, &mut self.regex_cache).map_err(|e| e.to_string())?;

ast.validate(self.schema)?;
ast.add_to_counter(&mut self.fields);
Expand Down
Loading

0 comments on commit 2e13e8f

Please sign in to comment.