Kong · nowNick · Sep 12, 2024
diff --git a/src/ast.rs b/src/ast.rs
@@ -1,7 +1,7 @@
 use crate::schema::Schema;
 use cidr::IpCidr;
 use regex::Regex;
-use std::net::IpAddr;
+use std::{net::IpAddr, rc::Rc};
 
 #[cfg(feature = "serde")]
 use serde::{Deserialize, Serialize};
@@ -53,7 +53,7 @@ pub enum Value {
     IpAddr(IpAddr),
     Int(i64),
     #[cfg_attr(feature = "serde", serde(with = "serde_regex"))]
-    Regex(Regex),
+    Regex(Rc<Regex>),
 }
 
 impl PartialEq for Value {
@@ -137,7 +137,7 @@ pub struct Predicate {
 mod tests {
     use super::*;
     use crate::parser::parse;
-    use std::fmt;
+    use std::{collections::HashMap, fmt};
 
     impl fmt::Display for Expression {
         fn fmt(&self, f: &mut fmt::Formatter) -> fmt::Result {
@@ -240,6 +240,7 @@ mod tests {
 
     #[test]
     fn expr_op_and_prec() {
+        let mut regex_cache = HashMap::new();
         let tests = vec![
             ("a > 0", "(a > 0)"),
             ("a in \"abc\"", "(a in \"abc\")"),
@@ -271,13 +272,14 @@ mod tests {
             ),
         ];
         for (input, expected) in tests {
-            let result = parse(input).unwrap();
+            let result = parse(input, &mut regex_cache).unwrap();
             assert_eq!(result.to_string(), expected);
         }
     }
 
     #[test]
     fn expr_var_name_and_ip() {
+        let mut regex_cache = HashMap::new();
         let tests = vec![
             // ipv4_literal
             ("kong.foo in 1.1.1.1", "(kong.foo in 1.1.1.1)"),
@@ -298,13 +300,14 @@ mod tests {
             ),
         ];
         for (input, expected) in tests {
-            let result = parse(input).unwrap();
+            let result = parse(input, &mut regex_cache).unwrap();
             assert_eq!(result.to_string(), expected);
         }
     }
 
     #[test]
     fn expr_regex() {
+        let mut regex_cache = HashMap::new();
         let tests = vec![
             // regex_literal
             (
@@ -318,13 +321,14 @@ mod tests {
             ),
         ];
         for (input, expected) in tests {
-            let result = parse(input).unwrap();
+            let result = parse(input, &mut regex_cache).unwrap();
             assert_eq!(result.to_string(), expected);
         }
     }
 
     #[test]
     fn expr_digits() {
+        let mut regex_cache = HashMap::new();
         let tests = vec![
             // dec literal
             ("kong.foo.foo7 == 123", "(kong.foo.foo7 == 123)"),
@@ -340,13 +344,14 @@ mod tests {
             ("kong.foo.foo12 == -0123", "(kong.foo.foo12 == -83)"),
         ];
         for (input, expected) in tests {
-            let result = parse(input).unwrap();
+            let result = parse(input, &mut regex_cache).unwrap();
             assert_eq!(result.to_string(), expected);
         }
     }
 
     #[test]
     fn expr_transformations() {
+        let mut regex_cache = HashMap::new();
         let tests = vec![
             // lower
             (
@@ -360,13 +365,14 @@ mod tests {
             ),
         ];
         for (input, expected) in tests {
-            let result = parse(input).unwrap();
+            let result = parse(input, &mut regex_cache).unwrap();
             assert_eq!(result.to_string(), expected);
         }
     }
 
     #[test]
     fn expr_transformations_nested() {
+        let mut regex_cache = HashMap::new();
         let tests = vec![
             // lower + lower
             (
@@ -390,35 +396,37 @@ mod tests {
             ),
         ];
         for (input, expected) in tests {
-            let result = parse(input).unwrap();
+            let result = parse(input, &mut regex_cache).unwrap();
             assert_eq!(result.to_string(), expected);
         }
     }
 
     #[test]
     fn str_unicode_test() {
+        let mut regex_cache = HashMap::new();
         let tests = vec![
             // cjk chars
             ("t_msg in \"你好\"", "(t_msg in \"你好\")"),
             // 0xXXX unicode
             ("t_msg in \"\u{4f60}\u{597d}\"", "(t_msg in \"你好\")"),
         ];
         for (input, expected) in tests {
-            let result = parse(input).unwrap();
+            let result = parse(input, &mut regex_cache).unwrap();
             assert_eq!(result.to_string(), expected);
         }
     }
 
     #[test]
     fn rawstr_test() {
+        let mut regex_cache = HashMap::new();
         let tests = vec![
             // invalid escape sequence
             (r##"a == r#"/path/to/\d+"#"##, r#"(a == "/path/to/\d+")"#),
             // valid escape sequence
             (r##"a == r#"/path/to/\n+"#"##, r#"(a == "/path/to/\n+")"#),
         ];
         for (input, expected) in tests {
-            let result = parse(input).unwrap();
+            let result = parse(input, &mut regex_cache).unwrap();
             assert_eq!(result.to_string(), expected);
         }
     }

diff --git a/src/cir.rs b/src/cir.rs
@@ -298,7 +298,8 @@ mod tests {
         context.add_value("a", Value::Int(3 as i64));
 
         for source in sources {
-            let ast = crate::parser::parse(source)
+            let mut regex_cache = HashMap::new();
+            let ast = crate::parser::parse(source, &mut regex_cache)
                 .map_err(|e| e.to_string())
                 .unwrap();
             let mut mat = Match::new();

diff --git a/src/ffi/expression.rs b/src/ffi/expression.rs
@@ -3,6 +3,7 @@ use crate::ffi::ERR_BUF_MAX_LEN;
 use crate::schema::Schema;
 use bitflags::bitflags;
 use std::cmp::min;
+use std::collections::HashMap;
 use std::ffi;
 use std::os::raw::c_char;
 use std::slice::from_raw_parts_mut;
@@ -164,7 +165,7 @@ pub unsafe extern "C" fn expression_validate(
     let errbuf = from_raw_parts_mut(errbuf, ERR_BUF_MAX_LEN);
 
     // Parse the expression
-    let result = parse(atc).map_err(|e| e.to_string());
+    let result = parse(atc, &mut HashMap::new()).map_err(|e| e.to_string());
     if let Err(e) = result {
         let errlen = min(e.len(), *errbuf_len);
         errbuf[..errlen].copy_from_slice(&e.as_bytes()[..errlen]);

diff --git a/src/parser.rs b/src/parser.rs
@@ -11,7 +11,9 @@ use pest::pratt_parser::Assoc as AssocNew;
 use pest::pratt_parser::{Op, PrattParser};
 use pest::Parser;
 use regex::Regex;
+use std::collections::HashMap;
 use std::net::{IpAddr, Ipv4Addr, Ipv6Addr};
+use std::rc::Rc;
 
 type ParseResult<T> = Result<T, ParseError<Rule>>;
 /// cbindgen:ignore
@@ -61,12 +63,16 @@ impl ATCParser {
     }
     // matcher = { SOI ~ expression ~ EOI }
     #[allow(clippy::result_large_err)] // it's fine as parsing is not the hot path
-    fn parse_matcher(&mut self, source: &str) -> ParseResult<Expression> {
+    fn parse_matcher(
+        &mut self,
+        source: &str,
+        regex_cache: &mut HashMap<String, Rc<Regex>>,
+    ) -> ParseResult<Expression> {
         let pairs = ATCParser::parse(Rule::matcher, source)?;
         let expr_pair = pairs.peek().unwrap().into_inner().peek().unwrap();
         let rule = expr_pair.as_rule();
         match rule {
-            Rule::expression => parse_expression(expr_pair, &self.pratt_parser),
+            Rule::expression => parse_expression(expr_pair, &self.pratt_parser, regex_cache),
             _ => unreachable!(),
         }
     }
@@ -204,7 +210,10 @@ fn parse_int_literal(pair: Pair<Rule>) -> ParseResult<i64> {
 
 // predicate = { lhs ~ binary_operator ~ rhs }
 #[allow(clippy::result_large_err)] // it's fine as parsing is not the hot path
-fn parse_predicate(pair: Pair<Rule>) -> ParseResult<Predicate> {
+fn parse_predicate(
+    pair: Pair<Rule>,
+    regex_cache: &mut HashMap<String, Rc<Regex>>,
+) -> ParseResult<Predicate> {
     let mut pairs = pair.into_inner();
     let lhs = parse_lhs(pairs.next().unwrap())?;
     let op = parse_binary_operator(pairs.next().unwrap());
@@ -214,23 +223,22 @@ fn parse_predicate(pair: Pair<Rule>) -> ParseResult<Predicate> {
         lhs,
         rhs: if op == BinaryOperator::Regex {
             if let Value::String(s) = rhs {
-                let r = Regex::new(&s).map_err(|e| {
-                    ParseError::new_from_span(
-                        ErrorVariant::CustomError {
-                            message: e.to_string(),
-                        },
-                        rhs_pair.as_span(),
-                    )
-                })?;
-
-                Value::Regex(r)
+                let regex_rc = match regex_cache.get(&s) {
+                    Some(stored_regex_rc) => stored_regex_rc.clone(),
+                    None => {
+                        let r = Regex::new(&s).into_parse_result(&rhs_pair)?;
+
+                        let rc = Rc::new(r);
+
+                        regex_cache.insert(s, rc.clone());
+                        rc
+                    }
+                };
+
+                Value::Regex(regex_rc)
             } else {
-                return Err(ParseError::new_from_span(
-                    ErrorVariant::CustomError {
-                        message: "regex operator can only be used with String operands".to_string(),
-                    },
-                    rhs_pair.as_span(),
-                ));
+                return Err("regex operator can only be used with String operands")
+                    .into_parse_result(&rhs_pair);
             }
         } else {
             rhs
@@ -289,39 +297,53 @@ fn parse_binary_operator(pair: Pair<Rule>) -> BinaryOperator {
 fn parse_parenthesised_expression(
     pair: Pair<Rule>,
     pratt: &PrattParser<Rule>,
+    regex_cache: &mut HashMap<String, Rc<Regex>>,
 ) -> ParseResult<Expression> {
     let mut pairs = pair.into_inner();
     let pair = pairs.next().unwrap();
     let rule = pair.as_rule();
     match rule {
-        Rule::expression => parse_expression(pair, pratt),
+        Rule::expression => parse_expression(pair, pratt, regex_cache),
         Rule::not_op => Ok(Expression::Logical(Box::new(LogicalExpression::Not(
-            parse_expression(pairs.next().unwrap(), pratt)?,
+            parse_expression(pairs.next().unwrap(), pratt, regex_cache)?,
         )))),
         _ => unreachable!(),
     }
 }
 
 // term = { predicate | parenthesised_expression }
 #[allow(clippy::result_large_err)] // it's fine as parsing is not the hot path
-fn parse_term(pair: Pair<Rule>, pratt: &PrattParser<Rule>) -> ParseResult<Expression> {
+fn parse_term(
+    pair: Pair<Rule>,
+    pratt: &PrattParser<Rule>,
+    regex_cache: &mut HashMap<String, Rc<Regex>>,
+) -> ParseResult<Expression> {
     let pairs = pair.into_inner();
     let inner_rule = pairs.peek().unwrap();
     let rule = inner_rule.as_rule();
     match rule {
-        Rule::predicate => Ok(Expression::Predicate(parse_predicate(inner_rule)?)),
-        Rule::parenthesised_expression => parse_parenthesised_expression(inner_rule, pratt),
+        Rule::predicate => Ok(Expression::Predicate(parse_predicate(
+            inner_rule,
+            regex_cache,
+        )?)),
+        Rule::parenthesised_expression => {
+            parse_parenthesised_expression(inner_rule, pratt, regex_cache)
+        }
         _ => unreachable!(),
     }
 }
 
 // expression = { term ~ ( logical_operator ~ term )* }
 #[allow(clippy::result_large_err)] // it's fine as parsing is not the hot path
-fn parse_expression(pair: Pair<Rule>, pratt: &PrattParser<Rule>) -> ParseResult<Expression> {
+fn parse_expression(
+    pair: Pair<Rule>,
+    pratt: &PrattParser<Rule>,
+    regex_cache: &mut HashMap<String, Rc<Regex>>,
+) -> ParseResult<Expression> {
     let pairs = pair.into_inner();
     pratt
         .map_primary(|operand| match operand.as_rule() {
-            Rule::term => parse_term(operand, pratt),
+            Rule::term => parse_term(operand, pratt, regex_cache),
             _ => unreachable!(),
         })
         .map_infix(|lhs, op, rhs| {
@@ -335,8 +357,11 @@ fn parse_expression(pair: Pair<Rule>, pratt: &PrattParser<Rule>) -> ParseResult<
 }
 
 #[allow(clippy::result_large_err)] // it's fine as parsing is not the hot path
-pub fn parse(source: &str) -> ParseResult<Expression> {
-    ATCParser::new().parse_matcher(source)
+pub fn parse(
+    source: &str,
+    regex_cache: &mut HashMap<String, Rc<Regex>>,
+) -> ParseResult<Expression> {
+    ATCParser::new().parse_matcher(source, regex_cache)
 }
 
 #[cfg(test)]
@@ -345,16 +370,19 @@ mod tests {
 
     #[test]
     fn test_bad_syntax() {
+        let mut regex_cache = HashMap::new();
         assert_eq!(
-            parse("! a == 1").unwrap_err().to_string(),
+            parse("! a == 1", &mut regex_cache).unwrap_err().to_string(),
             " --> 1:1\n  |\n1 | ! a == 1\n  | ^---\n  |\n  = expected term"
         );
         assert_eq!(
-            parse("a == 1 || ! b == 2").unwrap_err().to_string(),
+            parse("a == 1 || ! b == 2", &mut regex_cache)
+                .unwrap_err()
+                .to_string(),
             " --> 1:11\n  |\n1 | a == 1 || ! b == 2\n  |           ^---\n  |\n  = expected term"
         );
         assert_eq!(
-            parse("(a == 1 || b == 2) && ! c == 3")
+            parse("(a == 1 || b == 2) && ! c == 3", &mut regex_cache)
                 .unwrap_err()
                 .to_string(),
                 " --> 1:23\n  |\n1 | (a == 1 || b == 2) && ! c == 3\n  |                       ^---\n  |\n  = expected term"