Skip to main content

ruso_script/script/parser/
mod.rs

1mod body;
2mod helpers;
3mod match_expr;
4mod metadata;
5mod probes;
6mod socket;
7mod statements;
8
9use pest::Parser;
10use pest::iterators::Pair;
11use thiserror::Error;
12
13use crate::script::ast::{Program, Stmt};
14use crate::script::grammar::{Rule, ScannerParser};
15
16use self::match_expr::build_qualified_expr;
17
18#[derive(Debug, Error)]
19pub enum ParseError {
20    #[error("parse error: {0}")]
21    Pest(#[from] pest::error::Error<Rule>),
22    #[error("unexpected rule: {0:?}")]
23    UnexpectedRule(Rule),
24    #[error("{0}")]
25    Invalid(String),
26}
27
/// Upper bound on structural nesting that the pre-parse guard accepts.
///
/// `pest` is a recursive-descent (PEG) parser: every nested block
/// (`if`/`for` … `end`) or object (`{ … }`) consumes another parser stack
/// frame. A few thousand levels fit comfortably inside the backend's
/// 256 KiB source cap, yet are enough to overflow the stack and **abort
/// the process**. Such an overflow cannot be intercepted by `catch_unwind`
/// and is not limited by the executor's wall-clock budget, so one
/// over-nested publish could bring the whole server down. Input is
/// therefore rejected cheaply before pest recurses at all. Real scanner
/// scripts nest only a handful of levels, so 64 leaves generous headroom.
const MAX_NESTING_DEPTH: usize = 64;
40
41/// Conservative pre-parse guard: bound the simultaneous nesting depth.
42///
43/// At the point of pest's deepest recursion, every still-open construct is
44/// either a brace pair (`{`…`}`) or a block keyword (`if`/`for`
45/// closed by `end`). Counting openers minus closers therefore tracks the
46/// parser's stack depth exactly, so this can't be evaded by interleaving
47/// the two forms. String (`"…"`) and regex (`'…'`) literals and `#`
48/// comments are skipped so braces/keywords inside them don't inflate the
49/// count. The scan is a single linear pass over the bytes and only slices
50/// the source at ASCII identifier boundaries, so it never panics.
51fn check_nesting_depth(source: &str) -> Result<(), ParseError> {
52    let too_deep = || {
53        ParseError::Invalid(format!(
54            "script nesting exceeds the maximum depth of {MAX_NESTING_DEPTH}"
55        ))
56    };
57    let bytes = source.as_bytes();
58    let n = bytes.len();
59    let mut i = 0;
60    let mut depth: usize = 0;
61    while i < n {
62        match bytes[i] {
63            b'#' => {
64                while i < n && bytes[i] != b'\n' {
65                    i += 1;
66                }
67            }
68            b'"' => {
69                i += 1;
70                while i < n {
71                    match bytes[i] {
72                        b'\\' => i += 2,
73                        b'"' => {
74                            i += 1;
75                            break;
76                        }
77                        _ => i += 1,
78                    }
79                }
80            }
81            b'\'' => {
82                i += 1;
83                while i < n && bytes[i] != b'\'' {
84                    i += 1;
85                }
86                i += 1;
87            }
88            b'{' => {
89                depth += 1;
90                if depth > MAX_NESTING_DEPTH {
91                    return Err(too_deep());
92                }
93                i += 1;
94            }
95            b'}' => {
96                depth = depth.saturating_sub(1);
97                i += 1;
98            }
99            b if b.is_ascii_alphabetic() || b == b'_' => {
100                let start = i;
101                while i < n && (bytes[i].is_ascii_alphanumeric() || bytes[i] == b'_') {
102                    i += 1;
103                }
104                let word = &source[start..i];
105                if word.eq_ignore_ascii_case("if") || word.eq_ignore_ascii_case("for") {
106                    depth += 1;
107                    if depth > MAX_NESTING_DEPTH {
108                        return Err(too_deep());
109                    }
110                } else if word.eq_ignore_ascii_case("end") {
111                    depth = depth.saturating_sub(1);
112                }
113            }
114            _ => i += 1,
115        }
116    }
117    Ok(())
118}
119
120pub fn parse(source: &str) -> Result<Program, ParseError> {
121    // Reject pathologically nested input *before* pest recurses on it
122    // (see `check_nesting_depth`). Without this, ~46 KiB of nested blocks
123    // overflows the parser stack and aborts the process.
124    check_nesting_depth(source)?;
125
126    let mut pairs = ScannerParser::parse(Rule::program, source)?;
127    let mut statements = Vec::new();
128
129    let program = pairs
130        .next()
131        .ok_or(ParseError::UnexpectedRule(Rule::program))?;
132
133    for item in program.into_inner() {
134        match item.as_rule() {
135            Rule::statement => statements.extend(build_statement(item)?),
136            Rule::pad => {
137                let stmt = item
138                    .into_inner()
139                    .find(|p| p.as_rule() == Rule::statement)
140                    .ok_or(ParseError::UnexpectedRule(Rule::pad))?;
141                statements.extend(build_statement(stmt)?);
142            }
143            _ => {}
144        }
145    }
146
147    Ok(Program { statements })
148}
149
150pub(crate) fn build_statement(pair: Pair<Rule>) -> Result<Vec<Stmt>, ParseError> {
151    let inner = pair
152        .into_inner()
153        .next()
154        .ok_or(ParseError::UnexpectedRule(Rule::statement))?;
155
156    Ok(match inner.as_rule() {
157        Rule::metadata_block => metadata::build_metadata_block(inner)?,
158        Rule::set_stmt => vec![statements::build_set(inner)?],
159        Rule::http_block => vec![probes::build_http_block(inner)?],
160        Rule::dns_block => {
161            let probe = socket::build_socket_block(inner)?;
162            vec![Stmt::Dns(probe)]
163        }
164        Rule::tcp_block => {
165            let probe = socket::build_socket_block(inner)?;
166            vec![Stmt::Tcp(probe)]
167        }
168        Rule::udp_block => {
169            let probe = socket::build_socket_block(inner)?;
170            vec![Stmt::Udp(probe)]
171        }
172        Rule::send_stmt => vec![statements::build_send(inner)?],
173        Rule::match_stmt => {
174            let expr = inner
175                .into_inner()
176                .find(|p| p.as_rule() == Rule::qualified_expr)
177                .ok_or(ParseError::UnexpectedRule(Rule::match_stmt))?;
178            vec![Stmt::Match(build_qualified_expr(expr)?)]
179        }
180        Rule::match_group => vec![match_expr::build_match_group(inner)?],
181        Rule::assert_stmt => {
182            let expr = inner
183                .into_inner()
184                .find(|p| p.as_rule() == Rule::qualified_expr)
185                .ok_or(ParseError::UnexpectedRule(Rule::assert_stmt))?;
186            vec![Stmt::Assert(build_qualified_expr(expr)?)]
187        }
188        Rule::extract_stmt => vec![statements::build_extract(inner)?],
189        Rule::if_block => vec![statements::build_if(inner)?],
190        Rule::for_block => vec![statements::build_for(inner)?],
191        Rule::save_stmt => vec![statements::build_save(inner)?],
192        Rule::evidence_stmt => vec![statements::build_evidence(inner)?],
193        Rule::flow_stmt => vec![statements::build_flow(inner)?],
194        Rule::retry_stmt => vec![statements::build_retry(inner)?],
195        Rule::sleep_stmt => {
196            let duration = inner
197                .into_inner()
198                .find(|p| p.as_rule() == Rule::duration)
199                .map(|p| p.as_str().to_string())
200                .unwrap_or_default();
201            vec![Stmt::Sleep(duration)]
202        }
203        rule => return Err(ParseError::UnexpectedRule(rule)),
204    })
205}
206
/// Tests for the pre-parse nesting guard and its integration into `parse`.
#[cfg(test)]
mod nesting_tests {
    use super::*;

    #[test]
    fn deeply_nested_input_is_rejected_not_overflowed() {
        // Without the depth guard this same input overflows the pest parser
        // stack and aborts the process (depth 2000 ≈ 46 KiB does it). The
        // guard must turn it into a graceful error instead.
        let depth = 5000;
        let mut src = String::new();
        for _ in 0..depth {
            src.push_str("if a.status == 200\n");
        }
        src.push_str("sleep 1s\n");
        for _ in 0..depth {
            src.push_str("end\n");
        }
        match parse(&src) {
            Err(ParseError::Invalid(msg)) => assert!(msg.contains("nesting")),
            other => panic!("expected Invalid nesting error, got {other:?}"),
        }
    }

    #[test]
    fn deeply_nested_objects_are_rejected() {
        // Brace nesting via repeated `{` (object/block openers).
        let mut src = String::from("http p ");
        for _ in 0..200 {
            src.push('{');
        }
        let _ = parse(&src).expect_err("over-nested braces must be rejected");
        // Specifically the depth guard, not a downstream pest error.
        assert!(matches!(
            check_nesting_depth(&src),
            Err(ParseError::Invalid(_))
        ));
    }

    #[test]
    fn normal_nesting_passes_the_guard() {
        // A realistically nested script (a few levels) must not trip the
        // guard. `end` and `}` close their constructs; `endpoint`/`iframe`
        // must not be mistaken for `end`/`if`.
        let src = r#"
            set endpoint "/login"
            http probe {
                method get
                path "/"
                json { "iframe": "{{ endpoint }}" }
            }
            if probe.status == 200
                for marker in ["ok", "yes"]
                    match probe.body contains "{{ marker }}"
                end
            end
        "#;
        assert!(check_nesting_depth(src).is_ok());
    }

    #[test]
    fn braces_inside_strings_and_comments_do_not_count() {
        // Small hand-written sample mixing closers and openers in a
        // comment, a string and a regex literal.
        let sample = r#"
            # if for repeat { { { these are in a comment
            set s "}}}}}}{{{{{{ if if if"
            set r '}}}}}}{{{{{{'
        "#;
        assert!(check_nesting_depth(sample).is_ok());

        // The sample above stays under the limit even if nothing were
        // skipped, so on its own it proves little. Each case below hides
        // MORE openers than the guard allows inside one comment or literal:
        // it can only pass if that construct really is skipped.
        let braces = "{".repeat(MAX_NESTING_DEPTH + 1);
        let keywords = "if for ".repeat(MAX_NESTING_DEPTH + 1);
        let hidden = [
            format!("# {braces} {keywords}\nset s \"ok\"\n"),
            format!("set s \"{braces} {keywords}\"\n"),
            // An escaped quote must not end the string early.
            format!("set s \"\\\" {braces} {keywords}\"\n"),
            format!("set r '{braces} {keywords}'\n"),
        ];
        for src in &hidden {
            assert!(
                check_nesting_depth(src).is_ok(),
                "openers inside a comment/literal were counted: {src:?}"
            );
        }

        // Control: the very same openers outside any literal do trip it.
        assert!(check_nesting_depth(&braces).is_err());
        assert!(check_nesting_depth(&keywords).is_err());
    }

    #[test]
    fn repeat_is_no_longer_valid_syntax() {
        // `repeat` was removed from the grammar entirely; it no longer parses.
        let src = r#"
            repeat 2
                sleep 1s
            end
        "#;
        assert!(parse(src).is_err(), "`repeat` must not parse");
    }
}