Skip to main content

ruso_runtime/runtime/
binary.rs

1//! Binary serialization of `BytecodeProgram` (magic `RUSO`, version 1).
2//!
3//! **Versioning:** the header carries a one-byte `VERSION`; the decoder accepts
4//! only that exact value, rejecting anything else up front with `BadVersion`
5//! (never a cryptic mid-decode `Corrupt`). Any change to the wire format must
6//! bump `VERSION` — a coordinated step, since the registry has to deploy the
7//! new runtime and serve re-compiled bytecode. See the Bytecode chapter at
8//! <https://docs.ruso.hopeless-labs.com/internals/bytecode.html>.
9//!
10//! The current v1 layout encodes `CmpValue::Number` as `u64` (earlier revisions
11//! silently truncated to `u32`), assigns HTTP method tags 5 and 6 to `Head` and
12//! `Options`, and bounds every untrusted list/count against the remaining
13//! buffer so a malicious or corrupt `.rbc` file cannot trigger OOM allocations.
14//! After decoding, [`validate_program`] bounds-checks every instruction operand
15//! against its pool so out-of-range indices surface as `Corrupt` rather than
16//! panicking the executor that indexes those pools directly.
17
18use std::collections::HashMap;
19
20use thiserror::Error;
21
22use crate::contract::{
23    BodyValue, CmpOp, CmpValue, EvidenceKind, ExtractSource, FieldKind, HttpMethod, InlinePart,
24    InlinePartBody, MatchPredicate, ObjectBody, QualifiedField, QualifiedMatch, Severity,
25};
26use crate::runtime::bytecode::{BytecodeProgram, Instr};
27use crate::runtime::spec::{CheckMetadata, HttpRequestSpec, ProbeKind, ProgramSpec};
28
/// Four-byte signature that [`encode`] writes first and [`decode`] requires at
/// the start of every serialized program.
pub const MAGIC: &[u8; 4] = b"RUSO";
/// Wire-format version byte written right after [`MAGIC`]; [`decode`] accepts
/// only this exact value and reports anything else as `BadVersion`.
pub const VERSION: u8 = 1;
31
/// Errors produced while decoding bytecode or parsing its hex representation.
#[derive(Debug, Error)]
pub enum BytecodeError {
    /// Input is too short to be bytecode.
    ///
    /// NOTE(review): nothing in the reviewed part of this file constructs this
    /// variant — truncated input surfaces as `Corrupt("unexpected end")` from
    /// the reader. Confirm whether other callers still use it.
    #[error("bytecode too short")]
    TooShort,
    /// The first four bytes are not [`MAGIC`].
    #[error("invalid magic")]
    BadMagic,
    /// The header's version byte differs from [`VERSION`]; `found` is the byte
    /// read from the input, `supported` is the version this build understands.
    #[error(
        "unsupported bytecode version {found} (this build reads version {supported}); \
         recompile the script or update ruso"
    )]
    BadVersion { found: u8, supported: u8 },
    /// The payload is structurally invalid; the string names the element that
    /// failed to decode or validate.
    #[error("corrupt bytecode: {0}")]
    Corrupt(&'static str),
    /// A hex string contained a non-hex character or an odd number of digits.
    #[error("invalid hex: {0}")]
    InvalidHex(String),
}
48
49pub fn encode(program: &BytecodeProgram) -> Vec<u8> {
50    let mut w = Writer::default();
51    w.bytes(MAGIC);
52    w.u8(VERSION);
53    write_metadata(&mut w, &program.spec.metadata);
54    write_probes(&mut w, &program.spec.probes);
55    write_strings(&mut w, &program.strings);
56    write_payloads(&mut w, &program.payloads);
57    write_matchers(&mut w, &program.matchers);
58    write_extracts(&mut w, &program.extracts);
59    write_evidence(&mut w, &program.evidence);
60    write_code(&mut w, &program.code);
61    w.0
62}
63
64pub fn decode(bytes: &[u8]) -> Result<BytecodeProgram, BytecodeError> {
65    let mut r = Reader::new(bytes);
66    r.consume_magic()?;
67    let version = r.u8()?;
68    if version != VERSION {
69        return Err(BytecodeError::BadVersion {
70            found: version,
71            supported: VERSION,
72        });
73    }
74    let metadata = read_metadata(&mut r)?;
75    let probes = read_probes(&mut r)?;
76    let strings = read_strings(&mut r)?;
77    let payloads = read_payloads(&mut r)?;
78    let matchers = read_matchers(&mut r)?;
79    let extracts = read_extracts(&mut r)?;
80    let evidence = read_evidence(&mut r)?;
81    let code = read_code(&mut r)?;
82    if r.remaining() != 0 {
83        return Err(BytecodeError::Corrupt("trailing bytes"));
84    }
85    let program = BytecodeProgram {
86        spec: ProgramSpec { probes, metadata },
87        code,
88        strings,
89        payloads,
90        matchers,
91        extracts,
92        evidence,
93    };
94    validate_program(&program)?;
95    Ok(program)
96}
97
98/// Bounds-check every instruction operand against the pool it indexes.
99///
100/// The decode helpers above guarantee no out-of-buffer reads and no OOM
101/// allocations, but they do **not** check that an instruction's operand
102/// indices (`strings[name]`, `payloads[id]`, `matchers[start..start+len]`,
103/// …) actually fall within the decoded pools — those indices are plain
104/// `u32`s in the code stream. The executor indexes the pools directly, so
105/// an unchecked out-of-range index would panic the worker thread. A
106/// malicious or corrupt `.rbc` (e.g. `ruso exec evil.rbc`) must surface as a
107/// clean `Corrupt` error, not a panic. This pass closes that gap so the
108/// "untrusted bytecode is safe to decode" guarantee holds end to end.
109///
110/// Jump targets (`else_pc`, `end_pc`) are deliberately *not* rejected when
111/// they point past `code`: the executor's main loop halts once `pc >=
112/// code.len()`, so an out-of-range jump simply ends execution rather than
113/// reading out of bounds.
114fn validate_program(p: &BytecodeProgram) -> Result<(), BytecodeError> {
115    let strings = p.strings.len();
116    let payloads = p.payloads.len();
117    let matchers = p.matchers.len();
118    let extracts = p.extracts.len();
119    let evidence = p.evidence.len();
120
121    // `idx < bound` with the index widened to usize so a u32 operand can
122    // never wrap; `range` additionally rejects start+len overflow.
123    let one = |idx: u32, bound: usize| -> Result<(), BytecodeError> {
124        if (idx as usize) < bound {
125            Ok(())
126        } else {
127            Err(BytecodeError::Corrupt("operand index out of range"))
128        }
129    };
130    let range = |start: u32, len: u16, bound: usize| -> Result<(), BytecodeError> {
131        let end = (start as usize)
132            .checked_add(len as usize)
133            .ok_or(BytecodeError::Corrupt("operand range overflow"))?;
134        if end <= bound {
135            Ok(())
136        } else {
137            Err(BytecodeError::Corrupt("operand range out of bounds"))
138        }
139    };
140
141    for instr in &p.code {
142        match instr {
143            Instr::Set { name, value } => {
144                one(*name, strings)?;
145                one(*value, strings)?;
146            }
147            Instr::SetList { name, start, len } => {
148                one(*name, strings)?;
149                range(*start, *len, strings)?;
150            }
151            Instr::Send { probe, payload } => {
152                one(*probe, strings)?;
153                if let Some(id) = payload {
154                    one(*id, payloads)?;
155                }
156            }
157            Instr::Match(m) | Instr::Assert(m) => one(*m, matchers)?,
158            Instr::MatchAll { start, len } | Instr::MatchAny { start, len } => {
159                range(*start, *len, matchers)?;
160            }
161            Instr::Extract { name, source } => {
162                one(*name, strings)?;
163                one(*source, extracts)?;
164            }
165            Instr::IfMatch { matcher, .. } => one(*matcher, matchers)?,
166            Instr::ForList {
167                item, start, len, ..
168            } => {
169                one(*item, strings)?;
170                range(*start, *len, strings)?;
171            }
172            Instr::ForVar { item, list, .. } => {
173                one(*item, strings)?;
174                one(*list, strings)?;
175            }
176            Instr::Save { from, to } => {
177                one(*from, strings)?;
178                one(*to, strings)?;
179            }
180            Instr::Evidence(k) => one(*k, evidence)?,
181            Instr::Retry { probe, .. } => one(*probe, strings)?,
182            Instr::RetryDelay(v) | Instr::Sleep(v) => one(*v, strings)?,
183            // Operand-free / jump-only instructions: nothing to bound here.
184            Instr::LoopBack
185            | Instr::Break
186            | Instr::Stop
187            | Instr::Fail
188            | Instr::Continue
189            | Instr::Exit => {}
190        }
191    }
192    Ok(())
193}
194
/// Render `bytes` as a lowercase hex string, two digits per byte, with no
/// separators.
pub fn bytes_to_hex(bytes: &[u8]) -> String {
    const DIGITS: &[u8; 16] = b"0123456789abcdef";

    let mut hex = String::with_capacity(bytes.len() * 2);
    for &byte in bytes {
        // High nibble first, then low nibble.
        hex.push(char::from(DIGITS[usize::from(byte >> 4)]));
        hex.push(char::from(DIGITS[usize::from(byte & 0x0f)]));
    }
    hex
}
203
/// Render `bytes` as a multi-line hex dump, 16 bytes per row.
///
/// Each row is the row's byte offset as eight hex digits, a colon, the bytes
/// as two-digit hex values each followed by a space (short rows are padded
/// with spaces to full width), and finally the same bytes between `|` bars,
/// where anything other than printable ASCII or a space is shown as `.`.
/// Every row ends with a newline; empty input yields an empty string.
pub fn bytes_to_hex_dump(bytes: &[u8]) -> String {
    use std::fmt::Write as _;

    const ROW: usize = 16;
    const HEX_COLUMN_WIDTH: usize = ROW * 3;

    let mut dump = String::new();
    for (row, line) in bytes.chunks(ROW).enumerate() {
        let mut hex_column = String::with_capacity(HEX_COLUMN_WIDTH);
        let mut ascii_column = String::with_capacity(ROW);
        for &byte in line {
            let _ = write!(hex_column, "{byte:02x} ");
            ascii_column.push(match byte {
                // Space plus the ASCII graphic range.
                0x20..=0x7e => char::from(byte),
                _ => '.',
            });
        }
        let _ = writeln!(
            dump,
            "{:08x}: {hex_column:<width$} |{ascii_column}|",
            row * ROW,
            width = HEX_COLUMN_WIDTH
        );
    }
    dump
}
228
229pub fn hex_to_bytes(input: &str) -> Result<Vec<u8>, BytecodeError> {
230    let mut compact = String::with_capacity(input.len());
231    for c in input.chars() {
232        if c.is_ascii_whitespace() {
233            continue;
234        }
235        if !c.is_ascii_hexdigit() {
236            return Err(BytecodeError::InvalidHex(format!("non-hex char: {c:?}")));
237        }
238        compact.push(c);
239    }
240    if !compact.len().is_multiple_of(2) {
241        return Err(BytecodeError::InvalidHex("odd length".into()));
242    }
243    let bytes = compact.as_bytes();
244    let mut out = Vec::with_capacity(bytes.len() / 2);
245    for chunk in bytes.chunks_exact(2) {
246        // SAFETY: validated above as ascii hex.
247        let pair = std::str::from_utf8(chunk).expect("ascii hex");
248        let byte = u8::from_str_radix(pair, 16)
249            .map_err(|err| BytecodeError::InvalidHex(err.to_string()))?;
250        out.push(byte);
251    }
252    Ok(out)
253}
254
255/// Decode a hex string into raw bytecode bytes.
256///
257/// Earlier revisions accepted an `@path` prefix that would read the file as
258/// raw bytecode. That alternate entry point conflated "hex-decoded input"
259/// with "file IO" and provided a path-traversal sink for any caller passing
260/// less-trusted input (env vars, CI parameters, scripted wrappers). File IO
261/// is now the CLI's responsibility — runtime callers pass bytes directly via
262/// [`decode`] or hex via this function.
263pub fn load_bytecode_input(input: &str) -> Result<Vec<u8>, BytecodeError> {
264    hex_to_bytes(input.trim())
265}
266
/// Growable output buffer for the encoder. All multi-byte integers are written
/// little-endian.
#[derive(Default)]
struct Writer(Vec<u8>);

impl Writer {
    /// Append a single byte.
    fn u8(&mut self, v: u8) {
        self.0.push(v);
    }

    /// Append a `u16` as two little-endian bytes.
    fn u16(&mut self, v: u16) {
        self.bytes(&v.to_le_bytes());
    }

    /// Append a `u32` as four little-endian bytes.
    fn u32(&mut self, v: u32) {
        self.bytes(&v.to_le_bytes());
    }

    /// Append a `u64` as eight little-endian bytes.
    fn u64(&mut self, v: u64) {
        self.bytes(&v.to_le_bytes());
    }

    /// Append `data` verbatim, with no length prefix.
    fn bytes(&mut self, data: &[u8]) {
        self.0.extend_from_slice(data);
    }

    /// Append a string as a `u32` byte length followed by its UTF-8 bytes.
    fn str(&mut self, s: &str) {
        self.u32(s.len() as u32);
        self.bytes(s.as_bytes());
    }

    /// Append an optional string: a `1` flag then the string, or a lone `0`.
    fn opt_str(&mut self, value: &Option<String>) {
        if let Some(text) = value {
            self.u8(1);
            self.str(text);
        } else {
            self.u8(0);
        }
    }

    /// Append an optional byte blob: a `1` flag, a `u32` length and the bytes,
    /// or a lone `0`.
    fn opt_bytes(&mut self, value: &Option<Vec<u8>>) {
        let Some(blob) = value else {
            self.u8(0);
            return;
        };
        self.u8(1);
        self.u32(blob.len() as u32);
        self.bytes(blob);
    }

    /// Append an optional `u16`: a `1` flag then the value, or a lone `0`.
    fn opt_u16(&mut self, value: Option<u16>) {
        if let Some(number) = value {
            self.u8(1);
            self.u16(number);
        } else {
            self.u8(0);
        }
    }
}
328
329struct Reader<'a> {
330    data: &'a [u8],
331    pos: usize,
332}
333
334impl<'a> Reader<'a> {
335    fn new(data: &'a [u8]) -> Self {
336        Self { data, pos: 0 }
337    }
338
339    fn remaining(&self) -> usize {
340        self.data.len().saturating_sub(self.pos)
341    }
342
343    fn consume_magic(&mut self) -> Result<(), BytecodeError> {
344        let magic = self.need(4)?;
345        if magic != MAGIC {
346            return Err(BytecodeError::BadMagic);
347        }
348        Ok(())
349    }
350
351    fn need(&mut self, n: usize) -> Result<&'a [u8], BytecodeError> {
352        if self.pos + n > self.data.len() {
353            return Err(BytecodeError::Corrupt("unexpected end"));
354        }
355        let slice = &self.data[self.pos..self.pos + n];
356        self.pos += n;
357        Ok(slice)
358    }
359
360    fn u8(&mut self) -> Result<u8, BytecodeError> {
361        Ok(self.need(1)?[0])
362    }
363
364    fn u16(&mut self) -> Result<u16, BytecodeError> {
365        Ok(u16::from_le_bytes(self.need(2)?.try_into().unwrap()))
366    }
367
368    fn u32(&mut self) -> Result<u32, BytecodeError> {
369        Ok(u32::from_le_bytes(self.need(4)?.try_into().unwrap()))
370    }
371
372    fn u64(&mut self) -> Result<u64, BytecodeError> {
373        Ok(u64::from_le_bytes(self.need(8)?.try_into().unwrap()))
374    }
375
376    fn str(&mut self) -> Result<String, BytecodeError> {
377        let len = self.u32()? as usize;
378        // Reject lengths that overrun the buffer before allocating.
379        if len > self.remaining() {
380            return Err(BytecodeError::Corrupt("string length exceeds buffer"));
381        }
382        let bytes = self.need(len)?;
383        String::from_utf8(bytes.to_vec()).map_err(|_| BytecodeError::Corrupt("utf8"))
384    }
385
386    fn opt_str(&mut self) -> Result<Option<String>, BytecodeError> {
387        if self.u8()? == 0 {
388            Ok(None)
389        } else {
390            Ok(Some(self.str()?))
391        }
392    }
393
394    fn opt_bytes(&mut self) -> Result<Option<Vec<u8>>, BytecodeError> {
395        if self.u8()? == 0 {
396            Ok(None)
397        } else {
398            let len = self.u32()? as usize;
399            if len > self.remaining() {
400                return Err(BytecodeError::Corrupt("bytes length exceeds buffer"));
401            }
402            Ok(Some(self.need(len)?.to_vec()))
403        }
404    }
405
406    fn opt_u16(&mut self) -> Result<Option<u16>, BytecodeError> {
407        if self.u8()? == 0 {
408            Ok(None)
409        } else {
410            Ok(Some(self.u16()?))
411        }
412    }
413
414    /// Convert an untrusted `u32` count into a `usize`, bounded against the
415    /// remaining buffer.
416    ///
417    /// Every list/pool length in the bytecode is followed by at least one
418    /// byte per element (an opcode tag, a u8 discriminant, or a 4-byte
419    /// length prefix). The strict lower bound is `1` byte per item, so any
420    /// `count > remaining()` is unambiguously corrupt — and this check runs
421    /// **before** `Vec::with_capacity(count)`, so an attacker-controlled
422    /// `count = u32::MAX` cannot trigger a multi-GB allocation.
423    fn bounded_count(&self, raw: u32) -> Result<usize, BytecodeError> {
424        let count = raw as usize;
425        if count > self.remaining() {
426            return Err(BytecodeError::Corrupt(
427                "list length exceeds remaining bytes",
428            ));
429        }
430        Ok(count)
431    }
432}
433
434fn write_metadata(w: &mut Writer, metadata: &CheckMetadata) {
435    w.opt_str(&metadata.name);
436    w.opt_str(&metadata.description);
437    w.opt_str(&metadata.impact);
438    match &metadata.severity {
439        Some(s) => {
440            w.u8(1);
441            w.u8(severity_tag(s));
442        }
443        None => w.u8(0),
444    }
445    w.opt_str(&metadata.author);
446    w.opt_str(&metadata.report_title);
447    write_strings(w, &metadata.cve);
448    write_strings(w, &metadata.cwe);
449    write_strings(w, &metadata.references);
450    write_strings(w, &metadata.cvss);
451    write_strings(w, &metadata.cvss_score);
452    w.opt_str(&metadata.mitigation);
453    write_strings(w, &metadata.tags);
454    w.opt_str(&metadata.version);
455    // `family` is the last field of the v1 metadata block. It (and `tags` /
456    // `version` before it) was appended during early `0.1.0-dev` without a
457    // VERSION bump; any further wire-format change now bumps VERSION instead.
458    w.opt_str(&metadata.family);
459}
460
461fn read_metadata(r: &mut Reader<'_>) -> Result<CheckMetadata, BytecodeError> {
462    Ok(CheckMetadata {
463        name: r.opt_str()?,
464        description: r.opt_str()?,
465        impact: r.opt_str()?,
466        severity: if r.u8()? == 0 {
467            None
468        } else {
469            Some(read_severity(r)?)
470        },
471        author: r.opt_str()?,
472        report_title: r.opt_str()?,
473        cve: read_strings(r)?,
474        cwe: read_strings(r)?,
475        references: read_strings(r)?,
476        cvss: read_strings(r)?,
477        cvss_score: read_strings(r)?,
478        mitigation: r.opt_str()?,
479        tags: read_strings(r)?,
480        version: r.opt_str()?,
481        family: r.opt_str()?,
482    })
483}
484
485fn severity_tag(s: &Severity) -> u8 {
486    match s {
487        Severity::Low => 0,
488        Severity::Medium => 1,
489        Severity::High => 2,
490        Severity::Critical => 3,
491        Severity::Info => 4,
492    }
493}
494
495fn read_severity(r: &mut Reader<'_>) -> Result<Severity, BytecodeError> {
496    Ok(match r.u8()? {
497        0 => Severity::Low,
498        1 => Severity::Medium,
499        2 => Severity::High,
500        3 => Severity::Critical,
501        4 => Severity::Info,
502        _ => return Err(BytecodeError::Corrupt("severity")),
503    })
504}
505
506fn write_probes(w: &mut Writer, probes: &HashMap<String, ProbeKind>) {
507    let mut names: Vec<_> = probes.keys().cloned().collect();
508    names.sort();
509    w.u32(names.len() as u32);
510    for name in names {
511        w.str(&name);
512        write_probe_kind(w, probes.get(&name).expect("sorted key"));
513    }
514}
515
516fn read_probes(r: &mut Reader<'_>) -> Result<HashMap<String, ProbeKind>, BytecodeError> {
517    let raw = r.u32()?;
518    let count = r.bounded_count(raw)?;
519    let mut probes = HashMap::with_capacity(count);
520    for _ in 0..count {
521        let name = r.str()?;
522        let kind = read_probe_kind(r)?;
523        probes.insert(name, kind);
524    }
525    Ok(probes)
526}
527
528fn write_probe_kind(w: &mut Writer, kind: &ProbeKind) {
529    match kind {
530        ProbeKind::Http(spec) => {
531            w.u8(0);
532            write_http_spec(w, spec);
533        }
534        ProbeKind::Dns(spec) => {
535            w.u8(1);
536            write_socket_probe(w, spec);
537        }
538        ProbeKind::Tcp(spec) => {
539            w.u8(2);
540            write_socket_probe(w, spec);
541        }
542        ProbeKind::Udp(spec) => {
543            w.u8(3);
544            write_socket_probe(w, spec);
545        }
546    }
547}
548
549fn write_socket_probe(w: &mut Writer, spec: &crate::runtime::spec::SocketProbeSpec) {
550    w.str(&spec.host);
551    w.opt_u16(spec.port);
552    w.opt_bytes(&spec.payload);
553    w.u8(u8::from(spec.tls));
554    w.u8(u8::from(spec.session));
555    w.u32(spec.read_max);
556    w.u32(spec.read_idle_ms);
557}
558
559fn read_socket_probe(
560    r: &mut Reader<'_>,
561) -> Result<crate::runtime::spec::SocketProbeSpec, BytecodeError> {
562    Ok(crate::runtime::spec::SocketProbeSpec {
563        host: r.str()?,
564        port: r.opt_u16()?,
565        payload: r.opt_bytes()?,
566        tls: r.u8()? != 0,
567        session: r.u8()? != 0,
568        read_max: r.u32()?,
569        read_idle_ms: r.u32()?,
570    })
571}
572
573fn read_probe_kind(r: &mut Reader<'_>) -> Result<ProbeKind, BytecodeError> {
574    Ok(match r.u8()? {
575        0 => ProbeKind::Http(read_http_spec(r)?),
576        1 => ProbeKind::Dns(read_socket_probe(r)?),
577        2 => ProbeKind::Tcp(read_socket_probe(r)?),
578        3 => ProbeKind::Udp(read_socket_probe(r)?),
579        _ => return Err(BytecodeError::Corrupt("probe kind")),
580    })
581}
582
583fn write_http_spec(w: &mut Writer, spec: &HttpRequestSpec) {
584    w.u8(http_method_tag(&spec.method));
585    w.str(&spec.path);
586    w.opt_str(&spec.timeout);
587    write_opt_bool(w, &spec.follow_redirect);
588    write_opt_bool(w, &spec.verify_ssl);
589    w.opt_str(&spec.proxy);
590    w.opt_str(&spec.user_agent);
591    write_header_list(w, &spec.headers);
592    write_header_list(w, &spec.cookies);
593    write_header_list(w, &spec.queries);
594    write_opt_object(w, &spec.data_body);
595    write_opt_object(w, &spec.json_body);
596    w.opt_str(&spec.raw_body);
597    w.opt_str(&spec.body_bytes);
598    write_opt_object(w, &spec.multipart_body);
599}
600
601fn read_http_spec(r: &mut Reader<'_>) -> Result<HttpRequestSpec, BytecodeError> {
602    Ok(HttpRequestSpec {
603        method: read_http_method(r)?,
604        path: r.str()?,
605        timeout: r.opt_str()?,
606        follow_redirect: read_opt_bool(r)?,
607        verify_ssl: read_opt_bool(r)?,
608        proxy: r.opt_str()?,
609        user_agent: r.opt_str()?,
610        headers: read_header_list(r)?,
611        cookies: read_header_list(r)?,
612        queries: read_header_list(r)?,
613        data_body: read_opt_object(r)?,
614        json_body: read_opt_object(r)?,
615        raw_body: r.opt_str()?,
616        body_bytes: r.opt_str()?,
617        multipart_body: read_opt_object(r)?,
618    })
619}
620
621fn http_method_tag(m: &HttpMethod) -> u8 {
622    match m {
623        HttpMethod::Get => 0,
624        HttpMethod::Post => 1,
625        HttpMethod::Put => 2,
626        HttpMethod::Patch => 3,
627        HttpMethod::Delete => 4,
628        HttpMethod::Head => 5,
629        HttpMethod::Options => 6,
630    }
631}
632
633fn read_http_method(r: &mut Reader<'_>) -> Result<HttpMethod, BytecodeError> {
634    Ok(match r.u8()? {
635        0 => HttpMethod::Get,
636        1 => HttpMethod::Post,
637        2 => HttpMethod::Put,
638        3 => HttpMethod::Patch,
639        4 => HttpMethod::Delete,
640        5 => HttpMethod::Head,
641        6 => HttpMethod::Options,
642        _ => return Err(BytecodeError::Corrupt("http method")),
643    })
644}
645
646fn write_opt_bool(w: &mut Writer, value: &Option<bool>) {
647    match value {
648        Some(v) => {
649            w.u8(1);
650            w.u8(u8::from(*v));
651        }
652        None => w.u8(0),
653    }
654}
655
656fn read_opt_bool(r: &mut Reader<'_>) -> Result<Option<bool>, BytecodeError> {
657    if r.u8()? == 0 {
658        Ok(None)
659    } else {
660        Ok(Some(r.u8()? != 0))
661    }
662}
663
664fn write_header_list(w: &mut Writer, pairs: &[(String, String)]) {
665    w.u32(pairs.len() as u32);
666    for (k, v) in pairs {
667        w.str(k);
668        w.str(v);
669    }
670}
671
672fn read_header_list(r: &mut Reader<'_>) -> Result<Vec<(String, String)>, BytecodeError> {
673    let raw = r.u32()?;
674    let count = r.bounded_count(raw)?;
675    let mut pairs = Vec::with_capacity(count);
676    for _ in 0..count {
677        pairs.push((r.str()?, r.str()?));
678    }
679    Ok(pairs)
680}
681
682fn write_opt_object(w: &mut Writer, body: &Option<ObjectBody>) {
683    match body {
684        Some(obj) => {
685            w.u8(1);
686            write_object(w, obj);
687        }
688        None => w.u8(0),
689    }
690}
691
692fn read_opt_object(r: &mut Reader<'_>) -> Result<Option<ObjectBody>, BytecodeError> {
693    if r.u8()? == 0 {
694        Ok(None)
695    } else {
696        Ok(Some(read_object(r)?))
697    }
698}
699
700fn write_object(w: &mut Writer, obj: &ObjectBody) {
701    w.u32(obj.pairs.len() as u32);
702    for (key, value) in &obj.pairs {
703        w.str(key);
704        write_body_value(w, value);
705    }
706}
707
708fn read_object(r: &mut Reader<'_>) -> Result<ObjectBody, BytecodeError> {
709    let raw = r.u32()?;
710    let count = r.bounded_count(raw)?;
711    let mut pairs = Vec::with_capacity(count);
712    for _ in 0..count {
713        pairs.push((r.str()?, read_body_value(r)?));
714    }
715    Ok(ObjectBody { pairs })
716}
717
718fn write_body_value(w: &mut Writer, value: &BodyValue) {
719    match value {
720        BodyValue::String(s) => {
721            w.u8(0);
722            w.str(s);
723        }
724        BodyValue::Interpolation(s) => {
725            w.u8(1);
726            w.str(s);
727        }
728        BodyValue::Object(obj) => {
729            w.u8(2);
730            write_object(w, obj);
731        }
732        BodyValue::Bytes(hex) => {
733            w.u8(3);
734            w.str(hex);
735        }
736        BodyValue::Part(part) => {
737            w.u8(4);
738            w.opt_str(&part.filename);
739            match &part.body {
740                InlinePartBody::Text(t) => {
741                    w.u8(0);
742                    w.str(t);
743                }
744                InlinePartBody::Bytes(b) => {
745                    w.u8(1);
746                    w.str(b);
747                }
748            }
749        }
750    }
751}
752
753fn read_body_value(r: &mut Reader<'_>) -> Result<BodyValue, BytecodeError> {
754    Ok(match r.u8()? {
755        0 => BodyValue::String(r.str()?),
756        1 => BodyValue::Interpolation(r.str()?),
757        2 => BodyValue::Object(read_object(r)?),
758        3 => BodyValue::Bytes(r.str()?),
759        4 => BodyValue::Part(InlinePart {
760            filename: r.opt_str()?,
761            body: match r.u8()? {
762                1 => InlinePartBody::Bytes(r.str()?),
763                _ => InlinePartBody::Text(r.str()?),
764            },
765        }),
766        _ => return Err(BytecodeError::Corrupt("body value")),
767    })
768}
769
770fn write_strings(w: &mut Writer, strings: &[String]) {
771    w.u32(strings.len() as u32);
772    for s in strings {
773        w.str(s);
774    }
775}
776
777fn read_strings(r: &mut Reader<'_>) -> Result<Vec<String>, BytecodeError> {
778    let raw = r.u32()?;
779    let count = r.bounded_count(raw)?;
780    let mut strings = Vec::with_capacity(count);
781    for _ in 0..count {
782        strings.push(r.str()?);
783    }
784    Ok(strings)
785}
786
787fn write_payloads(w: &mut Writer, payloads: &[Vec<u8>]) {
788    w.u32(payloads.len() as u32);
789    for data in payloads {
790        w.u32(data.len() as u32);
791        w.bytes(data);
792    }
793}
794
795fn read_payloads(r: &mut Reader<'_>) -> Result<Vec<Vec<u8>>, BytecodeError> {
796    let raw = r.u32()?;
797    let count = r.bounded_count(raw)?;
798    let mut payloads = Vec::with_capacity(count);
799    for _ in 0..count {
800        let len = r.u32()? as usize;
801        if len > r.remaining() {
802            return Err(BytecodeError::Corrupt("payload length exceeds buffer"));
803        }
804        payloads.push(r.need(len)?.to_vec());
805    }
806    Ok(payloads)
807}
808
809fn write_matchers(w: &mut Writer, matchers: &[QualifiedMatch]) {
810    w.u32(matchers.len() as u32);
811    for m in matchers {
812        write_matcher(w, m);
813    }
814}
815
816fn read_matchers(r: &mut Reader<'_>) -> Result<Vec<QualifiedMatch>, BytecodeError> {
817    let raw = r.u32()?;
818    let count = r.bounded_count(raw)?;
819    let mut matchers = Vec::with_capacity(count);
820    for _ in 0..count {
821        matchers.push(read_matcher(r)?);
822    }
823    Ok(matchers)
824}
825
826fn write_matcher(w: &mut Writer, m: &QualifiedMatch) {
827    w.str(&m.field.target);
828    write_field_kind(w, &m.field.kind);
829    write_predicate(w, &m.predicate);
830}
831
832fn read_matcher(r: &mut Reader<'_>) -> Result<QualifiedMatch, BytecodeError> {
833    Ok(QualifiedMatch {
834        field: QualifiedField {
835            target: r.str()?,
836            kind: read_field_kind(r)?,
837        },
838        predicate: read_predicate(r)?,
839    })
840}
841
842fn write_field_kind(w: &mut Writer, kind: &FieldKind) {
843    match kind {
844        FieldKind::Status => w.u8(0),
845        FieldKind::Body => w.u8(1),
846        FieldKind::Header(name) => {
847            w.u8(2);
848            w.str(name);
849        }
850        FieldKind::ResponseTime => w.u8(3),
851        FieldKind::ResponseSize => w.u8(4),
852        FieldKind::Answer => w.u8(5),
853        FieldKind::Banner => w.u8(6),
854        FieldKind::Response => w.u8(7),
855    }
856}
857
858fn read_field_kind(r: &mut Reader<'_>) -> Result<FieldKind, BytecodeError> {
859    Ok(match r.u8()? {
860        0 => FieldKind::Status,
861        1 => FieldKind::Body,
862        2 => FieldKind::Header(r.str()?),
863        3 => FieldKind::ResponseTime,
864        4 => FieldKind::ResponseSize,
865        5 => FieldKind::Answer,
866        6 => FieldKind::Banner,
867        7 => FieldKind::Response,
868        _ => return Err(BytecodeError::Corrupt("field kind")),
869    })
870}
871
872fn write_predicate(w: &mut Writer, p: &MatchPredicate) {
873    match p {
874        MatchPredicate::Compare { op, value } => {
875            w.u8(0);
876            w.u8(cmp_op_tag(*op));
877            write_cmp_value(w, value);
878        }
879        MatchPredicate::Contains(s) => {
880            w.u8(1);
881            w.str(s);
882        }
883        MatchPredicate::NotContains(s) => {
884            w.u8(2);
885            w.str(s);
886        }
887        MatchPredicate::Regex(s) => {
888            w.u8(3);
889            w.str(s);
890        }
891    }
892}
893
894fn read_predicate(r: &mut Reader<'_>) -> Result<MatchPredicate, BytecodeError> {
895    Ok(match r.u8()? {
896        0 => MatchPredicate::Compare {
897            op: read_cmp_op(r)?,
898            value: read_cmp_value(r)?,
899        },
900        1 => MatchPredicate::Contains(r.str()?),
901        2 => MatchPredicate::NotContains(r.str()?),
902        3 => MatchPredicate::Regex(r.str()?),
903        _ => return Err(BytecodeError::Corrupt("predicate")),
904    })
905}
906
907fn cmp_op_tag(op: CmpOp) -> u8 {
908    match op {
909        CmpOp::Eq => 0,
910        CmpOp::Ne => 1,
911        CmpOp::Lt => 2,
912        CmpOp::Gt => 3,
913        CmpOp::Le => 4,
914        CmpOp::Ge => 5,
915    }
916}
917
918fn read_cmp_op(r: &mut Reader<'_>) -> Result<CmpOp, BytecodeError> {
919    Ok(match r.u8()? {
920        0 => CmpOp::Eq,
921        1 => CmpOp::Ne,
922        2 => CmpOp::Lt,
923        3 => CmpOp::Gt,
924        4 => CmpOp::Le,
925        5 => CmpOp::Ge,
926        _ => return Err(BytecodeError::Corrupt("cmp op")),
927    })
928}
929
930fn write_cmp_value(w: &mut Writer, value: &CmpValue) {
931    match value {
932        CmpValue::Number(n) => {
933            w.u8(0);
934            // Full u64 — earlier revisions truncated to u32, silently
935            // mangling comparisons against values above ~4.3 billion (e.g.
936            // `response_size > 5_000_000_000`).
937            w.u64(*n);
938        }
939        CmpValue::String(s) => {
940            w.u8(1);
941            w.str(s);
942        }
943        CmpValue::Duration(d) => {
944            w.u8(2);
945            w.str(d);
946        }
947    }
948}
949
950fn read_cmp_value(r: &mut Reader<'_>) -> Result<CmpValue, BytecodeError> {
951    Ok(match r.u8()? {
952        0 => CmpValue::Number(r.u64()?),
953        1 => CmpValue::String(r.str()?),
954        2 => CmpValue::Duration(r.str()?),
955        _ => return Err(BytecodeError::Corrupt("cmp value")),
956    })
957}
958
959fn write_extracts(w: &mut Writer, extracts: &[ExtractSource]) {
960    w.u32(extracts.len() as u32);
961    for e in extracts {
962        write_extract(w, e);
963    }
964}
965
966fn read_extracts(r: &mut Reader<'_>) -> Result<Vec<ExtractSource>, BytecodeError> {
967    let raw = r.u32()?;
968    let count = r.bounded_count(raw)?;
969    let mut extracts = Vec::with_capacity(count);
970    for _ in 0..count {
971        extracts.push(read_extract(r)?);
972    }
973    Ok(extracts)
974}
975
976fn write_extract(w: &mut Writer, e: &ExtractSource) {
977    match e {
978        ExtractSource::Body { target, regex } => {
979            w.u8(0);
980            w.str(target);
981            w.opt_str(regex);
982        }
983        ExtractSource::Header { target, name } => {
984            w.u8(1);
985            w.str(target);
986            w.str(name);
987        }
988    }
989}
990
991fn read_extract(r: &mut Reader<'_>) -> Result<ExtractSource, BytecodeError> {
992    Ok(match r.u8()? {
993        0 => ExtractSource::Body {
994            target: r.str()?,
995            regex: r.opt_str()?,
996        },
997        1 => ExtractSource::Header {
998            target: r.str()?,
999            name: r.str()?,
1000        },
1001        _ => return Err(BytecodeError::Corrupt("extract")),
1002    })
1003}
1004
1005fn write_evidence(w: &mut Writer, kinds: &[EvidenceKind]) {
1006    w.u32(kinds.len() as u32);
1007    for k in kinds {
1008        write_evidence_kind(w, k);
1009    }
1010}
1011
1012fn read_evidence(r: &mut Reader<'_>) -> Result<Vec<EvidenceKind>, BytecodeError> {
1013    let raw = r.u32()?;
1014    let count = r.bounded_count(raw)?;
1015    let mut kinds = Vec::with_capacity(count);
1016    for _ in 0..count {
1017        kinds.push(read_evidence_kind(r)?);
1018    }
1019    Ok(kinds)
1020}
1021
1022fn write_evidence_kind(w: &mut Writer, k: &EvidenceKind) {
1023    match k {
1024        EvidenceKind::BodyRef(target) => {
1025            w.u8(0);
1026            w.str(target);
1027        }
1028        EvidenceKind::ResponseRef(target) => {
1029            w.u8(2);
1030            w.str(target);
1031        }
1032        EvidenceKind::Regex { target, pattern } => {
1033            w.u8(1);
1034            w.str(target);
1035            w.str(pattern);
1036        }
1037    }
1038}
1039
1040fn read_evidence_kind(r: &mut Reader<'_>) -> Result<EvidenceKind, BytecodeError> {
1041    Ok(match r.u8()? {
1042        0 => EvidenceKind::BodyRef(r.str()?),
1043        1 => EvidenceKind::Regex {
1044            target: r.str()?,
1045            pattern: r.str()?,
1046        },
1047        2 => EvidenceKind::ResponseRef(r.str()?),
1048        _ => return Err(BytecodeError::Corrupt("evidence")),
1049    })
1050}
1051
// Opcode bytes: the first byte of every encoded instruction. `write_instr`
// emits them and `read_instr` dispatches on them, so changing any value
// changes the wire format.

// Opcodes 1–17.
const OP_SET: u8 = 1; // Instr::Set
const OP_SEND: u8 = 2; // Instr::Send
const OP_MATCH: u8 = 3; // Instr::Match
const OP_MATCH_ALL: u8 = 4; // Instr::MatchAll
const OP_MATCH_ANY: u8 = 5; // Instr::MatchAny
const OP_ASSERT: u8 = 6; // Instr::Assert
const OP_EXTRACT: u8 = 7; // Instr::Extract
const OP_IF_MATCH: u8 = 8; // Instr::IfMatch
const OP_SAVE: u8 = 9; // Instr::Save
const OP_EVIDENCE: u8 = 10; // Instr::Evidence
const OP_RETRY: u8 = 11; // Instr::Retry
const OP_RETRY_DELAY: u8 = 12; // Instr::RetryDelay
const OP_SLEEP: u8 = 13; // Instr::Sleep
const OP_STOP: u8 = 14; // Instr::Stop
const OP_FAIL: u8 = 15; // Instr::Fail
const OP_CONTINUE: u8 = 16; // Instr::Continue
const OP_EXIT: u8 = 17; // Instr::Exit

// 18 reserved: was `Repeat`, removed. Decoding it now yields an unknown-opcode
// error (no published bytecode uses it).

// Opcodes 19–23.
const OP_LOOP_BACK: u8 = 19; // Instr::LoopBack
const OP_BREAK: u8 = 20; // Instr::Break
const OP_SET_LIST: u8 = 21; // Instr::SetList
const OP_FOR_LIST: u8 = 22; // Instr::ForList
const OP_FOR_VAR: u8 = 23; // Instr::ForVar
1076
1077fn write_code(w: &mut Writer, code: &[Instr]) {
1078    w.u32(code.len() as u32);
1079    for instr in code {
1080        write_instr(w, instr);
1081    }
1082}
1083
1084fn read_code(r: &mut Reader<'_>) -> Result<Vec<Instr>, BytecodeError> {
1085    let raw = r.u32()?;
1086    let count = r.bounded_count(raw)?;
1087    let mut code = Vec::with_capacity(count);
1088    for _ in 0..count {
1089        code.push(read_instr(r)?);
1090    }
1091    Ok(code)
1092}
1093
1094fn write_instr(w: &mut Writer, instr: &Instr) {
1095    match instr {
1096        Instr::Set { name, value } => {
1097            w.u8(OP_SET);
1098            w.u32(*name);
1099            w.u32(*value);
1100        }
1101        Instr::SetList { name, start, len } => {
1102            w.u8(OP_SET_LIST);
1103            w.u32(*name);
1104            w.u32(*start);
1105            w.u16(*len);
1106        }
1107        Instr::Send { probe, payload } => {
1108            w.u8(OP_SEND);
1109            w.u32(*probe);
1110            match payload {
1111                Some(id) => {
1112                    w.u8(1);
1113                    w.u32(*id);
1114                }
1115                None => w.u8(0),
1116            }
1117        }
1118        Instr::Match(matcher) => {
1119            w.u8(OP_MATCH);
1120            w.u32(*matcher);
1121        }
1122        Instr::MatchAll { start, len } => {
1123            w.u8(OP_MATCH_ALL);
1124            w.u32(*start);
1125            w.u16(*len);
1126        }
1127        Instr::MatchAny { start, len } => {
1128            w.u8(OP_MATCH_ANY);
1129            w.u32(*start);
1130            w.u16(*len);
1131        }
1132        Instr::Assert(matcher) => {
1133            w.u8(OP_ASSERT);
1134            w.u32(*matcher);
1135        }
1136        Instr::Extract { name, source } => {
1137            w.u8(OP_EXTRACT);
1138            w.u32(*name);
1139            w.u32(*source);
1140        }
1141        Instr::IfMatch { matcher, else_pc } => {
1142            w.u8(OP_IF_MATCH);
1143            w.u32(*matcher);
1144            w.u32(*else_pc);
1145        }
1146        Instr::ForList {
1147            item,
1148            start,
1149            len,
1150            end_pc,
1151        } => {
1152            w.u8(OP_FOR_LIST);
1153            w.u32(*item);
1154            w.u32(*start);
1155            w.u16(*len);
1156            w.u32(*end_pc);
1157        }
1158        Instr::ForVar { item, list, end_pc } => {
1159            w.u8(OP_FOR_VAR);
1160            w.u32(*item);
1161            w.u32(*list);
1162            w.u32(*end_pc);
1163        }
1164        Instr::LoopBack => w.u8(OP_LOOP_BACK),
1165        Instr::Break => w.u8(OP_BREAK),
1166        Instr::Save { from, to } => {
1167            w.u8(OP_SAVE);
1168            w.u32(*from);
1169            w.u32(*to);
1170        }
1171        Instr::Evidence(kind) => {
1172            w.u8(OP_EVIDENCE);
1173            w.u32(*kind);
1174        }
1175        Instr::Retry { probe, count } => {
1176            w.u8(OP_RETRY);
1177            w.u32(*probe);
1178            w.u32(*count);
1179        }
1180        Instr::RetryDelay(value) => {
1181            w.u8(OP_RETRY_DELAY);
1182            w.u32(*value);
1183        }
1184        Instr::Sleep(value) => {
1185            w.u8(OP_SLEEP);
1186            w.u32(*value);
1187        }
1188        Instr::Stop => w.u8(OP_STOP),
1189        Instr::Fail => w.u8(OP_FAIL),
1190        Instr::Continue => w.u8(OP_CONTINUE),
1191        Instr::Exit => w.u8(OP_EXIT),
1192    }
1193}
1194
1195fn read_instr(r: &mut Reader<'_>) -> Result<Instr, BytecodeError> {
1196    Ok(match r.u8()? {
1197        OP_SET => Instr::Set {
1198            name: r.u32()?,
1199            value: r.u32()?,
1200        },
1201        OP_SET_LIST => Instr::SetList {
1202            name: r.u32()?,
1203            start: r.u32()?,
1204            len: r.u16()?,
1205        },
1206        OP_SEND => {
1207            let probe = r.u32()?;
1208            let payload = if r.u8()? == 0 { None } else { Some(r.u32()?) };
1209            Instr::Send { probe, payload }
1210        }
1211        OP_MATCH => Instr::Match(r.u32()?),
1212        OP_MATCH_ALL => Instr::MatchAll {
1213            start: r.u32()?,
1214            len: r.u16()?,
1215        },
1216        OP_MATCH_ANY => Instr::MatchAny {
1217            start: r.u32()?,
1218            len: r.u16()?,
1219        },
1220        OP_ASSERT => Instr::Assert(r.u32()?),
1221        OP_EXTRACT => Instr::Extract {
1222            name: r.u32()?,
1223            source: r.u32()?,
1224        },
1225        OP_IF_MATCH => Instr::IfMatch {
1226            matcher: r.u32()?,
1227            else_pc: r.u32()?,
1228        },
1229        OP_FOR_LIST => Instr::ForList {
1230            item: r.u32()?,
1231            start: r.u32()?,
1232            len: r.u16()?,
1233            end_pc: r.u32()?,
1234        },
1235        OP_FOR_VAR => Instr::ForVar {
1236            item: r.u32()?,
1237            list: r.u32()?,
1238            end_pc: r.u32()?,
1239        },
1240        OP_LOOP_BACK => Instr::LoopBack,
1241        OP_BREAK => Instr::Break,
1242        OP_SAVE => Instr::Save {
1243            from: r.u32()?,
1244            to: r.u32()?,
1245        },
1246        OP_EVIDENCE => Instr::Evidence(r.u32()?),
1247        OP_RETRY => Instr::Retry {
1248            probe: r.u32()?,
1249            count: r.u32()?,
1250        },
1251        OP_RETRY_DELAY => Instr::RetryDelay(r.u32()?),
1252        OP_SLEEP => Instr::Sleep(r.u32()?),
1253        OP_STOP => Instr::Stop,
1254        OP_FAIL => Instr::Fail,
1255        OP_CONTINUE => Instr::Continue,
1256        OP_EXIT => Instr::Exit,
1257        _ => return Err(BytecodeError::Corrupt("opcode")),
1258    })
1259}
1260
#[cfg(test)]
mod tests {
    //! Unit tests for the binary codec: hex helpers, tag decoding, list and
    //! length bounding, header checks, and operand validation in `decode`.

    use super::*;

    #[test]
    fn hex_roundtrip() {
        let bytes = vec![0x52, 0x55, 0x53, 0x4f, 0x01, 0xff];
        let hex = bytes_to_hex(&bytes);
        assert_eq!(hex_to_bytes(&hex).unwrap(), bytes);
    }

    #[test]
    fn hex_rejects_non_hex_chars() {
        match hex_to_bytes("zz") {
            Err(BytecodeError::InvalidHex(_)) => {}
            other => panic!("expected InvalidHex, got {other:?}"),
        }
    }

    #[test]
    fn read_severity_rejects_unknown_byte() {
        let mut r = Reader::new(&[0x99]);
        match read_severity(&mut r) {
            Err(BytecodeError::Corrupt("severity")) => {}
            other => panic!("expected Corrupt(severity), got {other:?}"),
        }
    }

    #[test]
    fn read_severity_accepts_known_bytes() {
        for (byte, expected) in [
            (0u8, Severity::Low),
            (1, Severity::Medium),
            (2, Severity::High),
            (3, Severity::Critical),
            (4, Severity::Info),
        ] {
            let data = [byte];
            let mut r = Reader::new(&data);
            assert_eq!(read_severity(&mut r).unwrap(), expected);
        }
    }

    #[test]
    fn read_http_method_rejects_unknown_byte() {
        let mut r = Reader::new(&[0xff]);
        assert!(matches!(
            read_http_method(&mut r),
            Err(BytecodeError::Corrupt("http method"))
        ));
    }

    #[test]
    fn read_http_method_accepts_head_and_options() {
        for (byte, expected) in [(5u8, HttpMethod::Head), (6, HttpMethod::Options)] {
            let data = [byte];
            let mut r = Reader::new(&data);
            assert_eq!(read_http_method(&mut r).unwrap(), expected);
        }
    }

    #[test]
    fn read_cmp_op_rejects_unknown_byte() {
        let mut r = Reader::new(&[0xfe]);
        assert!(matches!(
            read_cmp_op(&mut r),
            Err(BytecodeError::Corrupt("cmp op"))
        ));
    }

    #[test]
    fn read_cmp_value_rejects_unknown_byte() {
        let mut r = Reader::new(&[0x77]);
        assert!(matches!(
            read_cmp_value(&mut r),
            Err(BytecodeError::Corrupt("cmp value"))
        ));
    }

    #[test]
    fn cmp_number_roundtrips_full_u64() {
        // Regression for the u32 → u64 widening: writing wrapped to u32 and
        // silently truncated large numbers. The wire format preserves the
        // full u64.
        let mut w = Writer::default();
        let value = CmpValue::Number(u64::MAX - 5);
        write_cmp_value(&mut w, &value);
        let mut r = Reader::new(&w.0);
        assert_eq!(read_cmp_value(&mut r).unwrap(), value);
    }

    #[test]
    fn evidence_kind_roundtrips_with_stable_tags() {
        // Pin the tag bytes: `Regex` is 1 and `ResponseRef` is 2.
        let mut w = Writer::default();
        write_evidence_kind(&mut w, &EvidenceKind::ResponseRef("resp".into()));
        assert_eq!(w.0[0], 2);
        let mut r = Reader::new(&w.0);
        assert!(matches!(
            read_evidence_kind(&mut r),
            Ok(EvidenceKind::ResponseRef(target)) if target == "resp"
        ));

        let mut w = Writer::default();
        write_evidence_kind(
            &mut w,
            &EvidenceKind::Regex {
                target: "body".into(),
                pattern: "a+".into(),
            },
        );
        assert_eq!(w.0[0], 1);
        let mut r = Reader::new(&w.0);
        assert!(matches!(
            read_evidence_kind(&mut r),
            Ok(EvidenceKind::Regex { target, pattern }) if target == "body" && pattern == "a+"
        ));
    }

    #[test]
    fn read_instr_rejects_unassigned_opcodes() {
        // 0, 24 and 0xff have no `OP_*` constant; 18 is the reserved former
        // `Repeat` opcode. All must surface as an unknown-opcode error.
        for byte in [0u8, 18, 24, 0xff] {
            let data = [byte];
            let mut r = Reader::new(&data);
            assert!(
                matches!(
                    read_instr(&mut r),
                    Err(BytecodeError::Corrupt("opcode"))
                ),
                "opcode byte {byte} should be rejected"
            );
        }
    }

    #[test]
    fn bounded_count_rejects_oversized_count() {
        // Attacker writes count = u32::MAX with only a few bytes following.
        // Without bounding this triggers a multi-GB `Vec::with_capacity`.
        let mut payload = Vec::new();
        payload.extend_from_slice(&u32::MAX.to_le_bytes());
        let mut r = Reader::new(&payload);
        let raw = r.u32().unwrap();
        assert!(matches!(
            r.bounded_count(raw),
            Err(BytecodeError::Corrupt(
                "list length exceeds remaining bytes"
            ))
        ));
    }

    #[test]
    fn read_strings_rejects_oversized_count() {
        // Exercise `read_strings` directly: a string table that declares
        // u32::MAX entries with nothing behind the count must be rejected.
        let buf = u32::MAX.to_le_bytes();
        let mut r = Reader::new(&buf);
        assert!(matches!(
            read_strings(&mut r),
            Err(BytecodeError::Corrupt(_))
        ));
    }

    #[test]
    fn read_payloads_rejects_oversized_count() {
        let mut buf = Vec::new();
        buf.extend_from_slice(&u32::MAX.to_le_bytes());
        let mut r = Reader::new(&buf);
        assert!(matches!(
            read_payloads(&mut r),
            Err(BytecodeError::Corrupt(_))
        ));
    }

    #[test]
    fn read_payloads_rejects_oversized_payload_length() {
        // count = 1 (valid), but the single payload claims a huge length.
        let mut buf = Vec::new();
        buf.extend_from_slice(&1u32.to_le_bytes()); // count
        buf.extend_from_slice(&u32::MAX.to_le_bytes()); // payload length
        let mut r = Reader::new(&buf);
        assert!(matches!(
            read_payloads(&mut r),
            Err(BytecodeError::Corrupt(_))
        ));
    }

    #[test]
    fn read_str_rejects_oversized_length() {
        let mut buf = Vec::new();
        buf.extend_from_slice(&u32::MAX.to_le_bytes());
        let mut r = Reader::new(&buf);
        assert!(matches!(r.str(), Err(BytecodeError::Corrupt(_))));
    }

    #[test]
    fn decode_rejects_bad_version() {
        let mut buf = Vec::new();
        buf.extend_from_slice(MAGIC);
        buf.push(99); // unsupported version
        assert!(matches!(
            decode(&buf),
            Err(BytecodeError::BadVersion { found: 99, .. })
        ));
    }

    #[test]
    fn decode_rejects_bad_magic() {
        let buf = [0u8, 0u8, 0u8, 0u8, VERSION];
        assert!(matches!(decode(&buf), Err(BytecodeError::BadMagic)));
    }

    #[test]
    fn metadata_roundtrip_preserves_tags_and_lists() {
        let metadata = CheckMetadata {
            name: Some("Check".into()),
            description: None,
            impact: None,
            severity: Some(Severity::High),
            author: None,
            report_title: None,
            cve: vec!["CVE-2024-1".into()],
            cwe: vec!["CWE-79".into()],
            references: vec!["https://example.com".into()],
            cvss: vec![],
            cvss_score: vec![],
            mitigation: Some("patch".into()),
            tags: vec!["auth".into(), "rce".into()],
            version: Some("1.2.3".into()),
            family: Some("web".into()),
        };
        let mut w = Writer::default();
        write_metadata(&mut w, &metadata);
        let mut r = Reader::new(&w.0);
        let decoded = read_metadata(&mut r).unwrap();
        assert_eq!(decoded.tags, vec!["auth", "rce"]);
        assert_eq!(decoded.cve, vec!["CVE-2024-1"]);
        assert_eq!(decoded.mitigation.as_deref(), Some("patch"));
        assert_eq!(decoded.severity, Some(Severity::High));
        assert_eq!(decoded.version.as_deref(), Some("1.2.3"));
        assert_eq!(decoded.family.as_deref(), Some("web"));
    }

    #[test]
    fn decode_rejects_out_of_range_string_index() {
        // A `Set { name: 7, value: 0 }` over an empty string pool would
        // panic the executor with an out-of-bounds index. decode() must
        // reject it as Corrupt instead.
        let program = BytecodeProgram {
            spec: ProgramSpec {
                probes: Default::default(),
                metadata: CheckMetadata::default(),
            },
            code: vec![Instr::Set { name: 7, value: 0 }],
            strings: vec![],
            payloads: vec![],
            matchers: vec![],
            extracts: vec![],
            evidence: vec![],
        };
        let bytes = encode(&program);
        match decode(&bytes) {
            Err(BytecodeError::Corrupt("operand index out of range")) => {}
            other => panic!("expected operand-index Corrupt, got {other:?}"),
        }
    }

    #[test]
    fn decode_rejects_out_of_range_match_slice() {
        let program = BytecodeProgram {
            spec: ProgramSpec {
                probes: Default::default(),
                metadata: CheckMetadata::default(),
            },
            // MatchAll over [0,3) but the matcher pool is empty.
            code: vec![Instr::MatchAll { start: 0, len: 3 }],
            strings: vec![],
            payloads: vec![],
            matchers: vec![],
            extracts: vec![],
            evidence: vec![],
        };
        let bytes = encode(&program);
        assert!(matches!(decode(&bytes), Err(BytecodeError::Corrupt(_))));
    }

    #[test]
    fn decode_accepts_in_range_operands() {
        let program = BytecodeProgram {
            spec: ProgramSpec {
                probes: Default::default(),
                metadata: CheckMetadata::default(),
            },
            code: vec![Instr::Set { name: 0, value: 1 }],
            strings: vec!["host".into(), "value".into()],
            payloads: vec![],
            matchers: vec![],
            extracts: vec![],
            evidence: vec![],
        };
        let bytes = encode(&program);
        let decoded = decode(&bytes).expect("valid operands round-trip");
        assert_eq!(decoded.code.len(), 1);
    }

    #[test]
    fn load_bytecode_input_no_longer_reads_files() {
        // Earlier revisions accepted `@/path/to/file` to read raw bytecode.
        // That entry point is gone; `@…` should now be treated as hex input
        // and fail because `@` is not a hex digit.
        match load_bytecode_input("@/etc/passwd") {
            Err(BytecodeError::InvalidHex(_)) => {}
            other => panic!("expected InvalidHex, got {other:?}"),
        }
    }
}