Files
owncast-IRC-bridge/src/irc_format.rs
cottongin e2fbd52009 fix: format IRC ACTION messages as * nick action * and strip CTCP delimiters
CTCP ACTION messages (/me) were relayed with raw \x01 bytes, rendering
as boxed-X characters in OwnCast. Detect the ACTION pattern, extract the
body, and format it like traditional IRC clients. Also strip \x01 in
irc_format as a safety net for other CTCP leakage.

Made-with: Cursor
2026-03-12 16:37:46 -04:00

173 lines
4.6 KiB
Rust

/// Strip mIRC-style formatting control codes from a string.
///
/// Removes the CTCP delimiter (`\x01`), bold (`\x02`), color (`\x03` plus an
/// optional `fg[,bg]` parameter), reset (`\x0F`), monospace (`\x11`),
/// reverse (`\x16`), italic (`\x1D`), strikethrough (`\x1E`), and
/// underline (`\x1F`). Every other character is copied through unchanged.
///
/// Color parameters are parsed as follows:
///
/// * `fg` and `bg` are each at most two ASCII digits; further digits are
///   ordinary text (`"\x03333"` leaves `"3"`).
/// * `,bg` is consumed only when a foreground number precedes it **and** the
///   comma is directly followed by a digit. Otherwise the comma (and whatever
///   follows it) is kept as text, so `"\x034,text"` yields `",text"` and
///   `"\x03,5"` yields `",5"`.
pub fn strip_formatting(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut chars = input.chars().peekable();

    while let Some(c) = chars.next() {
        match c {
            // Single-character toggles: simply drop them.
            '\x01' | '\x02' | '\x0F' | '\x11' | '\x16' | '\x1D' | '\x1E' | '\x1F' => {}
            '\x03' => {
                let fg_digits = skip_color_digits(&mut chars);
                // A background is only valid after a foreground; a bare
                // "\x03," is a color reset followed by a literal comma.
                if fg_digits > 0 {
                    // Look two characters ahead without consuming, so the
                    // comma is left in place when no digit follows it.
                    let mut ahead = chars.clone();
                    let has_bg = ahead.next() == Some(',')
                        && matches!(ahead.next(), Some(d) if d.is_ascii_digit());
                    if has_bg {
                        chars.next(); // the comma
                        skip_color_digits(&mut chars);
                    }
                }
            }
            other => out.push(other),
        }
    }

    out
}

/// Consume up to two leading ASCII digits (one color number) from `chars`
/// and return how many were consumed.
fn skip_color_digits(chars: &mut std::iter::Peekable<std::str::Chars<'_>>) -> usize {
    let mut consumed = 0;
    while consumed < 2 && matches!(chars.peek(), Some(c) if c.is_ascii_digit()) {
        chars.next();
        consumed += 1;
    }
    consumed
}
/// Unit tests for [`strip_formatting`]: one case per control code, the color
/// parameter grammar, and UTF-8 preservation.
#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn clean_text_unchanged() {
        assert_eq!(strip_formatting("hello world"), "hello world");
    }

    #[test]
    fn strips_bold() {
        assert_eq!(strip_formatting("\x02bold\x02"), "bold");
    }

    #[test]
    fn strips_italic() {
        assert_eq!(strip_formatting("\x1Ditalic\x1D"), "italic");
    }

    #[test]
    fn strips_underline() {
        assert_eq!(strip_formatting("\x1Funderline\x1F"), "underline");
    }

    #[test]
    fn strips_reset() {
        assert_eq!(strip_formatting("styled\x0F plain"), "styled plain");
    }

    #[test]
    fn strips_color_no_params() {
        assert_eq!(strip_formatting("\x03hello"), "hello");
    }

    #[test]
    fn strips_color_fg_only() {
        assert_eq!(strip_formatting("\x034red text"), "red text");
    }

    #[test]
    fn strips_color_two_digit_fg() {
        assert_eq!(strip_formatting("\x0312blue text"), "blue text");
    }

    #[test]
    fn strips_color_fg_and_bg() {
        assert_eq!(strip_formatting("\x034,2red on blue"), "red on blue");
    }

    #[test]
    fn strips_color_two_digit_fg_and_bg() {
        assert_eq!(strip_formatting("\x0304,12colored"), "colored");
    }

    #[test]
    fn color_comma_without_bg_digit_preserves_comma() {
        // \x03 followed by digit then comma but no bg digit — comma is kept
        assert_eq!(strip_formatting("\x034,text"), ",text");
    }

    #[test]
    fn mixed_codes() {
        assert_eq!(
            strip_formatting("\x02\x034,5bold color\x0F normal"),
            "bold color normal"
        );
    }

    #[test]
    fn color_at_end_of_string() {
        // Truncated color sequences must not read past the end of the input.
        assert_eq!(strip_formatting("text\x03"), "text");
        assert_eq!(strip_formatting("text\x034"), "text");
        assert_eq!(strip_formatting("text\x0304,"), "text,");
        assert_eq!(strip_formatting("text\x0304,1"), "text");
    }

    #[test]
    fn strips_monospace() {
        assert_eq!(strip_formatting("\x11code\x11"), "code");
    }

    #[test]
    fn strips_reverse() {
        assert_eq!(strip_formatting("\x16reversed\x16"), "reversed");
    }

    #[test]
    fn strips_strikethrough() {
        assert_eq!(strip_formatting("\x1Estruck\x1E"), "struck");
    }

    #[test]
    fn empty_input() {
        assert_eq!(strip_formatting(""), "");
    }

    #[test]
    fn preserves_multibyte_utf8() {
        assert_eq!(strip_formatting("✨ hello ⚡"), "✨ hello ⚡");
    }

    #[test]
    fn strips_codes_around_emoji() {
        // \x02✨\x02 = bold toggled around an emoji; \x034 = color fg 4;
        // \x0333 = color fg 33, leaving trailing "3".
        // The input must contain the ✨ itself: stripping never adds characters.
        assert_eq!(
            strip_formatting("\x02✨\x02 boosted \x034⚡\x03333 sats"),
            "✨ boosted ⚡3 sats"
        );
    }

    #[test]
    fn preserves_cjk_and_accented_chars() {
        assert_eq!(strip_formatting("\x02café\x02 日本語"), "café 日本語");
    }

    #[test]
    fn strips_ctcp_delimiter() {
        assert_eq!(strip_formatting("\x01ACTION sniffs\x01"), "ACTION sniffs");
    }

    #[test]
    fn strips_ctcp_delimiter_mixed_with_formatting() {
        assert_eq!(
            strip_formatting("\x01\x02ACTION bold\x02\x01"),
            "ACTION bold"
        );
    }
}