From 65471fb9fc8d716744b0d10e5d404b115a916d59 Mon Sep 17 00:00:00 2001 From: cottongin Date: Thu, 12 Mar 2026 14:07:01 -0400 Subject: [PATCH] feat: strip IRC formatting codes from messages sent to Owncast Add irc_format module that removes mIRC control codes (bold, color, italic, underline, reverse, strikethrough, monospace, reset) before forwarding to Owncast. Color codes with fg/bg digit params are consumed correctly. Multi-byte UTF-8 (emoji, accented chars, CJK) is preserved. Made-with: Cursor --- ...2026-03-12_strip-irc-formatting-summary.md | 17 ++ src/irc_format.rs | 159 ++++++++++++++++++ src/irc_task.rs | 2 +- src/main.rs | 9 +- 4 files changed, 184 insertions(+), 3 deletions(-) create mode 100644 chat-summaries/2026-03-12_strip-irc-formatting-summary.md create mode 100644 src/irc_format.rs diff --git a/chat-summaries/2026-03-12_strip-irc-formatting-summary.md b/chat-summaries/2026-03-12_strip-irc-formatting-summary.md new file mode 100644 index 0000000..08ff2af --- /dev/null +++ b/chat-summaries/2026-03-12_strip-irc-formatting-summary.md @@ -0,0 +1,17 @@ +# Strip IRC Formatting Codes from Owncast-bound Messages + +**Date:** 2026-03-12 + +## Task + +IRC messages forwarded to Owncast contained mIRC formatting control codes (`\x02`, `\x03`, `\x0F`, etc.) that rendered as garbage glyphs since Owncast doesn't support any message styling. + +## Changes + +- **New file: `src/irc_format.rs`** — `strip_formatting()` function that removes all IRC formatting control codes: bold, color (with fg/bg digit parameters), reset, monospace, reverse, italic, strikethrough, and underline. Includes 20 unit tests. +- **`src/irc_task.rs`** — Call `strip_formatting()` on the message body before constructing the `BridgeEvent`, so all downstream consumers see clean text. +- **`src/main.rs`** — Registered the new `irc_format` module. + +## Follow-up + +- None identified. No new dependencies added. 
diff --git a/src/irc_format.rs b/src/irc_format.rs new file mode 100644 index 0000000..e062310 --- /dev/null +++ b/src/irc_format.rs @@ -0,0 +1,159 @@ +/// Strip mIRC-style formatting control codes from a string. +/// +/// Removes bold (\x02), color (\x03 + optional fg[,bg] digits), reset (\x0F), +/// monospace (\x11), reverse (\x16), italic (\x1D), strikethrough (\x1E), +/// and underline (\x1F). +pub fn strip_formatting(input: &str) -> String { + let bytes = input.as_bytes(); + let len = bytes.len(); + let mut out: Vec<u8> = Vec::with_capacity(len); + let mut i = 0; + + while i < len { + match bytes[i] { + b'\x02' | b'\x0F' | b'\x11' | b'\x16' | b'\x1D' | b'\x1E' | b'\x1F' => { + i += 1; + } + b'\x03' => { + i += 1; + let mut digits = 0; + while i < len && digits < 2 && bytes[i].is_ascii_digit() { + i += 1; + digits += 1; + } + if i < len && bytes[i] == b',' && i + 1 < len && bytes[i + 1].is_ascii_digit() { + i += 1; + digits = 0; + while i < len && digits < 2 && bytes[i].is_ascii_digit() { + i += 1; + digits += 1; + } + } + } + b => { + out.push(b); + i += 1; + } + } + } + + // IRC control codes are single-byte ASCII (< 0x80) so removing them from + // valid UTF-8 always yields valid UTF-8. 
+ String::from_utf8(out).expect("stripping ASCII control codes preserves UTF-8") +} + +#[cfg(test)] +mod tests { + use super::*; + + #[test] + fn clean_text_unchanged() { + assert_eq!(strip_formatting("hello world"), "hello world"); + } + + #[test] + fn strips_bold() { + assert_eq!(strip_formatting("\x02bold\x02"), "bold"); + } + + #[test] + fn strips_italic() { + assert_eq!(strip_formatting("\x1Ditalic\x1D"), "italic"); + } + + #[test] + fn strips_underline() { + assert_eq!(strip_formatting("\x1Funderline\x1F"), "underline"); + } + + #[test] + fn strips_reset() { + assert_eq!(strip_formatting("styled\x0F plain"), "styled plain"); + } + + #[test] + fn strips_color_no_params() { + assert_eq!(strip_formatting("\x03hello"), "hello"); + } + + #[test] + fn strips_color_fg_only() { + assert_eq!(strip_formatting("\x034red text"), "red text"); + } + + #[test] + fn strips_color_two_digit_fg() { + assert_eq!(strip_formatting("\x0312blue text"), "blue text"); + } + + #[test] + fn strips_color_fg_and_bg() { + assert_eq!(strip_formatting("\x034,2red on blue"), "red on blue"); + } + + #[test] + fn strips_color_two_digit_fg_and_bg() { + assert_eq!(strip_formatting("\x0304,12colored"), "colored"); + } + + #[test] + fn color_comma_without_bg_digit_preserves_comma() { + // \x03 followed by digit then comma but no bg digit — comma is kept + assert_eq!(strip_formatting("\x034,text"), ",text"); + } + + #[test] + fn mixed_codes() { + assert_eq!( + strip_formatting("\x02\x034,5bold color\x0F normal"), + "bold color normal" + ); + } + + #[test] + fn color_at_end_of_string() { + assert_eq!(strip_formatting("text\x03"), "text"); + assert_eq!(strip_formatting("text\x034"), "text"); + assert_eq!(strip_formatting("text\x0304,"), "text,"); + assert_eq!(strip_formatting("text\x0304,1"), "text"); + } + + #[test] + fn strips_monospace() { + assert_eq!(strip_formatting("\x11code\x11"), "code"); + } + + #[test] + fn strips_reverse() { + assert_eq!(strip_formatting("\x16reversed\x16"), "reversed"); + 
} + + #[test] + fn strips_strikethrough() { + assert_eq!(strip_formatting("\x1Estruck\x1E"), "struck"); + } + + #[test] + fn empty_input() { + assert_eq!(strip_formatting(""), ""); + } + + #[test] + fn preserves_multibyte_utf8() { + assert_eq!(strip_formatting("✨ hello ⚡"), "✨ hello ⚡"); + } + + #[test] + fn strips_codes_around_emoji() { + // \x034 = color fg 4; \x0333 = color fg 33, leaving trailing "3" + assert_eq!( + strip_formatting("\x02✨\x02 boosted \x034⚡\x03333 sats"), + "✨ boosted ⚡3 sats" + ); + } + + #[test] + fn preserves_cjk_and_accented_chars() { + assert_eq!(strip_formatting("\x02café\x02 日本語"), "café 日本語"); + } +} diff --git a/src/irc_task.rs b/src/irc_task.rs index 0b1d5cb..77ead5f 100644 --- a/src/irc_task.rs +++ b/src/irc_task.rs @@ -74,7 +74,7 @@ async fn connect_and_run( let event = BridgeEvent::ChatMessage { source: Source::Irc, username: nick, - body: text.clone(), + body: crate::irc_format::strip_formatting(text), id: None, }; if event_tx.send(event).await.is_err() { diff --git a/src/main.rs b/src/main.rs index fa16771..6bf288a 100644 --- a/src/main.rs +++ b/src/main.rs @@ -3,6 +3,7 @@ mod control; mod events; mod health; mod html; +mod irc_format; mod irc_task; mod owncast_api; mod router; @@ -69,11 +70,15 @@ async fn main() -> anyhow::Result<()> { }); let _ws_handle = if config.owncast.websocket_enabled { - let ws_url = config.owncast.url.clone(); + let ws_api = owncast_api::OwncastApiClient::new( + config.owncast.url.clone(), + String::new(), + ); + let ws_display_name = config.owncast.ws_display_name.clone(); let ws_event_tx = event_tx.clone(); let ws_shutdown = shutdown_rx.clone(); Some(tokio::spawn(async move { - websocket::run_websocket_task(ws_url, ws_event_tx, ws_shutdown).await; + websocket::run_websocket_task(ws_api, ws_display_name, ws_event_tx, ws_shutdown).await; })) } else { None