feat: strip IRC formatting codes from messages sent to Owncast

Add irc_format module that removes mIRC control codes (bold, color,
italic, underline, reverse, strikethrough, monospace, reset) before
forwarding to Owncast. Color codes with fg/bg digit params are
consumed correctly. Multi-byte UTF-8 (emoji, accented chars, CJK)
is preserved.

Made-with: Cursor
This commit is contained in:
cottongin
2026-03-12 14:07:01 -04:00
parent 196997f728
commit 65471fb9fc
4 changed files with 184 additions and 3 deletions

View File

@@ -0,0 +1,17 @@
# Strip IRC Formatting Codes from Owncast-bound Messages
**Date:** 2026-03-12
## Task
IRC messages forwarded to Owncast contained mIRC formatting control codes (`\x02`, `\x03`, `\x0F`, etc.) that rendered as garbage glyphs since Owncast doesn't support any message styling.
## Changes
- **New file: `src/irc_format.rs`** — `strip_formatting()` function that removes all IRC formatting control codes: bold, color (with fg/bg digit parameters), reset, monospace, reverse, italic, strikethrough, and underline. Includes 20 unit tests.
- **`src/irc_task.rs`** — Call `strip_formatting()` on the message body before constructing the `BridgeEvent`, so all downstream consumers see clean text.
- **`src/main.rs`** — Registered the new `irc_format` module.
## Follow-up
- None identified. No new dependencies added.

159
src/irc_format.rs Normal file
View File

@@ -0,0 +1,159 @@
/// Strip IRC formatting control codes from a string.
///
/// Removes bold (\x02), color (\x03 + optional fg[,bg] decimal digits),
/// hex color (\x04 + optional RRGGBB[,RRGGBB]), reset (\x0F),
/// monospace (\x11), reverse (\x16), italic (\x1D), strikethrough (\x1E),
/// and underline (\x1F).
///
/// Every other character, including multi-byte UTF-8, is copied to the
/// result unchanged. A comma that is not followed by a valid background
/// color is treated as ordinary text and kept.
pub fn strip_formatting(input: &str) -> String {
    // Number of leading ASCII decimal digits in `rest`, capped at `max`.
    fn digit_run(rest: &[u8], max: usize) -> usize {
        rest.iter()
            .take(max)
            .take_while(|b| b.is_ascii_digit())
            .count()
    }

    // Length of the `fg[,bg]` parameters following a \x03 code. Each color is
    // at most two digits; the comma is only consumed when at least one
    // background digit follows it.
    fn mirc_color_params_len(rest: &[u8]) -> usize {
        let fg = digit_run(rest, 2);
        if rest.get(fg) == Some(&b',') {
            let bg = digit_run(&rest[fg + 1..], 2);
            if bg > 0 {
                return fg + 1 + bg;
            }
        }
        fg
    }

    // Length of the `RRGGBB[,RRGGBB]` parameters following a \x04 code. A
    // color is only recognised when all six hex digits are present.
    fn hex_color_params_len(rest: &[u8]) -> usize {
        fn is_rgb(s: &[u8]) -> bool {
            s.len() >= 6 && s[..6].iter().all(u8::is_ascii_hexdigit)
        }
        if !is_rgb(rest) {
            return 0;
        }
        if rest.get(6) == Some(&b',') && is_rgb(&rest[7..]) {
            13
        } else {
            6
        }
    }

    let bytes = input.as_bytes();
    let mut out = String::with_capacity(input.len());
    // Start of the pending run of text that has not been copied to `out` yet.
    let mut keep_from = 0;
    let mut i = 0;
    while i < bytes.len() {
        let skip = match bytes[i] {
            b'\x02' | b'\x0F' | b'\x11' | b'\x16' | b'\x1D' | b'\x1E' | b'\x1F' => 1,
            b'\x03' => 1 + mirc_color_params_len(&bytes[i + 1..]),
            b'\x04' => 1 + hex_color_params_len(&bytes[i + 1..]),
            _ => {
                i += 1;
                continue;
            }
        };
        // Control codes and their parameters are single-byte ASCII, so both
        // `keep_from` and `i` always fall on UTF-8 character boundaries and
        // the slices below are valid `str` ranges.
        out.push_str(&input[keep_from..i]);
        i += skip;
        keep_from = i;
    }
    out.push_str(&input[keep_from..]);
    out
}
/// Unit tests for `strip_formatting`: one case per control code, the color
/// parameter edge cases, and preservation of multi-byte UTF-8 text.
#[cfg(test)]
mod tests {
    use super::*;

    /// Text without any control codes passes through untouched.
    #[test]
    fn clean_text_unchanged() {
        assert_eq!(strip_formatting("hello world"), "hello world");
    }

    #[test]
    fn strips_bold() {
        assert_eq!(strip_formatting("\x02bold\x02"), "bold");
    }

    #[test]
    fn strips_italic() {
        assert_eq!(strip_formatting("\x1Ditalic\x1D"), "italic");
    }

    #[test]
    fn strips_underline() {
        assert_eq!(strip_formatting("\x1Funderline\x1F"), "underline");
    }

    #[test]
    fn strips_reset() {
        assert_eq!(strip_formatting("styled\x0F plain"), "styled plain");
    }

    /// A bare \x03 with no digits is removed on its own.
    #[test]
    fn strips_color_no_params() {
        assert_eq!(strip_formatting("\x03hello"), "hello");
    }

    #[test]
    fn strips_color_fg_only() {
        assert_eq!(strip_formatting("\x034red text"), "red text");
    }

    #[test]
    fn strips_color_two_digit_fg() {
        assert_eq!(strip_formatting("\x0312blue text"), "blue text");
    }

    #[test]
    fn strips_color_fg_and_bg() {
        assert_eq!(strip_formatting("\x034,2red on blue"), "red on blue");
    }

    #[test]
    fn strips_color_two_digit_fg_and_bg() {
        assert_eq!(strip_formatting("\x0304,12colored"), "colored");
    }

    #[test]
    fn color_comma_without_bg_digit_preserves_comma() {
        // \x03 followed by digit then comma but no bg digit — comma is kept
        assert_eq!(strip_formatting("\x034,text"), ",text");
    }

    #[test]
    fn mixed_codes() {
        assert_eq!(
            strip_formatting("\x02\x034,5bold color\x0F normal"),
            "bold color normal"
        );
    }

    /// Color codes that are cut off by the end of the input must not read
    /// past the buffer, and a dangling comma is kept as text.
    #[test]
    fn color_at_end_of_string() {
        assert_eq!(strip_formatting("text\x03"), "text");
        assert_eq!(strip_formatting("text\x034"), "text");
        assert_eq!(strip_formatting("text\x0304,"), "text,");
        assert_eq!(strip_formatting("text\x0304,1"), "text");
    }

    #[test]
    fn strips_monospace() {
        assert_eq!(strip_formatting("\x11code\x11"), "code");
    }

    #[test]
    fn strips_reverse() {
        assert_eq!(strip_formatting("\x16reversed\x16"), "reversed");
    }

    #[test]
    fn strips_strikethrough() {
        assert_eq!(strip_formatting("\x1Estruck\x1E"), "struck");
    }

    #[test]
    fn empty_input() {
        assert_eq!(strip_formatting(""), "");
    }

    #[test]
    fn preserves_multibyte_utf8() {
        assert_eq!(strip_formatting("✨ hello ⚡"), "✨ hello ⚡");
    }

    #[test]
    fn strips_codes_around_emoji() {
        // The bold pair wraps the ✨ emoji, which must survive stripping.
        // \x034 = color fg 4; \x0333 = color fg 33, leaving trailing "3"
        assert_eq!(
            strip_formatting("\x02✨\x02 boosted \x034⚡\x03333 sats"),
            "✨ boosted ⚡3 sats"
        );
    }

    #[test]
    fn preserves_cjk_and_accented_chars() {
        assert_eq!(strip_formatting("\x02café\x02 日本語"), "café 日本語");
    }
}

View File

@@ -74,7 +74,7 @@ async fn connect_and_run(
let event = BridgeEvent::ChatMessage {
source: Source::Irc,
username: nick,
body: text.clone(),
body: crate::irc_format::strip_formatting(text),
id: None,
};
if event_tx.send(event).await.is_err() {

View File

@@ -3,6 +3,7 @@ mod control;
mod events;
mod health;
mod html;
mod irc_format;
mod irc_task;
mod owncast_api;
mod router;
@@ -69,11 +70,15 @@ async fn main() -> anyhow::Result<()> {
});
let _ws_handle = if config.owncast.websocket_enabled {
let ws_url = config.owncast.url.clone();
let ws_api = owncast_api::OwncastApiClient::new(
config.owncast.url.clone(),
String::new(),
);
let ws_display_name = config.owncast.ws_display_name.clone();
let ws_event_tx = event_tx.clone();
let ws_shutdown = shutdown_rx.clone();
Some(tokio::spawn(async move {
websocket::run_websocket_task(ws_url, ws_event_tx, ws_shutdown).await;
websocket::run_websocket_task(ws_api, ws_display_name, ws_event_tx, ws_shutdown).await;
}))
} else {
None