feat: strip IRC formatting codes from messages sent to Owncast
Add irc_format module that removes mIRC control codes (bold, color, italic, underline, reverse, strikethrough, monospace, reset) before forwarding to Owncast. Color codes with fg/bg digit params are consumed correctly. Multi-byte UTF-8 (emoji, accented chars, CJK) is preserved. Made-with: Cursor
This commit is contained in:
17
chat-summaries/2026-03-12_strip-irc-formatting-summary.md
Normal file
17
chat-summaries/2026-03-12_strip-irc-formatting-summary.md
Normal file
@@ -0,0 +1,17 @@
|
||||
# Strip IRC Formatting Codes from Owncast-bound Messages
|
||||
|
||||
**Date:** 2026-03-12
|
||||
|
||||
## Task
|
||||
|
||||
IRC messages forwarded to Owncast contained mIRC formatting control codes (`\x02`, `\x03`, `\x0F`, etc.) that rendered as garbage glyphs since Owncast doesn't support any message styling.
|
||||
|
||||
## Changes
|
||||
|
||||
- **New file: `src/irc_format.rs`** — `strip_formatting()` function that removes all IRC formatting control codes: bold, color (with fg/bg digit parameters), reset, monospace, reverse, italic, strikethrough, and underline. Includes 20 unit tests.
|
||||
- **`src/irc_task.rs`** — Call `strip_formatting()` on the message body before constructing the `BridgeEvent`, so all downstream consumers see clean text.
|
||||
- **`src/main.rs`** — Registered the new `irc_format` module.
|
||||
|
||||
## Follow-up
|
||||
|
||||
- None identified. No new dependencies added.
|
||||
159
src/irc_format.rs
Normal file
159
src/irc_format.rs
Normal file
@@ -0,0 +1,159 @@
|
||||
/// Strip IRC formatting control codes from a string.
///
/// Removes the following codes and returns the remaining text unchanged:
///
/// * bold (`\x02`), reset (`\x0F`), monospace (`\x11`), reverse (`\x16`),
///   italic (`\x1D`), strikethrough (`\x1E`) and underline (`\x1F`);
/// * mIRC color (`\x03`) together with its optional `fg[,bg]` parameters,
///   where each parameter is one or two ASCII digits;
/// * hex color (`\x04`) together with its optional `RRGGBB[,RRGGBB]`
///   parameters, where each parameter is exactly six hex digits.
///
/// A comma that is not followed by a valid background parameter is ordinary
/// text and is kept. All other characters, including multi-byte UTF-8, are
/// copied through untouched. This function never panics.
pub fn strip_formatting(input: &str) -> String {
    let mut out = String::with_capacity(input.len());
    let mut rest = input;

    // Copy each run of clean text in one go, then skip the control code that
    // ended the run along with any parameters that belong to it.
    while let Some(pos) = rest.find(is_formatting_code) {
        out.push_str(&rest[..pos]);

        // Every formatting code is a single ASCII byte, so `pos + 1` is
        // always a valid char boundary.
        let code = rest.as_bytes()[pos];
        let tail = &rest[pos + 1..];
        rest = match code {
            0x03 => skip_mirc_color_params(tail),
            0x04 => skip_hex_color_params(tail),
            _ => tail,
        };
    }

    out.push_str(rest);
    out
}

/// Returns `true` for every control character that `strip_formatting` removes.
fn is_formatting_code(c: char) -> bool {
    matches!(
        c,
        '\x02' | '\x03' | '\x04' | '\x0F' | '\x11' | '\x16' | '\x1D' | '\x1E' | '\x1F'
    )
}

/// Counts how many of the first `max` bytes satisfy `pred` consecutively.
fn leading_run(bytes: &[u8], max: usize, pred: fn(&u8) -> bool) -> usize {
    bytes.iter().take(max).take_while(|&b| pred(b)).count()
}

/// Skips the `fg[,bg]` parameters (1-2 digits each) that may follow `\x03`.
fn skip_mirc_color_params(tail: &str) -> &str {
    let bytes = tail.as_bytes();
    let mut skip = leading_run(bytes, 2, u8::is_ascii_digit);

    // The comma only belongs to the color code when a background digit
    // follows it; otherwise it is message text and must survive.
    if bytes.get(skip) == Some(&b',') {
        let bg = leading_run(&bytes[skip + 1..], 2, u8::is_ascii_digit);
        if bg > 0 {
            skip += 1 + bg;
        }
    }

    // Only ASCII bytes were skipped, so `skip` is a char boundary.
    &tail[skip..]
}

/// Skips the `RRGGBB[,RRGGBB]` parameters that may follow `\x04`.
fn skip_hex_color_params(tail: &str) -> &str {
    const HEX_LEN: usize = 6;
    let bytes = tail.as_bytes();

    // Anything shorter than a full RRGGBB triple is not a color parameter;
    // treat the bare code as a reset and leave the following text intact.
    if leading_run(bytes, HEX_LEN, u8::is_ascii_hexdigit) < HEX_LEN {
        return tail;
    }

    let mut skip = HEX_LEN;
    if bytes.get(skip) == Some(&b',')
        && leading_run(&bytes[skip + 1..], HEX_LEN, u8::is_ascii_hexdigit) == HEX_LEN
    {
        skip += 1 + HEX_LEN;
    }

    // Only ASCII bytes were skipped, so `skip` is a char boundary.
    &tail[skip..]
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::strip_formatting;

    /// Asserts that stripping `raw` produces exactly `want`.
    ///
    /// `#[track_caller]` makes a failure point at the calling test rather
    /// than at this helper.
    #[track_caller]
    fn expect_stripped(raw: &str, want: &str) {
        assert_eq!(strip_formatting(raw), want);
    }

    // --- Plain text -------------------------------------------------------

    #[test]
    fn clean_text_unchanged() {
        expect_stripped("hello world", "hello world");
    }

    // --- Single-byte toggles ----------------------------------------------

    #[test]
    fn strips_bold() {
        expect_stripped("\x02bold\x02", "bold");
    }

    #[test]
    fn strips_italic() {
        expect_stripped("\x1Ditalic\x1D", "italic");
    }

    #[test]
    fn strips_underline() {
        expect_stripped("\x1Funderline\x1F", "underline");
    }

    #[test]
    fn strips_reset() {
        expect_stripped("styled\x0F plain", "styled plain");
    }

    // --- Color codes --------------------------------------------------------

    #[test]
    fn strips_color_no_params() {
        expect_stripped("\x03hello", "hello");
    }

    #[test]
    fn strips_color_fg_only() {
        expect_stripped("\x034red text", "red text");
    }

    #[test]
    fn strips_color_two_digit_fg() {
        expect_stripped("\x0312blue text", "blue text");
    }

    #[test]
    fn strips_color_fg_and_bg() {
        expect_stripped("\x034,2red on blue", "red on blue");
    }

    #[test]
    fn strips_color_two_digit_fg_and_bg() {
        expect_stripped("\x0304,12colored", "colored");
    }

    #[test]
    fn color_comma_without_bg_digit_preserves_comma() {
        // A foreground digit followed by a comma with no background digit:
        // the comma is message text and must be kept.
        expect_stripped("\x034,text", ",text");
    }

    #[test]
    fn mixed_codes() {
        expect_stripped("\x02\x034,5bold color\x0F normal", "bold color normal");
    }

    #[test]
    fn color_at_end_of_string() {
        expect_stripped("text\x03", "text");
        expect_stripped("text\x034", "text");
        expect_stripped("text\x0304,", "text,");
        expect_stripped("text\x0304,1", "text");
    }

    // --- Remaining toggles --------------------------------------------------

    #[test]
    fn strips_monospace() {
        expect_stripped("\x11code\x11", "code");
    }

    #[test]
    fn strips_reverse() {
        expect_stripped("\x16reversed\x16", "reversed");
    }

    #[test]
    fn strips_strikethrough() {
        expect_stripped("\x1Estruck\x1E", "struck");
    }

    // --- Edge cases and UTF-8 -----------------------------------------------

    #[test]
    fn empty_input() {
        expect_stripped("", "");
    }

    #[test]
    fn preserves_multibyte_utf8() {
        expect_stripped("✨ hello ⚡", "✨ hello ⚡");
    }

    #[test]
    fn strips_codes_around_emoji() {
        // The first color code takes fg "4". The second is followed by
        // "333": at most two digits are consumed, so one "3" stays as text.
        expect_stripped(
            "\x02✨\x02 boosted \x034⚡\x03333 sats",
            "✨ boosted ⚡3 sats",
        );
    }

    #[test]
    fn preserves_cjk_and_accented_chars() {
        expect_stripped("\x02café\x02 日本語", "café 日本語");
    }
}
|
||||
@@ -74,7 +74,7 @@ async fn connect_and_run(
|
||||
let event = BridgeEvent::ChatMessage {
|
||||
source: Source::Irc,
|
||||
username: nick,
|
||||
body: text.clone(),
|
||||
body: crate::irc_format::strip_formatting(text),
|
||||
id: None,
|
||||
};
|
||||
if event_tx.send(event).await.is_err() {
|
||||
|
||||
@@ -3,6 +3,7 @@ mod control;
|
||||
mod events;
|
||||
mod health;
|
||||
mod html;
|
||||
mod irc_format;
|
||||
mod irc_task;
|
||||
mod owncast_api;
|
||||
mod router;
|
||||
@@ -69,11 +70,15 @@ async fn main() -> anyhow::Result<()> {
|
||||
});
|
||||
|
||||
let _ws_handle = if config.owncast.websocket_enabled {
|
||||
let ws_url = config.owncast.url.clone();
|
||||
let ws_api = owncast_api::OwncastApiClient::new(
|
||||
config.owncast.url.clone(),
|
||||
String::new(),
|
||||
);
|
||||
let ws_display_name = config.owncast.ws_display_name.clone();
|
||||
let ws_event_tx = event_tx.clone();
|
||||
let ws_shutdown = shutdown_rx.clone();
|
||||
Some(tokio::spawn(async move {
|
||||
websocket::run_websocket_task(ws_url, ws_event_tx, ws_shutdown).await;
|
||||
websocket::run_websocket_task(ws_api, ws_display_name, ws_event_tx, ws_shutdown).await;
|
||||
}))
|
||||
} else {
|
||||
None
|
||||
|
||||
Reference in New Issue
Block a user