/// Strip mIRC-style formatting control codes from a string.
///
/// Removes CTCP delimiter (`\x01`), bold (`\x02`), color (`\x03` + optional
/// `fg[,bg]` digits), reset (`\x0F`), monospace (`\x11`), reverse (`\x16`),
/// italic (`\x1D`), strikethrough (`\x1E`), and underline (`\x1F`).
///
/// Color handling details:
/// - At most two digits are consumed for the foreground and at most two for
///   the background; any further digits are kept as ordinary text.
/// - A comma after the foreground is consumed only when it is immediately
///   followed by a digit; otherwise the comma is kept as ordinary text.
///
/// All other bytes, including multi-byte UTF-8 sequences, are copied through
/// unchanged.
///
/// # Panics
///
/// Never in practice: only single-byte ASCII values are removed from an
/// already valid `&str`, so the remaining bytes are always valid UTF-8.
pub fn strip_formatting(input: &str) -> String {
    let bytes = input.as_bytes();
    let len = bytes.len();
    let mut out: Vec<u8> = Vec::with_capacity(len);
    let mut i = 0;

    while i < len {
        match bytes[i] {
            // Single-byte toggles: drop the byte and move on.
            b'\x01' | b'\x02' | b'\x0F' | b'\x11' | b'\x16' | b'\x1D' | b'\x1E' | b'\x1F' => {
                i += 1;
            }
            // Color: drop the marker, then the optional `fg[,bg]` parameters.
            b'\x03' => {
                i = skip_color_digits(bytes, i + 1);

                // Only treat the comma as part of the color code when a
                // background digit actually follows it.
                let has_background = bytes.get(i) == Some(&b',')
                    && matches!(bytes.get(i + 1), Some(b) if b.is_ascii_digit());
                if has_background {
                    i = skip_color_digits(bytes, i + 1);
                }
            }
            b => {
                out.push(b);
                i += 1;
            }
        }
    }

    // IRC control codes are single-byte ASCII (< 0x80) so removing them from
    // valid UTF-8 always yields valid UTF-8.
    String::from_utf8(out).expect("stripping ASCII control codes preserves UTF-8")
}

/// Return the index just past a run of at most two ASCII digits that begins
/// at `start`; returns `start` itself when no digit is found there.
fn skip_color_digits(bytes: &[u8], start: usize) -> usize {
    let run = bytes
        .iter()
        .skip(start)
        .take(2)
        .take_while(|b| b.is_ascii_digit())
        .count();
    start + run
}

#[cfg(test)]
mod tests {
    use super::*;

    #[test]
    fn clean_text_unchanged() {
        assert_eq!(strip_formatting("hello world"), "hello world");
    }

    #[test]
    fn strips_bold() {
        assert_eq!(strip_formatting("\x02bold\x02"), "bold");
    }

    #[test]
    fn strips_italic() {
        assert_eq!(strip_formatting("\x1Ditalic\x1D"), "italic");
    }

    #[test]
    fn strips_underline() {
        assert_eq!(strip_formatting("\x1Funderline\x1F"), "underline");
    }

    #[test]
    fn strips_reset() {
        assert_eq!(strip_formatting("styled\x0F plain"), "styled plain");
    }

    #[test]
    fn strips_color_no_params() {
        assert_eq!(strip_formatting("\x03hello"), "hello");
    }

    #[test]
    fn strips_color_fg_only() {
        assert_eq!(strip_formatting("\x034red text"), "red text");
    }

    #[test]
    fn strips_color_two_digit_fg() {
        assert_eq!(strip_formatting("\x0312blue text"), "blue text");
    }

    #[test]
    fn strips_color_fg_and_bg() {
        assert_eq!(strip_formatting("\x034,2red on blue"), "red on blue");
    }

    #[test]
    fn strips_color_two_digit_fg_and_bg() {
        assert_eq!(strip_formatting("\x0304,12colored"), "colored");
    }

    #[test]
    fn color_comma_without_bg_digit_preserves_comma() {
        // \x03 followed by digit then comma but no bg digit — comma is kept
        assert_eq!(strip_formatting("\x034,text"), ",text");
    }

    #[test]
    fn mixed_codes() {
        assert_eq!(
            strip_formatting("\x02\x034,5bold color\x0F normal"),
            "bold color normal"
        );
    }

    #[test]
    fn color_at_end_of_string() {
        assert_eq!(strip_formatting("text\x03"), "text");
        assert_eq!(strip_formatting("text\x034"), "text");
        assert_eq!(strip_formatting("text\x0304,"), "text,");
        assert_eq!(strip_formatting("text\x0304,1"), "text");
    }

    #[test]
    fn strips_monospace() {
        assert_eq!(strip_formatting("\x11code\x11"), "code");
    }

    #[test]
    fn strips_reverse() {
        assert_eq!(strip_formatting("\x16reversed\x16"), "reversed");
    }

    #[test]
    fn strips_strikethrough() {
        assert_eq!(strip_formatting("\x1Estruck\x1E"), "struck");
    }

    #[test]
    fn empty_input() {
        assert_eq!(strip_formatting(""), "");
    }

    #[test]
    fn preserves_multibyte_utf8() {
        assert_eq!(strip_formatting("✨ hello ⚡"), "✨ hello ⚡");
    }

    #[test]
    fn strips_codes_around_emoji() {
        // \x034 = color fg 4; \x0333 = color fg 33, leaving trailing "3"
        assert_eq!(
            strip_formatting("\x02✨\x02 boosted \x034⚡\x03333 sats"),
            "✨ boosted ⚡3 sats"
        );
    }

    #[test]
    fn preserves_cjk_and_accented_chars() {
        assert_eq!(strip_formatting("\x02café\x02 日本語"), "café 日本語");
    }

    #[test]
    fn strips_ctcp_delimiter() {
        assert_eq!(strip_formatting("\x01ACTION sniffs\x01"), "ACTION sniffs");
    }

    #[test]
    fn strips_ctcp_delimiter_mixed_with_formatting() {
        assert_eq!(
            strip_formatting("\x01\x02ACTION bold\x02\x01"),
            "ACTION bold"
        );
    }
}