160 lines
4.3 KiB
Rust
160 lines
4.3 KiB
Rust
|
|
/// Returns a copy of `input` with mIRC-style formatting control codes removed.
///
/// The following codes are dropped:
///
/// - `\x02` bold, `\x0F` reset, `\x11` monospace, `\x16` reverse,
///   `\x1D` italic, `\x1E` strikethrough and `\x1F` underline;
/// - `\x03` color, together with its optional parameters: up to two ASCII
///   digits of foreground and, when a comma directly followed by a digit
///   comes next, that comma plus up to two digits of background.
///
/// After a color code, a comma that is not directly followed by a digit is
/// ordinary text and is kept. Every other character, including multi-byte
/// UTF-8, is copied through unchanged.
pub fn strip_formatting(input: &str) -> String {
    // Returns `text` with at most two leading ASCII digits removed.
    fn skip_color_number(text: &str) -> &str {
        let digit_count = text
            .bytes()
            .take(2)
            .take_while(u8::is_ascii_digit)
            .count();
        // ASCII digits are one byte each, so this is always a char boundary.
        &text[digit_count..]
    }

    let mut cleaned = String::with_capacity(input.len());
    let mut remaining = input.chars();

    while let Some(ch) = remaining.next() {
        match ch {
            // Simple toggles carry no parameters: just drop the code itself.
            '\x02' | '\x0F' | '\x11' | '\x16' | '\x1D' | '\x1E' | '\x1F' => {}
            '\x03' => {
                let after_fg = skip_color_number(remaining.as_str());
                // The comma belongs to the color code only when a background
                // digit follows it; otherwise it is left in place as text.
                let after_bg = match after_fg.strip_prefix(',') {
                    Some(bg) if bg.starts_with(|c: char| c.is_ascii_digit()) => {
                        skip_color_number(bg)
                    }
                    _ => after_fg,
                };
                remaining = after_bg.chars();
            }
            other => cleaned.push(other),
        }
    }

    cleaned
}
|
||
|
|
|
||
|
|
#[cfg(test)]
mod tests {
    use super::*;

    // Shared check: stripping `raw` must produce exactly `expected`.
    // `track_caller` makes a failure point at the calling test line.
    #[track_caller]
    fn assert_stripped(raw: &str, expected: &str) {
        assert_eq!(strip_formatting(raw), expected);
    }

    #[test]
    fn clean_text_unchanged() {
        assert_stripped("hello world", "hello world");
    }

    #[test]
    fn strips_bold() {
        assert_stripped("\x02bold\x02", "bold");
    }

    #[test]
    fn strips_italic() {
        assert_stripped("\x1Ditalic\x1D", "italic");
    }

    #[test]
    fn strips_underline() {
        assert_stripped("\x1Funderline\x1F", "underline");
    }

    #[test]
    fn strips_reset() {
        assert_stripped("styled\x0F plain", "styled plain");
    }

    #[test]
    fn strips_color_no_params() {
        assert_stripped("\x03hello", "hello");
    }

    #[test]
    fn strips_color_fg_only() {
        assert_stripped("\x034red text", "red text");
    }

    #[test]
    fn strips_color_two_digit_fg() {
        assert_stripped("\x0312blue text", "blue text");
    }

    #[test]
    fn strips_color_fg_and_bg() {
        assert_stripped("\x034,2red on blue", "red on blue");
    }

    #[test]
    fn strips_color_two_digit_fg_and_bg() {
        assert_stripped("\x0304,12colored", "colored");
    }

    #[test]
    fn color_comma_without_bg_digit_preserves_comma() {
        // A foreground digit followed by a comma with no background digit:
        // the comma is not part of the color code and must survive.
        assert_stripped("\x034,text", ",text");
    }

    #[test]
    fn mixed_codes() {
        assert_stripped("\x02\x034,5bold color\x0F normal", "bold color normal");
    }

    #[test]
    fn color_at_end_of_string() {
        // Each case ends in the middle of, or right after, a color sequence.
        assert_stripped("text\x03", "text");
        assert_stripped("text\x034", "text");
        assert_stripped("text\x0304,", "text,");
        assert_stripped("text\x0304,1", "text");
    }

    #[test]
    fn strips_monospace() {
        assert_stripped("\x11code\x11", "code");
    }

    #[test]
    fn strips_reverse() {
        assert_stripped("\x16reversed\x16", "reversed");
    }

    #[test]
    fn strips_strikethrough() {
        assert_stripped("\x1Estruck\x1E", "struck");
    }

    #[test]
    fn empty_input() {
        assert_stripped("", "");
    }

    #[test]
    fn preserves_multibyte_utf8() {
        assert_stripped("✨ hello ⚡", "✨ hello ⚡");
    }

    #[test]
    fn strips_codes_around_emoji() {
        // `\x034` selects foreground 4. In `\x03333` only the first two
        // digits belong to the color code, so the third "3" stays as text.
        assert_stripped(
            "\x02✨\x02 boosted \x034⚡\x03333 sats",
            "✨ boosted ⚡3 sats",
        );
    }

    #[test]
    fn preserves_cjk_and_accented_chars() {
        assert_stripped("\x02café\x02 日本語", "café 日本語");
    }
}
|