feat: add HTML stripping utility for Owncast emoji and markup

Made-with: Cursor
This commit is contained in:
cottongin
2026-03-10 21:53:26 -04:00
parent f2e3c88b60
commit 788634fb83
2 changed files with 88 additions and 0 deletions

87
src/html.rs Normal file
View File

@@ -0,0 +1,87 @@
use std::borrow::Cow;
/// Converts an HTML fragment into plain text.
///
/// * An `<img …>` tag (used for Owncast emoji) is replaced by the value of its
///   `alt` attribute. An image without a usable `alt` contributes nothing.
/// * Every other tag is dropped; the text between tags is kept.
/// * A character reference that `decode_entity` recognises is decoded, with or
///   without its trailing `;`. Anything else that starts with `&` is copied
///   through unchanged, including its `;`.
/// * A `<` that cannot open a tag (it is not followed by an ASCII letter, `/`,
///   `!` or `?`) is kept as literal text.
///
/// A tag that is opened but never closed by `>` swallows the rest of the input.
pub fn strip_html(input: &str) -> String {
    let mut result = String::with_capacity(input.len());
    let mut rest = input;

    while let Some(pos) = rest.find(|c: char| c == '<' || c == '&') {
        // Everything before the next markup character is ordinary text.
        result.push_str(&rest[..pos]);
        let opens_tag = rest[pos..].starts_with('<');
        let after = &rest[pos + 1..];

        if opens_tag {
            let can_start_tag = after
                .chars()
                .next()
                .map_or(false, |c| c.is_ascii_alphabetic() || matches!(c, '/' | '!' | '?'));
            if !can_start_tag {
                // e.g. "1 < 2" or "<3": a lone '<' is text, not markup.
                result.push('<');
                rest = after;
                continue;
            }

            // The tag body runs to the next '>' or, if there is none, to the end.
            let (tag, tail) = match after.find('>') {
                Some(end) => (&after[..end], &after[end + 1..]),
                None => (after, ""),
            };
            let name_len = tag
                .find(|c: char| c.is_whitespace() || c == '/')
                .unwrap_or(tag.len());
            if tag[..name_len].eq_ignore_ascii_case("img") {
                if let Some(alt) = extract_attr(tag, "alt") {
                    result.push_str(&alt);
                }
            }
            rest = tail;
        } else {
            // A reference name is a run of ASCII alphanumerics / '#'. Stopping
            // there keeps a stray '&' from swallowing the text (and tags) that
            // follow it up to some unrelated ';'.
            let name_len = after
                .find(|c: char| !(c.is_ascii_alphanumeric() || c == '#'))
                .unwrap_or(after.len());
            let candidate = &rest[pos..pos + 1 + name_len];
            let decoded = decode_entity(candidate);

            // `decode_entity` hands unknown references back unchanged.
            if name_len == 0 || &*decoded == candidate {
                // Keep the '&' and let the following characters (including
                // any ';') be copied as ordinary text.
                result.push('&');
                rest = after;
            } else {
                result.push_str(&decoded);
                let tail = &after[name_len..];
                rest = tail.strip_prefix(';').unwrap_or(tail);
            }
        }
    }

    result.push_str(rest);
    result
}
/// Returns the value of attribute `attr_name` from the inside of a tag
/// (the text between `<` and `>`, e.g. `img class="emoji" alt=":a:"`).
///
/// The tag is tokenised attribute by attribute, so a name is only matched as a
/// whole (`alt` does not match `data-alt`) and never inside another attribute's
/// quoted value. Names compare ASCII case-insensitively; values may be
/// double-quoted, single-quoted or unquoted. When the attribute occurs more
/// than once the first occurrence wins.
///
/// Returns `None` when the attribute is absent, has no `=value` part, or when
/// a quoted value is never closed. The value is returned verbatim, without
/// entity decoding.
fn extract_attr(tag: &str, attr_name: &str) -> Option<String> {
    let mut rest = tag;
    loop {
        // Skip the separators between tokens.
        rest = rest.trim_start_matches(|c: char| c.is_whitespace() || c == '/');
        if rest.is_empty() {
            return None;
        }

        // A name runs up to whitespace, '=' or '/'. The tag name itself is
        // simply seen as a first, valueless token.
        let name_len = rest
            .find(|c: char| c.is_whitespace() || c == '=' || c == '/')
            .unwrap_or(rest.len());
        let (name, after_name) = rest.split_at(name_len);
        let after_name = after_name.trim_start();

        let after_eq = match after_name.strip_prefix('=') {
            Some(value_part) => value_part.trim_start(),
            None => {
                // Valueless token (tag name or boolean attribute): move on.
                rest = after_name;
                continue;
            }
        };

        let (value, remainder) = match after_eq.chars().next() {
            Some(quote) if quote == '"' || quote == '\'' => {
                let body = &after_eq[1..];
                // An unterminated quote means the tag is malformed; give up.
                let end = body.find(quote)?;
                (&body[..end], &body[end + 1..])
            }
            _ => {
                let end = after_eq.find(char::is_whitespace).unwrap_or(after_eq.len());
                after_eq.split_at(end)
            }
        };

        if name.eq_ignore_ascii_case(attr_name) {
            return Some(value.to_string());
        }
        rest = remainder;
    }
}
/// Decodes one HTML character reference given *without* its trailing `;`
/// (for example `"&amp"` or `"&#39"`).
///
/// Recognised are the named references `amp`, `lt`, `gt`, `quot`, `apos` and
/// `nbsp` (decoded to an ordinary space), plus decimal (`&#34`) and hexadecimal
/// (`&#x22`) numeric references. Anything else is returned unchanged, so a
/// caller can detect an unknown reference by comparing the result to its input.
fn decode_entity(entity: &str) -> Cow<'static, str> {
    match entity {
        "&amp" => Cow::Borrowed("&"),
        "&lt" => Cow::Borrowed("<"),
        "&gt" => Cow::Borrowed(">"),
        "&quot" => Cow::Borrowed("\""),
        "&#39" | "&apos" => Cow::Borrowed("'"),
        "&nbsp" => Cow::Borrowed(" "),
        other => match decode_numeric_reference(other) {
            Some(ch) => Cow::Owned(ch.to_string()),
            None => Cow::Owned(other.to_string()),
        },
    }
}

/// Parses `&#DDD` / `&#xHHH` (no trailing `;`) into the character it names.
///
/// Returns `None` for malformed digits, for values that are not Unicode scalar
/// values (surrogates, out of range) and for control characters. The latter
/// stay encoded so a reference cannot smuggle a line break or NUL into the
/// decoded text.
fn decode_numeric_reference(entity: &str) -> Option<char> {
    let body = entity.strip_prefix("&#")?;
    let (digits, radix) = match body.strip_prefix(|c: char| c == 'x' || c == 'X') {
        Some(hex) => (hex, 16),
        None => (body, 10),
    };
    // `from_str_radix` alone would accept a leading '+', which HTML does not.
    if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
        return None;
    }
    let code = u32::from_str_radix(digits, radix).ok()?;
    char::from_u32(code).filter(|c| !c.is_control())
}
#[cfg(test)]
mod tests {
    use super::strip_html;

    /// Input without any markup comes back untouched.
    #[test]
    fn test_plain_text_unchanged() {
        let stripped = strip_html("hello world");
        assert_eq!(stripped, "hello world");
    }

    /// Formatting tags are removed while their inner text survives.
    #[test]
    fn test_strips_basic_tags() {
        let stripped = strip_html("<b>bold</b> text");
        assert_eq!(stripped, "bold text");
    }

    /// An emoji `<img>` is replaced by the text of its `alt` attribute.
    #[test]
    fn test_emoji_img_to_alt_text() {
        let html = r#"hello <img class="emoji" alt=":beerparrot:" title=":beerparrot:" src="/img/emoji/beerparrot.gif"> world"#;
        let stripped = strip_html(html);
        assert_eq!(stripped, "hello :beerparrot: world");
    }

    /// Adjacent emoji images each contribute their own `alt` text.
    #[test]
    fn test_multiple_emoji() {
        let html = r#"<img class="emoji" alt=":a:" src="/a.gif"><img class="emoji" alt=":b:" src="/b.gif">"#;
        let stripped = strip_html(html);
        assert_eq!(stripped, ":a::b:");
    }

    /// Anchor tags vanish; only the link text remains.
    #[test]
    fn test_strips_links() {
        let html = r#"check <a href="https://example.com">this link</a>"#;
        let stripped = strip_html(html);
        assert_eq!(stripped, "check this link");
    }

    /// Named entities are turned back into the characters they stand for.
    #[test]
    fn test_decodes_html_entities() {
        let stripped = strip_html("a &amp; b &lt; c");
        assert_eq!(stripped, "a & b < c");
    }
}

View File

@@ -1,5 +1,6 @@
mod config;
mod events;
mod html;
fn main() {
println!("owncast-irc-bridge");