feat: add HTML stripping utility for Owncast emoji and markup
Made-with: Cursor
This commit is contained in:
87
src/html.rs
Normal file
87
src/html.rs
Normal file
@@ -0,0 +1,87 @@
|
||||
use std::borrow::Cow;
|
||||
|
||||
/// Extracts alt text from <img> tags (for Owncast emoji) and strips all other HTML.
|
||||
pub fn strip_html(input: &str) -> String {
|
||||
let mut result = String::with_capacity(input.len());
|
||||
let mut chars = input.chars().peekable();
|
||||
|
||||
while let Some(&ch) = chars.peek() {
|
||||
if ch == '<' {
|
||||
chars.next(); // consume '<'
|
||||
let tag: String = chars.by_ref().take_while(|&c| c != '>').collect();
|
||||
if tag.starts_with("img ") || tag.starts_with("img\t") {
|
||||
if let Some(alt) = extract_attr(&tag, "alt") {
|
||||
result.push_str(&alt);
|
||||
}
|
||||
}
|
||||
} else if ch == '&' {
|
||||
chars.next(); // consume '&'
|
||||
let entity_name: String = chars.by_ref().take_while(|&c| c != ';').collect();
|
||||
let full_entity = format!("&{}", entity_name);
|
||||
result.push_str(&decode_entity(&full_entity));
|
||||
} else {
|
||||
result.push(ch);
|
||||
chars.next();
|
||||
}
|
||||
}
|
||||
|
||||
result
|
||||
}
|
||||
|
||||
/// Returns the value of the attribute `attr_name` found in `tag`.
///
/// `tag` is the text between `<` and `>`, e.g. `img class="emoji" alt=":a:"`.
/// The text is scanned attribute by attribute, so a name that merely ends in
/// `attr_name` (`data-alt`) or that appears inside another attribute's value is
/// not mistaken for the requested attribute.
///
/// * Attribute names are compared ASCII case-insensitively.
/// * Values may be double-quoted, single-quoted, or unquoted (an unquoted value
///   runs up to the next whitespace).
/// * Tokens without `=` (the tag name itself, boolean attributes) are skipped.
///
/// Returns `None` when the attribute is absent or when a quoted value met during
/// the scan is never closed.
fn extract_attr(tag: &str, attr_name: &str) -> Option<String> {
    /// Characters that separate attributes from one another.
    fn is_separator(c: char) -> bool {
        c.is_whitespace() || c == '/'
    }

    let mut rest = tag;
    loop {
        rest = rest.trim_start_matches(is_separator);
        if rest.is_empty() {
            return None;
        }

        // The attribute name runs up to `=` or the next separator.
        let key_len = rest
            .find(|c: char| c == '=' || is_separator(c))
            .unwrap_or(rest.len());
        let (key, tail) = rest.split_at(key_len);
        let tail = tail.trim_start();

        let after_eq = match tail.strip_prefix('=') {
            Some(after_eq) => after_eq.trim_start(),
            None => {
                // Value-less token; `key` is non-empty here, so the scan advances.
                rest = tail;
                continue;
            }
        };

        let (value, remainder) = match after_eq.chars().next() {
            Some(quote) if quote == '"' || quote == '\'' => {
                // Quote characters are one byte wide, so slicing at 1 is safe.
                let body = &after_eq[1..];
                let end = body.find(quote)?;
                (&body[..end], &body[end + 1..])
            }
            _ => {
                let end = after_eq
                    .find(char::is_whitespace)
                    .unwrap_or(after_eq.len());
                (&after_eq[..end], &after_eq[end..])
            }
        };

        if key.eq_ignore_ascii_case(attr_name) {
            return Some(value.to_string());
        }
        rest = remainder;
    }
}
|
||||
|
||||
/// Decodes a single HTML character reference.
///
/// `entity` is the reference as `strip_html` passes it: the leading `&` plus the
/// name, without the terminating `;` (for example `&amp`). A trailing `;` is
/// tolerated as well.
///
/// Recognised references:
/// * the named references `&amp`, `&lt`, `&gt`, `&quot`, `&apos` and `&nbsp`
///   (`&nbsp` becomes an ordinary space);
/// * numeric references in decimal (`&#39`) or hexadecimal (`&#x27`) form.
///
/// Anything else, including numeric references that denote an invalid code point
/// or a control character, is returned unchanged. Control characters are refused
/// deliberately so that a reference such as `&#10` cannot smuggle a line break
/// into the output.
fn decode_entity(entity: &str) -> Cow<'static, str> {
    let name = entity.strip_suffix(';').unwrap_or(entity);
    match name {
        "&amp" => Cow::Borrowed("&"),
        "&lt" => Cow::Borrowed("<"),
        "&gt" => Cow::Borrowed(">"),
        "&quot" => Cow::Borrowed("\""),
        "&#39" | "&apos" => Cow::Borrowed("'"),
        "&nbsp" => Cow::Borrowed(" "),
        _ => match decode_numeric_reference(name) {
            Some(ch) => Cow::Owned(ch.to_string()),
            // Unknown reference: hand the input back exactly as received.
            None => Cow::Owned(entity.to_string()),
        },
    }
}

/// Parses `&#NNN` / `&#xHHH` (no trailing `;`) into the character it denotes.
///
/// Returns `None` for malformed digits, values that are not Unicode scalar
/// values, and control characters.
fn decode_numeric_reference(name: &str) -> Option<char> {
    let digits = name.strip_prefix("&#")?;
    let (radix, digits) = match digits.strip_prefix(|c: char| c == 'x' || c == 'X') {
        Some(hex) => (16, hex),
        None => (10, digits),
    };
    // `from_str_radix` would accept a leading `+`; require pure digits instead.
    if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
        return None;
    }
    let code = u32::from_str_radix(digits, radix).ok()?;
    std::char::from_u32(code).filter(|c| !c.is_control())
}
|
||||
|
||||
#[cfg(test)]
mod tests {
    use super::*;

    /// Text without markup or entities passes through untouched.
    #[test]
    fn test_plain_text_unchanged() {
        assert_eq!(strip_html("hello world"), "hello world");
    }

    /// Formatting tags are removed while their content is kept.
    #[test]
    fn test_strips_basic_tags() {
        assert_eq!(strip_html("<b>bold</b> text"), "bold text");
    }

    /// An emoji `<img>` is replaced by the value of its `alt` attribute.
    #[test]
    fn test_emoji_img_to_alt_text() {
        let input = r#"hello <img class="emoji" alt=":beerparrot:" title=":beerparrot:" src="/img/emoji/beerparrot.gif"> world"#;
        assert_eq!(strip_html(input), "hello :beerparrot: world");
    }

    /// Adjacent emoji images each contribute their own `alt` text.
    #[test]
    fn test_multiple_emoji() {
        let input = r#"<img class="emoji" alt=":a:" src="/a.gif"><img class="emoji" alt=":b:" src="/b.gif">"#;
        assert_eq!(strip_html(input), ":a::b:");
    }

    /// Anchors are dropped; only the link text remains.
    #[test]
    fn test_strips_links() {
        let input = r#"check <a href="https://example.com">this link</a>"#;
        assert_eq!(strip_html(input), "check this link");
    }

    /// Entity references are replaced by the characters they denote.
    #[test]
    fn test_decodes_html_entities() {
        assert_eq!(strip_html("a &amp; b &lt; c"), "a & b < c");
        assert_eq!(
            strip_html("&quot;it&#39;s&quot; &gt; nothing"),
            "\"it's\" > nothing"
        );
    }
}
|
||||
@@ -1,5 +1,6 @@
|
||||
mod config;
|
||||
mod events;
|
||||
mod html;
|
||||
|
||||
fn main() {
|
||||
println!("owncast-irc-bridge");
|
||||
|
||||
Reference in New Issue
Block a user