feat: add HTML stripping utility for Owncast emoji and markup
Made-with: Cursor
This commit is contained in:
87
src/html.rs
Normal file
87
src/html.rs
Normal file
@@ -0,0 +1,87 @@
|
|||||||
|
use std::borrow::Cow;
|
||||||
|
|
||||||
|
/// Extracts alt text from <img> tags (for Owncast emoji) and strips all other HTML.
|
||||||
|
pub fn strip_html(input: &str) -> String {
|
||||||
|
let mut result = String::with_capacity(input.len());
|
||||||
|
let mut chars = input.chars().peekable();
|
||||||
|
|
||||||
|
while let Some(&ch) = chars.peek() {
|
||||||
|
if ch == '<' {
|
||||||
|
chars.next(); // consume '<'
|
||||||
|
let tag: String = chars.by_ref().take_while(|&c| c != '>').collect();
|
||||||
|
if tag.starts_with("img ") || tag.starts_with("img\t") {
|
||||||
|
if let Some(alt) = extract_attr(&tag, "alt") {
|
||||||
|
result.push_str(&alt);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
} else if ch == '&' {
|
||||||
|
chars.next(); // consume '&'
|
||||||
|
let entity_name: String = chars.by_ref().take_while(|&c| c != ';').collect();
|
||||||
|
let full_entity = format!("&{}", entity_name);
|
||||||
|
result.push_str(&decode_entity(&full_entity));
|
||||||
|
} else {
|
||||||
|
result.push(ch);
|
||||||
|
chars.next();
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
result
|
||||||
|
}
|
||||||
|
|
||||||
|
/// Returns the value of the attribute `attr_name` found inside an HTML tag.
///
/// `tag` is the text between `<` and `>` (for example
/// `img class="emoji" alt=":x:"`). The tag is walked attribute by attribute, so a
/// name that merely ends with `attr_name` (such as `data-alt`) or text inside another
/// attribute's quoted value is never mistaken for the requested attribute.
///
/// Attribute names are compared ASCII case-insensitively. Values may be wrapped in
/// double quotes, single quotes, or be unquoted (ending at the next whitespace).
/// Entities inside the value are returned as written; they are not decoded here.
///
/// Returns `None` when the attribute is absent, has no value, or when a quoted value
/// is missing its closing quote. If the attribute occurs more than once, the first
/// occurrence wins.
fn extract_attr(tag: &str, attr_name: &str) -> Option<String> {
    /// Characters that separate attributes from each other.
    fn is_separator(c: char) -> bool {
        c.is_whitespace() || c == '/'
    }

    let mut rest = tag;

    loop {
        rest = rest.trim_start_matches(is_separator);
        if rest.is_empty() {
            return None;
        }

        // The name runs up to the next separator or `=`.
        let name_len = rest
            .find(|c: char| is_separator(c) || c == '=')
            .unwrap_or(rest.len());
        let (name, after_name) = rest.split_at(name_len);

        let value_src = match after_name.trim_start().strip_prefix('=') {
            Some(after_eq) => after_eq.trim_start(),
            // A bare token (the element name or a boolean attribute): skip it.
            None => {
                rest = after_name;
                continue;
            }
        };

        let (value, remainder) = match value_src.chars().next() {
            Some(quote @ ('"' | '\'')) => {
                let body = &value_src[1..];
                let end = body.find(quote)?;
                (&body[..end], &body[end + 1..])
            }
            _ => {
                let end = value_src
                    .find(char::is_whitespace)
                    .unwrap_or(value_src.len());
                value_src.split_at(end)
            }
        };

        if name.eq_ignore_ascii_case(attr_name) {
            return Some(value.to_string());
        }
        rest = remainder;
    }
}
|
||||||
|
|
||||||
|
/// Decodes a single HTML character reference.
///
/// `entity` is the reference *without* its trailing `;`, e.g. `"&amp"` or `"&#39"`.
/// Supported are the named references `&amp`, `&lt`, `&gt`, `&quot`, `&apos` and
/// `&nbsp` (rendered as an ordinary space), plus decimal (`&#NN`) and hexadecimal
/// (`&#xHH`) numeric references.
///
/// Anything that is not recognised is returned unchanged. Numeric references that
/// are not a valid character, or that name a control character (for example CR or
/// LF), are deliberately left undecoded so that escaped input cannot introduce line
/// breaks into the output.
fn decode_entity(entity: &str) -> Cow<'static, str> {
    match entity {
        "&amp" => Cow::Borrowed("&"),
        "&lt" => Cow::Borrowed("<"),
        "&gt" => Cow::Borrowed(">"),
        "&quot" => Cow::Borrowed("\""),
        "&apos" => Cow::Borrowed("'"),
        "&nbsp" => Cow::Borrowed(" "),
        other => {
            let numeric = other
                .strip_prefix("&#")
                .and_then(|reference| {
                    let (radix, digits) =
                        match reference.strip_prefix(|c: char| c == 'x' || c == 'X') {
                            Some(hex) => (16, hex),
                            None => (10, reference),
                        };
                    // `from_str_radix` would accept a leading `+`; insist on digits only.
                    if !digits.chars().all(|c| c.is_digit(radix)) {
                        return None;
                    }
                    u32::from_str_radix(digits, radix).ok()
                })
                .and_then(char::from_u32)
                .filter(|ch| !ch.is_control());

            Cow::Owned(numeric.map_or_else(|| other.to_string(), |ch| ch.to_string()))
        }
    }
}
|
||||||
|
|
||||||
|
#[cfg(test)]
mod tests {
    use super::*;

    /// Text without markup passes through untouched.
    #[test]
    fn test_plain_text_unchanged() {
        assert_eq!(strip_html("hello world"), "hello world");
    }

    /// Formatting tags are removed while their inner text is kept.
    #[test]
    fn test_strips_basic_tags() {
        assert_eq!(strip_html("<b>bold</b> text"), "bold text");
    }

    /// An emoji `<img>` is replaced by its `alt` text.
    #[test]
    fn test_emoji_img_to_alt_text() {
        let input = r#"hello <img class="emoji" alt=":beerparrot:" title=":beerparrot:" src="/img/emoji/beerparrot.gif"> world"#;
        assert_eq!(strip_html(input), "hello :beerparrot: world");
    }

    /// Adjacent emoji images each contribute their own `alt` text.
    #[test]
    fn test_multiple_emoji() {
        let input = r#"<img class="emoji" alt=":a:" src="/a.gif"><img class="emoji" alt=":b:" src="/b.gif">"#;
        assert_eq!(strip_html(input), ":a::b:");
    }

    /// Anchor tags are removed but the link text remains.
    #[test]
    fn test_strips_links() {
        let input = r#"check <a href="https://example.com">this link</a>"#;
        assert_eq!(strip_html(input), "check this link");
    }

    /// Escaped `&` and `<` in the input come out as the literal characters.
    #[test]
    fn test_decodes_html_entities() {
        assert_eq!(strip_html("a &amp; b &lt; c"), "a & b < c");
    }
}
|
||||||
@@ -1,5 +1,6 @@
|
|||||||
mod config;
|
mod config;
|
||||||
mod events;
|
mod events;
|
||||||
|
mod html;
|
||||||
|
|
||||||
fn main() {
|
fn main() {
|
||||||
println!("owncast-irc-bridge");
|
println!("owncast-irc-bridge");
|
||||||
|
|||||||
Reference in New Issue
Block a user