diff --git a/src/html.rs b/src/html.rs
new file mode 100644
index 0000000..1a54bb4
--- /dev/null
+++ b/src/html.rs
@@ -0,0 +1,87 @@
+use std::borrow::Cow;
+
+/// Extracts alt text from `<img>` tags (for Owncast emoji) and strips all other HTML.
+///
+/// The input is scanned once, left to right:
+///
+/// * `<...>` tags are removed. For an `img` tag (matched case-insensitively) the
+///   value of its `alt` attribute, if any, is emitted in place of the tag.
+/// * `&name;` character references are passed to `decode_entity`; references it
+///   does not recognise are copied through unchanged, including the `;`.
+/// * A `<` that does not start a tag, a `<` with no closing `>`, and a `&` that
+///   is not followed by `name;` are kept as literal text instead of swallowing
+///   the remainder of the message.
+///
+/// This is a lightweight scanner, not an HTML parser: a `>` inside a quoted
+/// attribute value still ends the tag.
+pub fn strip_html(input: &str) -> String {
+    let mut result = String::with_capacity(input.len());
+    let mut rest = input;
+
+    while let Some(pos) = rest.find(|c: char| c == '<' || c == '&') {
+        // Everything before the marker is ordinary text.
+        result.push_str(&rest[..pos]);
+        // Both markers are one byte long, so `pos + 1` is a char boundary.
+        let after = &rest[pos + 1..];
+
+        if rest[pos..].starts_with('<') {
+            // Only `<` followed by a letter, `/`, `!` or `?` can open markup;
+            // anything else (e.g. "1 < 2") is plain text.
+            let opens_tag = after
+                .starts_with(|c: char| c.is_ascii_alphabetic() || matches!(c, '/' | '!' | '?'));
+            let tag_end = if opens_tag { after.find('>') } else { None };
+
+            match tag_end {
+                Some(end) => {
+                    let tag = &after[..end];
+                    // The tag name runs up to the first whitespace or `/`,
+                    // which covers `<img ...>`, `<IMG\n...>` and `<img/>`.
+                    let tag_name = tag
+                        .split(|c: char| c.is_whitespace() || c == '/')
+                        .next()
+                        .unwrap_or("");
+                    if tag_name.eq_ignore_ascii_case("img") {
+                        if let Some(alt) = extract_attr(tag, "alt") {
+                            result.push_str(&alt);
+                        }
+                    }
+                    rest = &after[end + 1..];
+                }
+                None => {
+                    // Not a tag (or never closed): keep the `<` and carry on.
+                    result.push('<');
+                    rest = after;
+                }
+            }
+        } else {
+            // Length of the candidate entity name following the `&`.
+            let name_len = after
+                .find(|c: char| !(c.is_ascii_alphanumeric() || c == '#'))
+                .unwrap_or(after.len());
+
+            if name_len > 0 && after[name_len..].starts_with(';') {
+                // `decode_entity` expects the `&name` form without the `;`.
+                let entity = &rest[pos..pos + 1 + name_len];
+                let decoded = decode_entity(entity);
+                result.push_str(&decoded);
+                if decoded.as_ref() == entity {
+                    // Unrecognised reference: restore the terminator so the
+                    // original text survives byte-for-byte.
+                    result.push(';');
+                }
+                rest = &after[name_len + 1..];
+            } else {
+                // A bare ampersand such as in "AT&T" or "a & b".
+                result.push('&');
+                rest = after;
+            }
+        }
+    }
+
+    result.push_str(rest);
+    result
+}
+
+/// Returns the value of the attribute `attr_name` inside `tag`, if present.
+///
+/// `tag` is the text between `<` and `>` (for example `img alt=":a:" src="x"`).
+/// The attribute must be written as `name="value"` or `name='value'` and must
+/// sit at an attribute boundary (start of the string or after whitespace), so
+/// looking up `alt` does not match `data-alt`. Returns `None` when the
+/// attribute is missing, unquoted, or its closing quote is absent.
+///
+/// This is a plain text scan, not an HTML attribute parser: text that looks
+/// like an attribute inside another attribute's quoted value can still match.
+fn extract_attr(tag: &str, attr_name: &str) -> Option<String> {
+    let needle = format!("{}=", attr_name);
+    let mut search_from = 0;
+
+    while let Some(offset) = tag[search_from..].find(&needle) {
+        let name_start = search_from + offset;
+        let value_start = name_start + needle.len();
+
+        // Reject matches that are merely the tail of a longer attribute name.
+        let at_boundary = tag[..name_start]
+            .chars()
+            .next_back()
+            .map_or(true, char::is_whitespace);
+
+        if at_boundary {
+            let mut value_chars = tag[value_start..].chars();
+            if let Some(quote @ ('"' | '\'')) = value_chars.next() {
+                // `as_str` is the remainder right after the opening quote.
+                let value = value_chars.as_str();
+                if let Some(end) = value.find(quote) {
+                    return Some(value[..end].to_string());
+                }
+            }
+        }
+
+        // Keep looking after this occurrence.
+        search_from = value_start;
+    }
+
+    None
+}
+
+/// Decodes a single HTML character reference.
+///
+/// `entity` is the reference *without* its trailing semicolon, e.g. `"&amp"`
+/// or `"&#39"`. The common named references are returned as borrowed static
+/// strings. Decimal (`&#NN`) and hexadecimal (`&#xHH`) numeric references are
+/// decoded to the corresponding character. Anything unrecognised is returned
+/// unchanged.
+fn decode_entity(entity: &str) -> Cow<'static, str> {
+    match entity {
+        "&amp" => Cow::Borrowed("&"),
+        "&lt" => Cow::Borrowed("<"),
+        "&gt" => Cow::Borrowed(">"),
+        "&quot" => Cow::Borrowed("\""),
+        "&#39" | "&apos" => Cow::Borrowed("'"),
+        "&nbsp" => Cow::Borrowed(" "),
+        other => match decode_numeric_entity(other) {
+            Some(ch) => Cow::Owned(ch.to_string()),
+            None => Cow::Owned(other.to_string()),
+        },
+    }
+}
+
+/// Parses `&#NN` / `&#xHH` (no trailing semicolon) into a character.
+///
+/// Returns `None` for malformed digits, values that are not valid Unicode
+/// scalar values, and control characters.
+fn decode_numeric_entity(entity: &str) -> Option<char> {
+    let body = entity.strip_prefix("&#")?;
+    let (digits, radix) = match body.strip_prefix(|c: char| c == 'x' || c == 'X') {
+        Some(hex) => (hex, 16),
+        None => (body, 10),
+    };
+
+    // `from_str_radix` would also accept a leading `+`, which is not valid here.
+    if digits.is_empty() || !digits.chars().all(|c| c.is_digit(radix)) {
+        return None;
+    }
+
+    let code = u32::from_str_radix(digits, radix).ok()?;
+    // Control characters (NUL, CR, LF, ...) are refused so that a reference
+    // cannot smuggle a line break into the output.
+    // NOTE(review): the crate name suggests the text is relayed to IRC, where
+    // a raw CR/LF would end the message early; confirm against the caller.
+    char::from_u32(code).filter(|c| !c.is_control())
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    /// Text without markup or entities must pass through untouched.
+    #[test]
+    fn test_plain_text_unchanged() {
+        assert_eq!(strip_html("hello world"), "hello world");
+    }
+
+    /// Ordinary formatting tags are removed, their text content is kept.
+    #[test]
+    fn test_strips_basic_tags() {
+        assert_eq!(strip_html("<b>bold</b> text"), "bold text");
+    }
+
+    /// An emoji image is replaced by its `alt` text.
+    #[test]
+    fn test_emoji_img_to_alt_text() {
+        let input = r#"hello <img alt=":beerparrot:" src="/img/emoji/beerparrot.gif"> world"#;
+        assert_eq!(strip_html(input), "hello :beerparrot: world");
+    }
+
+    /// Adjacent emoji images are each replaced, with nothing inserted between.
+    #[test]
+    fn test_multiple_emoji() {
+        let input = r#"<img alt=":a:" src="/img/emoji/a.png"><img alt=":b:" src="/img/emoji/b.png">"#;
+        assert_eq!(strip_html(input), ":a::b:");
+    }
+
+    /// An image without an `alt` attribute contributes no text.
+    #[test]
+    fn test_img_without_alt_is_dropped() {
+        let input = r#"a<img src="/img/emoji/a.png">b"#;
+        assert_eq!(strip_html(input), "ab");
+    }
+
+    /// Anchor tags are removed but the link text survives.
+    #[test]
+    fn test_strips_links() {
+        let input = r#"check <a href="https://example.com">this link</a>"#;
+        assert_eq!(strip_html(input), "check this link");
+    }
+
+    /// Named character references are decoded to the characters they stand for.
+    #[test]
+    fn test_decodes_html_entities() {
+        assert_eq!(strip_html("a &amp; b &lt; c"), "a & b < c");
+    }
+
+    /// Both quote references are decoded.
+    #[test]
+    fn test_decodes_quote_entities() {
+        assert_eq!(strip_html("&quot;hi&quot; it&apos;s"), "\"hi\" it's");
+    }
+}
diff --git a/src/main.rs b/src/main.rs
index 86ab3db..b8695e8 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,5 +1,6 @@
mod config;
mod events;
+mod html;
fn main() {
println!("owncast-irc-bridge");