use scraper::{ElementRef, Html, Node}; use iced::widget::markdown; /* The goal here is to flatten the DOM as much as possible. paragraphs with fancy formatting are turned into markdown, same with */ //Supported content #[derive(Debug,Clone)] pub enum Content { Markdown(String), MarkdownParsed(Vec), Image(String), Audio(String), Video(String), Ignore } pub fn parse_content(c: &str) -> Vec{ process_content(itemize_content(c)).into_iter().map(|i| { match i { Content::Markdown(s) => { Content::MarkdownParsed(markdown::parse(&s).collect()) } _ => {i} } }).collect() } fn markdownify_child(item: &Item) -> String { let mut result = "".to_owned(); match markdown_content(&item) { Content::Markdown(s) => { result = result + &s; }, _ => {} } result } fn process_children(children: &Vec) -> String { let mut result = "".to_owned(); for c in children{ result = result + &markdownify_child(c); } result } fn markdown_content(item: &Item) -> Content { let mut markdown = String::new(); match item { Item::Title(n,children) => { markdown = markdown + &"#".repeat(*n) + " " +&process_children(children); }, Item::BoldedText(children) => { markdown = format!("**{}**",process_children(children)); }, Item::EmphasisText(children) => { markdown = format!("*{}*",process_children(children)); } Item::Text(s) => { markdown = markdown + s; }, Item::Link(href, children) => { markdown = markdown + &format!("[{}]({})",process_children(children),href); } Item::Paragraph(children) => { markdown = markdown + &process_children(children); } Item::UnorderedList(children) => { markdown = markdown + &process_children(children); } Item::OrderedList(children) => { markdown = markdown + &process_children(children); } Item::ListItem(children) => { markdown = "\n- ".to_owned() + &process_children(children); } _ => {} } Content::Markdown(markdown) } fn media_content(_: &Item) -> Content{ Content::Markdown("Media not supported yet".to_owned()) } fn process_content(items: Vec) -> Vec { let mut result: Vec = Vec::new(); //println!("Converting {} items into Content",items.len()); for i in &items { match i { Item::Title(_,_) => { result.push(markdown_content(i)); } Item::Paragraph(_) => { result.push(markdown_content(i)); }, Item::Link(_,_) => { result.push(markdown_content(i)) } Item::UnorderedList(_) => { result.push(markdown_content(i)); } Item::OrderedList(_) => { result.push(markdown_content(i)); } Item::ListItem(_) => { result.push(markdown_content(i)); } Item::Image(src) => { result.push(Content::Image(src.to_owned())); } Item::Video(_) => { result.push(media_content(i)); } Item::Audio(_) => { result.push(media_content(i)); } _ => { result.push(Content::Ignore); } } } result } #[derive(Debug,Clone)] enum Item { Ignore, Title(usize,Vec), Text(String), //text, links, formatting are all markdown //arguably, for better control it will be best to turn markdown into its own set of items Image(String), Svg(String),// wont' support for a while I think. Video(Vec), Audio(Vec), Source(String), BoldedText(Vec), EmphasisText(Vec), UnorderedList(Vec), OrderedList(Vec), ListItem(Vec), Paragraph(Vec),//gotta replace this with specific items, needlessly flexible Link(String,Vec), Table(Vec) } fn itemize_content(content: &str) -> Vec { let frag = Html::parse_fragment(content); frag.root_element().children().map(|e|{ parse_items(e) }).collect() } fn get_children(el: &ElementRef) -> Vec{ el.children().map(|c|{parse_items(c)}).collect() } fn parse_items(n: ego_tree::NodeRef<'_,Node>) -> Item{ if n.value().is_text(){ return Item::Text((&n.value().as_text().unwrap()).to_string()) } if n.value().is_element(){ let el = ElementRef::wrap(n).unwrap(); let tag_name = el.value().name(); match tag_name { "h1" => {return Item::Title(1, get_children(&el))}, "h2" => {return Item::Title(2, get_children(&el))}, "h3" => {return Item::Title(3, get_children(&el))}, "h4" => {return Item::Title(4, get_children(&el))}, "h5" => {return Item::Title(5, get_children(&el))}, "h6" => {return Item::Title(6, get_children(&el))}, "strong" => {return Item::BoldedText(get_children(&el))}, "em" => {return Item::EmphasisText(get_children(&el))}, "br" => {return Item::Text("\n".to_owned())}, "hr" => {return Item::Text("---".to_owned())} "p" => { return Item::Paragraph(get_children(&el)) }, "a" => { let href = match el.attr("href") { Some(link) => {link} None => {""} }; return Item::Link(href.to_owned(),get_children(&el)) } "img" => { match el.attr("src") { Some(src) => { return Item::Image(src.to_owned()) }, None => {return Item::Ignore} } } "source" => { match el.attr("src") { Some(src) => { return Item::Source(src.to_owned()) }, None => {return Item::Ignore} } } "video" => { return Item::Video(get_children(&el)) } "ol" => { return Item::OrderedList(get_children(&el)) } "ul" => { return Item::UnorderedList(get_children(&el)) } "li" => { return Item::ListItem(get_children(&el)) } _ => {} }; } Item::Ignore } /* Ideally I would verify what works and write tests for it. I also need a function to process markdown items. */ /* pub fn add(left: u64, right: u64) -> u64 { left + right } #[cfg(test)] mod tests { use super::*; #[test] fn it_works() { let result = add(2, 2); assert_eq!(result, 4); } } */ #[cfg(test)] mod tests;