use scraper::{ElementRef, Html, Node};
use iced::widget::markdown;
/*
The goal here is to flatten the DOM as much as possible.
Paragraphs with fancy formatting are turned into markdown, same with
titles, links and lists.
*/
//Supported content
#[derive(Debug,Clone)]
pub enum Content {
Markdown(String),
MarkdownParsed(Vec),
Image(String),
Audio(String),
Video(String),
Ignore
}
pub fn parse_content(c: &str) -> Vec{
process_content(itemize_content(c)).into_iter().map(|i| {
match i {
Content::Markdown(s) => {
Content::MarkdownParsed(markdown::parse(&s).collect())
}
_ => {i}
}
}).collect()
}
/// Renders a single item as markdown text.
///
/// Returns the string carried by the `Content::Markdown` that
/// `markdown_content` yields, or an empty string for any other variant.
fn markdownify_child(item: &Item) -> String {
    if let Content::Markdown(text) = markdown_content(item) {
        text
    } else {
        String::new()
    }
}
fn process_children(children: &Vec- ) -> String {
let mut result = "".to_owned();
for c in children{
result = result + &markdownify_child(c);
}
result
}
fn markdown_content(item: &Item) -> Content {
let mut markdown = String::new();
match item {
Item::Title(n,children) => {
markdown = markdown + &"#".repeat(*n) + " " +&process_children(children);
},
Item::BoldedText(children) => {
markdown = format!("**{}**",process_children(children));
},
Item::EmphasisText(children) => {
markdown = format!("*{}*",process_children(children));
}
Item::Text(s) => {
markdown = markdown + s;
},
Item::Link(href, children) => {
markdown = markdown + &format!("[{}]({})",process_children(children),href);
}
Item::Paragraph(children) => {
markdown = markdown + &process_children(children);
}
Item::UnorderedList(children) => {
markdown = markdown + &process_children(children);
}
Item::OrderedList(children) => {
markdown = markdown + &process_children(children);
}
Item::ListItem(children) => {
markdown = "\n- ".to_owned() + &process_children(children);
}
_ => {}
}
Content::Markdown(markdown)
}
/// Placeholder for media items: the argument is not inspected and a fixed
/// markdown notice is returned.
fn media_content(_item: &Item) -> Content {
    Content::Markdown(String::from("Media not supported yet"))
}
fn process_content(items: Vec
- ) -> Vec {
let mut result: Vec = Vec::new();
//println!("Converting {} items into Content",items.len());
for i in &items {
match i {
Item::Title(_,_) => {
result.push(markdown_content(i));
}
Item::Paragraph(children) => {
result.push(markdown_content(i));
},
Item::Link(href,children) => {
result.push(markdown_content(i))
}
Item::UnorderedList(children) => {
result.push(markdown_content(i));
}
Item::OrderedList(children) => {
result.push(markdown_content(i));
}
Item::ListItem(children) => {
result.push(markdown_content(i));
}
Item::Image(src) => {
result.push(Content::Image(src.to_owned()));
}
Item::Video(children) => {
result.push(media_content(i));
}
Item::Audio(children) => {
result.push(media_content(i));
}
_ => {
result.push(Content::Ignore);
}
}
}
result
}
/// Intermediate representation of one HTML node, as produced by `parse_items`.
///
/// Variants carrying a `Vec<Item>` hold the converted child nodes of the
/// element.
#[derive(Debug, Clone)]
enum Item {
    /// A node with no mapping: an unsupported tag, a node that is neither
    /// text nor element, or an `img`/`source` without a `src` attribute.
    Ignore,
    /// A heading: its level (1 to 6 for `h1` to `h6`) and its children.
    Title(usize, Vec<Item>),
    /// Literal text. Also used for `br` (a newline) and `hr` (`---`).
    Text(String),
    // Text, links and formatting are all markdown.
    // Arguably, for better control it would be best to turn markdown into
    // its own set of items.
    /// An image, identified by its `src` attribute.
    Image(String),
    /// An SVG. Won't be supported for a while, I think.
    Svg(String),
    /// A `video` element and its children.
    Video(Vec<Item>),
    /// An audio element and its children.
    Audio(Vec<Item>),
    /// A `source` element, identified by its `src` attribute.
    Source(String),
    /// Children of a `strong` element.
    BoldedText(Vec<Item>),
    /// Children of an `em` element.
    EmphasisText(Vec<Item>),
    /// Children of a `ul` element.
    UnorderedList(Vec<Item>),
    /// Children of an `ol` element.
    OrderedList(Vec<Item>),
    /// Children of an `li` element.
    ListItem(Vec<Item>),
    /// Children of a `p` element.
    /// Should be replaced with specific items; this is needlessly flexible.
    Paragraph(Vec<Item>),
    /// A link: the `href` attribute (empty when absent) and its children.
    Link(String, Vec<Item>),
    /// A table and its children. NOTE(review): no visible code constructs
    /// this variant yet.
    Table(Vec<Item>),
}
fn itemize_content(content: &str) -> Vec
- {
let frag = Html::parse_fragment(content);
frag.root_element().children().map(|e|{
parse_items(e)
}).collect()
}
fn get_children(el: &ElementRef) -> Vec
- {
el.children().map(|c|{parse_items(c)}).collect()
}
/// Converts one DOM node, and recursively its children, into an `Item`.
///
/// * Text nodes become `Item::Text`.
/// * Supported elements are mapped by tag name (see the match below).
/// * `img` and `source` without a `src` attribute, unsupported tags and any
///   node that is neither text nor element become `Item::Ignore`.
fn parse_items(n: ego_tree::NodeRef<'_, Node>) -> Item {
    if let Some(text) = n.value().as_text() {
        return Item::Text(text.to_string());
    }

    // `wrap` only succeeds for element nodes; everything else is ignored.
    let el = match ElementRef::wrap(n) {
        Some(el) => el,
        None => return Item::Ignore,
    };

    match el.value().name() {
        "h1" => Item::Title(1, get_children(&el)),
        "h2" => Item::Title(2, get_children(&el)),
        "h3" => Item::Title(3, get_children(&el)),
        "h4" => Item::Title(4, get_children(&el)),
        "h5" => Item::Title(5, get_children(&el)),
        "h6" => Item::Title(6, get_children(&el)),
        "strong" => Item::BoldedText(get_children(&el)),
        "em" => Item::EmphasisText(get_children(&el)),
        "br" => Item::Text("\n".to_owned()),
        "hr" => Item::Text("---".to_owned()),
        "p" => Item::Paragraph(get_children(&el)),
        // A link without `href` keeps its text and gets an empty target.
        "a" => Item::Link(
            el.attr("href").unwrap_or("").to_owned(),
            get_children(&el),
        ),
        "img" => match el.attr("src") {
            Some(src) => Item::Image(src.to_owned()),
            None => Item::Ignore,
        },
        "source" => match el.attr("src") {
            Some(src) => Item::Source(src.to_owned()),
            None => Item::Ignore,
        },
        "video" => Item::Video(get_children(&el)),
        // Mirrors `video`: `Item::Audio` exists and is handled by
        // `process_content`, but was never produced by the parser.
        "audio" => Item::Audio(get_children(&el)),
        "ol" => Item::OrderedList(get_children(&el)),
        "ul" => Item::UnorderedList(get_children(&el)),
        "li" => Item::ListItem(get_children(&el)),
        _ => Item::Ignore,
    }
}
/*
Ideally I would verify what works and write tests for it.
I also need a function to process markdown items.
*/
/*
pub fn add(left: u64, right: u64) -> u64 {
left + right
}
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn it_works() {
let result = add(2, 2);
assert_eq!(result, 4);
}
}
*/
#[cfg(test)]
mod tests;