2025-07-10 18:58:11 -04:00
|
|
|
use scraper::{ElementRef, Html, Node};
|
2025-07-22 12:53:08 -04:00
|
|
|
use iced::widget::markdown;
|
2025-07-10 18:58:11 -04:00
|
|
|
|
|
|
|
/*
The goal here is to flatten the DOM as much as possible.
Paragraphs with fancy formatting are turned into markdown, same with
titles, links and lists.
*/
|
2025-07-11 10:00:21 -04:00
|
|
|
|
2025-07-15 09:59:38 -04:00
|
|
|
//Supported content
|
|
|
|
#[derive(Debug,Clone)]
|
|
|
|
pub enum Content {
|
|
|
|
Markdown(String),
|
2025-07-22 12:53:08 -04:00
|
|
|
MarkdownParsed(Vec<markdown::Item>),
|
2025-07-15 09:59:38 -04:00
|
|
|
Image(String),
|
|
|
|
Audio(String),
|
2025-07-18 11:23:30 -04:00
|
|
|
Video(String),
|
|
|
|
Ignore
|
2025-07-15 09:59:38 -04:00
|
|
|
}
|
|
|
|
|
2025-07-19 11:43:42 -04:00
|
|
|
pub fn parse_content(c: &str) -> Vec<Content>{
|
2025-07-22 12:53:08 -04:00
|
|
|
process_content(itemize_content(c)).into_iter().map(|i| {
|
|
|
|
match i {
|
|
|
|
Content::Markdown(s) => {
|
|
|
|
Content::MarkdownParsed(markdown::parse(&s).collect())
|
|
|
|
}
|
|
|
|
_ => {i}
|
|
|
|
}
|
|
|
|
}).collect()
|
2025-07-19 11:43:42 -04:00
|
|
|
}
|
|
|
|
|
2025-07-19 11:40:37 -04:00
|
|
|
fn markdownify_child(item: &Item) -> String {
|
|
|
|
let mut result = "".to_owned();
|
|
|
|
match markdown_content(&item) {
|
2025-07-15 09:59:38 -04:00
|
|
|
Content::Markdown(s) => {
|
2025-07-19 11:40:37 -04:00
|
|
|
result = result + &s;
|
|
|
|
},
|
|
|
|
_ => {}
|
2025-07-15 09:59:38 -04:00
|
|
|
}
|
2025-07-19 11:40:37 -04:00
|
|
|
result
|
2025-07-15 09:59:38 -04:00
|
|
|
|
|
|
|
}
|
|
|
|
|
2025-07-19 11:40:37 -04:00
|
|
|
fn process_children(children: &Vec<Item>) -> String {
|
|
|
|
let mut result = "".to_owned();
|
|
|
|
for c in children{
|
|
|
|
result = result + &markdownify_child(c);
|
|
|
|
}
|
|
|
|
result
|
|
|
|
}
|
|
|
|
|
2025-07-15 09:59:38 -04:00
|
|
|
fn markdown_content(item: &Item) -> Content {
|
|
|
|
let mut markdown = String::new();
|
|
|
|
match item {
|
2025-07-19 11:40:37 -04:00
|
|
|
Item::Title(n,children) => {
|
|
|
|
markdown = markdown + &"#".repeat(*n) + " " +&process_children(children);
|
2025-07-15 09:59:38 -04:00
|
|
|
},
|
2025-07-19 14:46:45 -04:00
|
|
|
Item::BoldedText(children) => {
|
|
|
|
markdown = format!("**{}**",process_children(children));
|
2025-07-15 09:59:38 -04:00
|
|
|
},
|
2025-07-19 14:46:45 -04:00
|
|
|
Item::EmphasisText(children) => {
|
|
|
|
markdown = format!("*{}*",process_children(children));
|
2025-07-15 09:59:38 -04:00
|
|
|
}
|
|
|
|
Item::Text(s) => {
|
|
|
|
markdown = markdown + s;
|
|
|
|
},
|
|
|
|
Item::Link(href, children) => {
|
2025-07-19 11:40:37 -04:00
|
|
|
markdown = markdown + &format!("[{}]({})",process_children(children),href);
|
2025-07-15 09:59:38 -04:00
|
|
|
}
|
2025-07-19 11:40:37 -04:00
|
|
|
Item::Paragraph(children) => {
|
|
|
|
markdown = markdown + &process_children(children);
|
2025-07-15 09:59:38 -04:00
|
|
|
}
|
2025-07-19 14:46:45 -04:00
|
|
|
Item::UnorderedList(children) => {
|
|
|
|
markdown = markdown + &process_children(children);
|
|
|
|
}
|
|
|
|
Item::OrderedList(children) => {
|
|
|
|
markdown = markdown + &process_children(children);
|
2025-07-15 09:59:38 -04:00
|
|
|
}
|
2025-07-19 14:46:45 -04:00
|
|
|
Item::ListItem(children) => {
|
|
|
|
markdown = "\n- ".to_owned() + &process_children(children);
|
2025-07-15 09:59:38 -04:00
|
|
|
}
|
|
|
|
_ => {}
|
|
|
|
}
|
|
|
|
Content::Markdown(markdown)
|
|
|
|
}
|
|
|
|
|
2025-07-22 14:28:51 -04:00
|
|
|
fn media_content(_: &Item) -> Content{
|
2025-07-15 09:59:38 -04:00
|
|
|
Content::Markdown("Media not supported yet".to_owned())
|
|
|
|
}
|
|
|
|
|
|
|
|
|
2025-07-19 11:45:31 -04:00
|
|
|
fn process_content(items: Vec<Item>) -> Vec<Content> {
|
2025-07-15 09:59:38 -04:00
|
|
|
let mut result: Vec<Content> = Vec::new();
|
2025-07-19 11:40:37 -04:00
|
|
|
//println!("Converting {} items into Content",items.len());
|
|
|
|
for i in &items {
|
2025-07-15 09:59:38 -04:00
|
|
|
match i {
|
2025-07-19 11:40:37 -04:00
|
|
|
Item::Title(_,_) => {
|
|
|
|
result.push(markdown_content(i));
|
|
|
|
}
|
2025-07-22 14:28:51 -04:00
|
|
|
Item::Paragraph(_) => {
|
2025-07-15 09:59:38 -04:00
|
|
|
result.push(markdown_content(i));
|
|
|
|
},
|
2025-07-22 14:28:51 -04:00
|
|
|
Item::Link(_,_) => {
|
2025-07-19 11:40:37 -04:00
|
|
|
result.push(markdown_content(i))
|
|
|
|
}
|
2025-07-22 14:28:51 -04:00
|
|
|
Item::UnorderedList(_) => {
|
2025-07-15 09:59:38 -04:00
|
|
|
result.push(markdown_content(i));
|
|
|
|
}
|
2025-07-22 14:28:51 -04:00
|
|
|
Item::OrderedList(_) => {
|
2025-07-15 09:59:38 -04:00
|
|
|
result.push(markdown_content(i));
|
|
|
|
}
|
2025-07-22 14:28:51 -04:00
|
|
|
Item::ListItem(_) => {
|
2025-07-19 14:46:45 -04:00
|
|
|
result.push(markdown_content(i));
|
|
|
|
}
|
2025-07-15 09:59:38 -04:00
|
|
|
Item::Image(src) => {
|
|
|
|
result.push(Content::Image(src.to_owned()));
|
|
|
|
}
|
2025-07-22 14:28:51 -04:00
|
|
|
Item::Video(_) => {
|
2025-07-15 09:59:38 -04:00
|
|
|
result.push(media_content(i));
|
|
|
|
}
|
2025-07-22 14:28:51 -04:00
|
|
|
Item::Audio(_) => {
|
2025-07-15 09:59:38 -04:00
|
|
|
result.push(media_content(i));
|
|
|
|
}
|
2025-07-18 11:23:30 -04:00
|
|
|
_ => {
|
|
|
|
result.push(Content::Ignore);
|
|
|
|
}
|
2025-07-15 09:59:38 -04:00
|
|
|
}
|
2025-07-19 11:40:37 -04:00
|
|
|
}
|
2025-07-15 09:59:38 -04:00
|
|
|
|
2025-07-18 11:23:30 -04:00
|
|
|
result
|
2025-07-15 09:59:38 -04:00
|
|
|
}
|
2025-07-11 10:00:21 -04:00
|
|
|
|
2025-07-19 11:40:37 -04:00
|
|
|
/// Intermediate tree node built while walking the parsed HTML, before it is
/// flattened into `Content`.
#[derive(Debug, Clone)]
enum Item {
    /// A node with no supported representation.
    Ignore,
    /// A heading: its level followed by its child items.
    Title(usize, Vec<Item>),
    /// Plain text.
    Text(String),
    // Text, links and formatting are all rendered as markdown. Arguably, for
    // better control, it would be best to turn markdown into its own set of
    // items.
    /// An image, holding a source string.
    Image(String),
    /// An SVG; won't be supported for a while.
    Svg(String),
    /// Video media, holding child items.
    Video(Vec<Item>),
    /// Audio media, holding child items.
    Audio(Vec<Item>),
    /// A media source, holding a source string.
    Source(String),
    /// Bold text wrapping its child items.
    BoldedText(Vec<Item>),
    /// Emphasised text wrapping its child items.
    EmphasisText(Vec<Item>),
    /// A bulleted list; entries are expected to be `ListItem`s.
    UnorderedList(Vec<Item>),
    /// A numbered list; entries are expected to be `ListItem`s.
    OrderedList(Vec<Item>),
    /// One entry of a list.
    ListItem(Vec<Item>),
    /// A paragraph. TODO: replace with specific items; this is needlessly
    /// flexible.
    Paragraph(Vec<Item>),
    /// A hyperlink: the target followed by the items forming the link text.
    Link(String, Vec<Item>),
    /// A table, holding child items.
    Table(Vec<Item>),
}
|
|
|
|
|
|
|
|
|
2025-07-19 11:43:42 -04:00
|
|
|
fn itemize_content(content: &str) -> Vec<Item> {
|
2025-07-10 18:58:11 -04:00
|
|
|
let frag = Html::parse_fragment(content);
|
|
|
|
frag.root_element().children().map(|e|{
|
|
|
|
parse_items(e)
|
|
|
|
}).collect()
|
|
|
|
}
|
|
|
|
|
2025-07-15 09:59:38 -04:00
|
|
|
fn get_children(el: &ElementRef) -> Vec<Item>{
|
2025-07-10 18:58:11 -04:00
|
|
|
el.children().map(|c|{parse_items(c)}).collect()
|
|
|
|
}
|
|
|
|
|
|
|
|
fn parse_items(n: ego_tree::NodeRef<'_,Node>) -> Item{
|
|
|
|
if n.value().is_text(){
|
|
|
|
return Item::Text((&n.value().as_text().unwrap()).to_string())
|
|
|
|
}
|
|
|
|
if n.value().is_element(){
|
|
|
|
let el = ElementRef::wrap(n).unwrap();
|
|
|
|
let tag_name = el.value().name();
|
|
|
|
match tag_name {
|
2025-07-15 09:59:38 -04:00
|
|
|
"h1" => {return Item::Title(1, get_children(&el))},
|
|
|
|
"h2" => {return Item::Title(2, get_children(&el))},
|
|
|
|
"h3" => {return Item::Title(3, get_children(&el))},
|
|
|
|
"h4" => {return Item::Title(4, get_children(&el))},
|
|
|
|
"h5" => {return Item::Title(5, get_children(&el))},
|
|
|
|
"h6" => {return Item::Title(6, get_children(&el))},
|
|
|
|
"strong" => {return Item::BoldedText(get_children(&el))},
|
|
|
|
"em" => {return Item::EmphasisText(get_children(&el))},
|
2025-07-10 18:58:11 -04:00
|
|
|
"br" => {return Item::Text("\n".to_owned())},
|
|
|
|
"hr" => {return Item::Text("---".to_owned())}
|
|
|
|
"p" => {
|
|
|
|
return Item::Paragraph(get_children(&el))
|
|
|
|
},
|
|
|
|
"a" => {
|
|
|
|
let href = match el.attr("href") {
|
|
|
|
Some(link) => {link}
|
|
|
|
None => {""}
|
|
|
|
};
|
2025-07-11 10:00:21 -04:00
|
|
|
return Item::Link(href.to_owned(),get_children(&el))
|
|
|
|
|
|
|
|
|
2025-07-10 18:58:11 -04:00
|
|
|
}
|
|
|
|
"img" => {
|
|
|
|
match el.attr("src") {
|
|
|
|
Some(src) => {
|
|
|
|
return Item::Image(src.to_owned())
|
|
|
|
},
|
|
|
|
None => {return Item::Ignore}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
"source" => {
|
|
|
|
match el.attr("src") {
|
|
|
|
Some(src) => {
|
|
|
|
return Item::Source(src.to_owned())
|
|
|
|
},
|
|
|
|
None => {return Item::Ignore}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
"video" => {
|
2025-07-11 10:00:21 -04:00
|
|
|
return Item::Video(get_children(&el))
|
2025-07-10 18:58:11 -04:00
|
|
|
}
|
|
|
|
"ol" => {
|
|
|
|
return Item::OrderedList(get_children(&el))
|
|
|
|
}
|
|
|
|
"ul" => {
|
|
|
|
return Item::UnorderedList(get_children(&el))
|
|
|
|
}
|
|
|
|
"li" => {
|
|
|
|
return Item::ListItem(get_children(&el))
|
|
|
|
}
|
|
|
|
|
|
|
|
_ => {}
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
|
|
|
Item::Ignore
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
Ideally I would verify what works and write tests for it.
|
|
|
|
I also need a function to process markdown items.
|
|
|
|
*/
|
|
|
|
/*
|
|
|
|
pub fn add(left: u64, right: u64) -> u64 {
|
|
|
|
left + right
|
|
|
|
}
|
|
|
|
|
|
|
|
#[cfg(test)]
|
|
|
|
mod tests {
|
|
|
|
use super::*;
|
|
|
|
|
|
|
|
#[test]
|
|
|
|
fn it_works() {
|
|
|
|
let result = add(2, 2);
|
|
|
|
assert_eq!(result, 4);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
*/
|
|
|
|
#[cfg(test)]
|
2025-07-22 10:16:46 -04:00
|
|
|
mod tests;
|