move markdown handling to this crate
This commit is contained in:
parent
d6fa7898ab
commit
dbb3dd5d27
4 changed files with 135 additions and 10 deletions
1
.gitignore
vendored
1
.gitignore
vendored
|
@ -1 +1,2 @@
|
|||
/target
|
||||
.vscode
|
2
Cargo.lock
generated
2
Cargo.lock
generated
|
@ -1193,7 +1193,7 @@ dependencies = [
|
|||
|
||||
[[package]]
|
||||
name = "rss_content"
|
||||
version = "0.1.2"
|
||||
version = "0.1.3"
|
||||
dependencies = [
|
||||
"ego-tree",
|
||||
"reqwest",
|
||||
|
|
|
@ -1,6 +1,6 @@
|
|||
[package]
|
||||
name = "rss_content"
|
||||
version = "0.1.2"
|
||||
version = "0.1.3"
|
||||
edition = "2024"
|
||||
|
||||
[dependencies]
|
||||
|
|
140
src/lib.rs
140
src/lib.rs
|
@ -5,15 +5,115 @@ use scraper::{ElementRef, Html, Node};
|
|||
paragraphs with fancy formatting are turned into markdown, same with
|
||||
*/
|
||||
|
||||
/// Supported content: one unit of processed output produced from a feed item.
///
/// `PartialEq`/`Eq` are derived so callers and tests can compare values directly.
#[derive(Debug, Clone, PartialEq, Eq)]
pub enum Content {
    /// Text rendered as markdown.
    Markdown(String),
    /// An image; `process_content` stores the source item's `src` string here.
    Image(String),
    /// Audio payload. NOTE(review): presumably a URL, but nothing visible constructs this yet.
    Audio(String),
    /// Video payload. NOTE(review): presumably a URL, but nothing visible constructs this yet.
    Video(String),
}
|
||||
|
||||
//double recursion? This seems dumb.
|
||||
fn markdownify(item: &Item) -> String{
|
||||
match markdown_content(item) {
|
||||
Content::Markdown(s) => {
|
||||
s.to_owned()
|
||||
}
|
||||
_ => {"".to_owned()}
|
||||
}
|
||||
|
||||
}
|
||||
|
||||
fn markdown_content(item: &Item) -> Content {
|
||||
let mut markdown = String::new();
|
||||
match item {
|
||||
Item::Title(n,t) => {
|
||||
markdown = markdown + &"#".repeat(*n);
|
||||
let _ = t.iter().map(|i|{
|
||||
markdown = "".to_owned() + &markdown + &markdownify(i);
|
||||
});
|
||||
},
|
||||
Item::BoldedText(b) => {
|
||||
let _ = b.iter().map(|i|{
|
||||
markdown = "**".to_owned() + &markdown + &markdownify(i) + "**";
|
||||
});
|
||||
},
|
||||
Item::EmphasisText(e) => {
|
||||
let _ = e.iter().map(|i|{
|
||||
markdown = "*".to_owned() + &markdown + &markdownify(i) + "*";
|
||||
});
|
||||
}
|
||||
Item::Text(s) => {
|
||||
markdown = markdown + s;
|
||||
},
|
||||
Item::Link(href, children) => {
|
||||
markdown = markdown + &markdownify(item);
|
||||
}
|
||||
Item::Paragraph(p) => {
|
||||
let _ = p.iter().map(|i|{
|
||||
markdown = "".to_owned() + &markdown + &markdownify(i);
|
||||
});
|
||||
}
|
||||
Item::UnorderedList(u) => {
|
||||
let _ = u.iter().map(|i|{
|
||||
markdown = "".to_owned() + &markdown + &markdownify(i);
|
||||
});
|
||||
}
|
||||
Item::OrderedList(o) => {
|
||||
let _ = o.iter().map(|i|{
|
||||
markdown = "".to_owned() + &markdown + &markdownify(i);
|
||||
});
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
Content::Markdown(markdown)
|
||||
}
|
||||
|
||||
fn media_content(item: &Item) -> Content{
|
||||
Content::Markdown("Media not supported yet".to_owned())
|
||||
}
|
||||
|
||||
|
||||
pub fn process_content(content: &str) -> Vec<Content> {
|
||||
let items = itemize_content(content);
|
||||
let mut result: Vec<Content> = Vec::new();
|
||||
let _ = items.iter().map(|i| {
|
||||
match i {
|
||||
Item::Paragraph(children) => {
|
||||
result.push(markdown_content(i));
|
||||
},
|
||||
Item::UnorderedList(children) => {
|
||||
result.push(markdown_content(i));
|
||||
}
|
||||
Item::OrderedList(children) => {
|
||||
result.push(markdown_content(i));
|
||||
}
|
||||
Item::Image(src) => {
|
||||
result.push(Content::Image(src.to_owned()));
|
||||
}
|
||||
Item::Video(children) => {
|
||||
result.push(media_content(i));
|
||||
}
|
||||
Item::Audio(children) => {
|
||||
result.push(media_content(i));
|
||||
}
|
||||
_ => {}
|
||||
}
|
||||
});
|
||||
|
||||
[Content::Markdown("Ayy lmao".to_owned())].to_vec()
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
pub enum Item {
|
||||
enum Item {
|
||||
Ignore,
|
||||
Title(usize,Vec<Item>),
|
||||
Text(String),
|
||||
//text, links, formatting are all markdown
|
||||
//arguably, for better control it will be best to turn markdown into its own set of items
|
||||
Image(String),
|
||||
Gif(String), //can't detect gif from image, has to be handled on front-end
|
||||
Svg(String),// wont' support for a while I think.
|
||||
Video(Vec<Item>),
|
||||
Audio(Vec<Item>),
|
||||
|
@ -25,6 +125,7 @@ pub enum Item {
|
|||
ListItem(Vec<Item>),
|
||||
Paragraph(Vec<Item>),//gotta replace this with specific items, needlessly flexible
|
||||
Link(String,Vec<Item>),
|
||||
Table(Vec<Item>)
|
||||
}
|
||||
|
||||
|
||||
|
@ -35,7 +136,7 @@ pub fn itemize_content(content: &str) -> Vec<Item> {
|
|||
}).collect()
|
||||
}
|
||||
|
||||
pub fn get_children(el: &ElementRef) -> Vec<Item>{
|
||||
fn get_children(el: &ElementRef) -> Vec<Item>{
|
||||
el.children().map(|c|{parse_items(c)}).collect()
|
||||
}
|
||||
|
||||
|
@ -49,6 +150,14 @@ fn parse_items(n: ego_tree::NodeRef<'_,Node>) -> Item{
|
|||
let tag_name = el.value().name();
|
||||
let mut item: Item;
|
||||
match tag_name {
|
||||
"h1" => {return Item::Title(1, get_children(&el))},
|
||||
"h2" => {return Item::Title(2, get_children(&el))},
|
||||
"h3" => {return Item::Title(3, get_children(&el))},
|
||||
"h4" => {return Item::Title(4, get_children(&el))},
|
||||
"h5" => {return Item::Title(5, get_children(&el))},
|
||||
"h6" => {return Item::Title(6, get_children(&el))},
|
||||
"strong" => {return Item::BoldedText(get_children(&el))},
|
||||
"em" => {return Item::EmphasisText(get_children(&el))},
|
||||
"br" => {return Item::Text("\n".to_owned())},
|
||||
"hr" => {return Item::Text("---".to_owned())}
|
||||
"p" => {
|
||||
|
@ -124,15 +233,17 @@ mod tests {
|
|||
*/
|
||||
#[cfg(test)]
|
||||
mod tests {
|
||||
use core::panic;
|
||||
use rss::Channel;
|
||||
mod example_data;
|
||||
use crate::{itemize_content, process_content, tests::example_data::FEEDS};
|
||||
|
||||
use crate::{itemize_content, tests::example_data::FEEDS};
|
||||
|
||||
/// Parses the feed document held in `u` into an `rss::Channel`.
///
/// Panics if the text cannot be read as a channel.
fn get_feed(u: &str) -> rss::Channel {
    let bytes = u.as_bytes();
    let parsed = rss::Channel::read_from(bytes);
    parsed.unwrap()
}
|
||||
#[test]
|
||||
fn real_feeds(){
|
||||
fn itemize_feeds(){
|
||||
let _ = FEEDS.map(|u|{
|
||||
let feed = rss::Channel::read_from(u.as_bytes()).unwrap();
|
||||
let feed = get_feed(u);
|
||||
let results: Vec<_> = feed.items.into_iter().map(|item| {
|
||||
itemize_content(&item.content.unwrap());
|
||||
}).collect();
|
||||
|
@ -140,4 +251,17 @@ mod tests {
|
|||
println!("Evaluated feed\nScanned {} items without errors",results.len())
|
||||
});
|
||||
}
|
||||
/// Runs `process_content` over every item of every example feed; the test
/// passes as long as nothing panics.
#[test]
fn markdownify_feeds() {
    let _ = FEEDS.map(|u| {
        let channel = get_feed(u);
        let outcomes: Vec<()> = channel
            .items
            .into_iter()
            .map(|entry| {
                // Output is discarded on purpose; only absence of panics is checked.
                let body = entry.content.unwrap();
                process_content(&body);
            })
            .collect();
        println!("Processed {} items without errors", outcomes.len())
    });
}
|
||||
|
||||
|
||||
|
||||
}
|
Loading…
Reference in a new issue