From dbb3dd5d2776637560364f04f1a33be527eb7aec Mon Sep 17 00:00:00 2001
From: Gabriel
Date: Tue, 15 Jul 2025 09:59:38 -0400
Subject: [PATCH] move markdown handling to this crate

---
Review notes (this section is ignored by `git am`):
* Re-flowed from a whitespace-collapsed copy. Indentation of context lines and
  the generic arguments (`Vec<Item>`, `Vec<Content>`) are reconstructed.
* The src/lib.rs additions were revised during review: the unconsumed
  `.iter().map(..)` chains never ran, the `Item::Link` arm recursed on itself,
  and `process_content` returned a placeholder instead of its results.
  The `index` line of src/lib.rs still carries the pre-review blob id.

 .gitignore |   1 +
 Cargo.lock |   2 +-
 Cargo.toml |   2 +-
 src/lib.rs | 128 ++++++++++++++++++++++++++++++++++++++++++++++++++---
 4 files changed, 123 insertions(+), 10 deletions(-)

diff --git a/.gitignore b/.gitignore
index ea8c4bf..ccb5166 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1 +1,2 @@
 /target
+.vscode
\ No newline at end of file
diff --git a/Cargo.lock b/Cargo.lock
index 2b0aec7..5be084f 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -1193,7 +1193,7 @@ dependencies = [
 
 [[package]]
 name = "rss_content"
-version = "0.1.2"
+version = "0.1.3"
 dependencies = [
  "ego-tree",
  "reqwest",
diff --git a/Cargo.toml b/Cargo.toml
index c47d561..817ee79 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,6 +1,6 @@
 [package]
 name = "rss_content"
-version = "0.1.2"
+version = "0.1.3"
 edition = "2024"
 
 [dependencies]
diff --git a/src/lib.rs b/src/lib.rs
index c3f97e2..b02c416 100644
--- a/src/lib.rs
+++ b/src/lib.rs
@@ -5,15 +5,103 @@ use scraper::{ElementRef, Html, Node};
 paragraphs with fancy formatting are turned into markdown, same with
 */
 
+/// Renderable pieces of feed content produced by [`process_content`].
+#[derive(Debug, Clone)]
+pub enum Content {
+    /// A block of markdown text.
+    Markdown(String),
+    /// The `src` value of an image item.
+    Image(String),
+    /// Not constructed yet; audio items currently yield a markdown placeholder.
+    Audio(String),
+    /// Not constructed yet; video items currently yield a markdown placeholder.
+    Video(String),
+}
+
+/// Renders every item in `items` to markdown and concatenates the results.
+fn markdownify_all(items: &[Item]) -> String {
+    items.iter().map(markdownify).collect()
+}
+
+/// Renders the `ListItem` children of a list, one per line.
+///
+/// Ordered lists are numbered from 1, unordered lists use `-`. Children that
+/// are not `ListItem`s are skipped.
+fn markdown_list(children: &[Item], ordered: bool) -> String {
+    let mut out = String::new();
+    let mut index = 0usize;
+    for child in children {
+        let Item::ListItem(parts) = child else { continue };
+        index += 1;
+        if ordered {
+            out.push_str(&format!("{index}. "));
+        } else {
+            out.push_str("- ");
+        }
+        out.push_str(markdownify_all(parts).trim());
+        out.push('\n');
+    }
+    out
+}
+
+/// Renders a single item, including its children, as a markdown string.
+///
+/// Items without a markdown representation render as an empty string.
+fn markdownify(item: &Item) -> String {
+    match item {
+        Item::Title(level, children) => {
+            // ATX headings need a space between the `#` run and the text.
+            format!("{} {}\n", "#".repeat(*level), markdownify_all(children))
+        }
+        Item::BoldedText(children) => format!("**{}**", markdownify_all(children)),
+        Item::EmphasisText(children) => format!("*{}*", markdownify_all(children)),
+        Item::Text(text) => text.clone(),
+        // Render the link from its own parts; re-entering on `item` never terminates.
+        Item::Link(href, children) => format!("[{}]({})", markdownify_all(children), href),
+        Item::Paragraph(children) | Item::ListItem(children) => markdownify_all(children),
+        Item::UnorderedList(children) => markdown_list(children, false),
+        Item::OrderedList(children) => markdown_list(children, true),
+        _ => String::new(),
+    }
+}
+
+/// Wraps the markdown rendering of `item` in [`Content::Markdown`].
+fn markdown_content(item: &Item) -> Content {
+    Content::Markdown(markdownify(item))
+}
+
+/// Placeholder used for audio/video items until media extraction exists.
+fn media_content(_item: &Item) -> Content {
+    Content::Markdown("Media not supported yet".to_owned())
+}
+
+/// Itemizes `content` and converts the supported top-level items into
+/// [`Content`] values, in the order `itemize_content` yields them.
+///
+/// Paragraphs and lists become markdown, images keep their `src`, audio and
+/// video yield a placeholder; every other top-level item is dropped.
+pub fn process_content(content: &str) -> Vec<Content> {
+    itemize_content(content)
+        .iter()
+        .filter_map(|item| match item {
+            Item::Paragraph(_) | Item::UnorderedList(_) | Item::OrderedList(_) => {
+                Some(markdown_content(item))
+            }
+            Item::Image(src) => Some(Content::Image(src.clone())),
+            Item::Video(_) | Item::Audio(_) => Some(media_content(item)),
+            _ => None,
+        })
+        .collect()
+}
 
 
 #[derive(Debug)]
-pub enum Item {
+enum Item {
     Ignore,
+    Title(usize,Vec<Item>),
     Text(String), //text, links, formatting are all markdown
     //arguably, for better control it will be best to turn markdown into its own set of items
     Image(String),
-    Gif(String), //can't detect gif from image, has to be handled on front-end
     Svg(String),// wont' support for a while I think.
     Video(Vec<Item>),
     Audio(Vec<Item>),
@@ -25,6 +113,7 @@ pub enum Item {
     ListItem(Vec<Item>),
     Paragraph(Vec<Item>),//gotta replace this with specific items, needlessly flexible
     Link(String,Vec<Item>),
+    Table(Vec<Item>)
 }
 
 
@@ -35,7 +124,7 @@ pub fn itemize_content(content: &str) -> Vec<Item> {
     }).collect()
 }
 
-pub fn get_children(el: &ElementRef) -> Vec<Item>{
+fn get_children(el: &ElementRef) -> Vec<Item>{
     el.children().map(|c|{parse_items(c)}).collect()
 }
 
@@ -49,6 +138,14 @@ fn parse_items(n: ego_tree::NodeRef<'_,Node>) -> Item{
         let tag_name = el.value().name();
         let mut item: Item;
         match tag_name {
+            "h1" => {return Item::Title(1, get_children(&el))},
+            "h2" => {return Item::Title(2, get_children(&el))},
+            "h3" => {return Item::Title(3, get_children(&el))},
+            "h4" => {return Item::Title(4, get_children(&el))},
+            "h5" => {return Item::Title(5, get_children(&el))},
+            "h6" => {return Item::Title(6, get_children(&el))},
+            "strong" => {return Item::BoldedText(get_children(&el))},
+            "em" => {return Item::EmphasisText(get_children(&el))},
             "br" => {return Item::Text("\n".to_owned())},
             "hr" => {return Item::Text("---".to_owned())}
             "p" => {
@@ -124,15 +221,17 @@ mod tests {
 */
 #[cfg(test)]
 mod tests {
-    use core::panic;
-    use rss::Channel;
     mod example_data;
+    use crate::{itemize_content, process_content, tests::example_data::FEEDS};
 
-    use crate::{itemize_content, tests::example_data::FEEDS};
+
+    fn get_feed(u: &str) -> rss::Channel {
+        rss::Channel::read_from(u.as_bytes()).unwrap()
+    }
     #[test]
-    fn real_feeds(){
+    fn itemize_feeds(){
         let _ = FEEDS.map(|u|{
-            let feed = rss::Channel::read_from(u.as_bytes()).unwrap();
+            let feed = get_feed(u);
             let results: Vec<_> = feed.items.into_iter().map(|item| {
                 itemize_content(&item.content.unwrap());
             }).collect();
@@ -140,4 +239,17 @@ mod tests {
             println!("Evaluated feed\nScanned {} items without errors",results.len())
         });
     }
+    #[test]
+    fn markdownify_feeds(){
+        let _ = FEEDS.map(|u|{
+            let feed = get_feed(u);
+            let results: Vec<_> = feed.items.into_iter().map(|item|{
+                process_content(&item.content.unwrap());
+            }).collect();
+            println!("Processed {} items without errors",results.len())
+        });
+    }
+
+
+
 }
\ No newline at end of file