Instead of failing silently, I fixed the parsing logic

This commit is contained in:
Gabriel 2025-07-19 11:40:37 -04:00
parent 4d16431646
commit ac858bdf0a
2 changed files with 62 additions and 30 deletions

View file

@ -15,56 +15,59 @@ pub enum Content {
Ignore Ignore
} }
fn markdownify_child(item: &Item) -> String {
//double recursion? This seems dumb. let mut result = "".to_owned();
fn markdownify(item: &Item) -> String{ match markdown_content(&item) {
match markdown_content(item) {
Content::Markdown(s) => { Content::Markdown(s) => {
s.to_owned() result = result + &s;
} },
_ => {"".to_owned()} _ => {}
} }
result
} }
fn process_children(children: &Vec<Item>) -> String {
let mut result = "".to_owned();
for c in children{
result = result + &markdownify_child(c);
}
result
}
fn markdown_content(item: &Item) -> Content { fn markdown_content(item: &Item) -> Content {
let mut markdown = String::new(); let mut markdown = String::new();
match item { match item {
Item::Title(n,t) => { Item::Title(n,children) => {
markdown = markdown + &"#".repeat(*n); markdown = markdown + &"#".repeat(*n) + " " +&process_children(children);
let _ = t.iter().map(|i|{
markdown = "".to_owned() + &markdown + &markdownify(i);
});
}, },
Item::BoldedText(b) => { Item::BoldedText(b) => {
let _ = b.iter().map(|i|{ let _ = b.iter().map(|i|{
markdown = "**".to_owned() + &markdown + &markdownify(i) + "**"; markdown = "**".to_owned() + &markdown + "**";
}); });
}, },
Item::EmphasisText(e) => { Item::EmphasisText(e) => {
let _ = e.iter().map(|i|{ let _ = e.iter().map(|i|{
markdown = "*".to_owned() + &markdown + &markdownify(i) + "*"; markdown = "*".to_owned() + &markdown + "*";
}); });
} }
Item::Text(s) => { Item::Text(s) => {
markdown = markdown + s; markdown = markdown + s;
}, },
Item::Link(href, children) => { Item::Link(href, children) => {
markdown = markdown + &markdownify(item); markdown = markdown + &format!("[{}]({})",process_children(children),href);
} }
Item::Paragraph(p) => { Item::Paragraph(children) => {
let _ = p.iter().map(|i|{ markdown = markdown + &process_children(children);
markdown = "".to_owned() + &markdown + &markdownify(i);
});
} }
Item::UnorderedList(u) => { Item::UnorderedList(u) => {
let _ = u.iter().map(|i|{ let _ = u.iter().map(|i|{
markdown = "".to_owned() + &markdown + &markdownify(i); markdown = "".to_owned() + &markdown ;
}); });
} }
Item::OrderedList(o) => { Item::OrderedList(o) => {
let _ = o.iter().map(|i|{ let _ = o.iter().map(|i|{
markdown = "".to_owned() + &markdown + &markdownify(i); markdown = "".to_owned() + &markdown;
}); });
} }
_ => {} _ => {}
@ -77,14 +80,20 @@ fn media_content(item: &Item) -> Content{
} }
pub fn process_content(content: &str) -> Vec<Content> { pub fn process_content(items: Vec<Item>) -> Vec<Content> {
let items = itemize_content(content);
let mut result: Vec<Content> = Vec::new(); let mut result: Vec<Content> = Vec::new();
let _ = items.iter().map(|i| { //println!("Converting {} items into Content",items.len());
for i in &items {
match i { match i {
Item::Title(_,_) => {
result.push(markdown_content(i));
}
Item::Paragraph(children) => { Item::Paragraph(children) => {
result.push(markdown_content(i)); result.push(markdown_content(i));
}, },
Item::Link(href,children) => {
result.push(markdown_content(i))
}
Item::UnorderedList(children) => { Item::UnorderedList(children) => {
result.push(markdown_content(i)); result.push(markdown_content(i));
} }
@ -104,12 +113,12 @@ pub fn process_content(content: &str) -> Vec<Content> {
result.push(Content::Ignore); result.push(Content::Ignore);
} }
} }
}); }
result result
} }
#[derive(Debug)] #[derive(Debug,Clone)]
enum Item { enum Item {
Ignore, Ignore,
Title(usize,Vec<Item>), Title(usize,Vec<Item>),
@ -147,11 +156,9 @@ fn parse_items(n: ego_tree::NodeRef<'_,Node>) -> Item{
if n.value().is_text(){ if n.value().is_text(){
return Item::Text((&n.value().as_text().unwrap()).to_string()) return Item::Text((&n.value().as_text().unwrap()).to_string())
} }
if n.value().is_element(){ if n.value().is_element(){
let el = ElementRef::wrap(n).unwrap(); let el = ElementRef::wrap(n).unwrap();
let tag_name = el.value().name(); let tag_name = el.value().name();
let mut item: Item;
match tag_name { match tag_name {
"h1" => {return Item::Title(1, get_children(&el))}, "h1" => {return Item::Title(1, get_children(&el))},
"h2" => {return Item::Title(2, get_children(&el))}, "h2" => {return Item::Title(2, get_children(&el))},
@ -237,12 +244,33 @@ mod tests {
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
mod example_data; mod example_data;
use crate::{itemize_content, process_content, tests::example_data::FEEDS}; use crate::{itemize_content, process_content, tests::example_data::FEEDS,Content,Item};
fn get_feed(u: &str) -> rss::Channel { fn get_feed(u: &str) -> rss::Channel {
rss::Channel::read_from(u.as_bytes()).unwrap() rss::Channel::read_from(u.as_bytes()).unwrap()
} }
/// Converts a hand-built link item and checks that it is not silently dropped.
#[test]
pub fn content_test() {
    // A single link whose only child is its visible text.
    let example_text = Item::Text("Example.com".to_owned());
    let example_link = Item::Link("https://example.com".to_owned(), vec![example_text]);
    let result = process_content(vec![example_link]);
    // Kept for manual inspection when running with `--nocapture`.
    println!("Items to content parse result:\n{:#?}", result);
    // One top-level item in must produce exactly one entry out; without this
    // check the test would pass even if the link were lost during conversion.
    assert_eq!(result.len(), 1);
}
/// Runs the first entry of the bundled sample feed through itemizing and
/// content conversion, printing the result. Only panics are detected; the
/// output itself is not asserted.
#[test]
fn content_display() {
    // Parse the sample channel and pull the body of its first entry.
    let channel = get_feed(example_data::GABE_ROCKS);
    let first_entry = channel.items.first().unwrap();
    let body = first_entry.content().unwrap();
    // Itemize the body, then convert the items to `Content`.
    let content: Vec<_> = process_content(itemize_content(body));
    println!("Content: {:#?}", content)
}
#[test] #[test]
fn itemize_feeds(){ fn itemize_feeds(){
let _ = FEEDS.map(|u|{ let _ = FEEDS.map(|u|{
@ -259,7 +287,9 @@ mod tests {
let _ = FEEDS.map(|u|{ let _ = FEEDS.map(|u|{
let feed = get_feed(u); let feed = get_feed(u);
let results: Vec<_> = feed.items.into_iter().map(|item|{ let results: Vec<_> = feed.items.into_iter().map(|item|{
process_content(&item.content.unwrap()); process_content(
itemize_content(&item.content.unwrap())
);
}).collect(); }).collect();
println!("Processed {} items without errors",results.len()) println!("Processed {} items without errors",results.len())
}); });

View file

@ -3,6 +3,8 @@ pub const FEEDS: [&str; 2] = [
GABE_ROCKS GABE_ROCKS
]; ];
pub const LSN: &str = r#" pub const LSN: &str = r#"
<?xml-stylesheet href="/feed-style.xsl" type="text/xsl"?> <?xml-stylesheet href="/feed-style.xsl" type="text/xsl"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:podcast="https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/1.0.md" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd"> <rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:podcast="https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/1.0.md" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">