Instead of failing silently, I fixed the parsing logic

This commit is contained in:
Gabriel 2025-07-19 11:40:37 -04:00
parent 4d16431646
commit ac858bdf0a
2 changed files with 62 additions and 30 deletions

View file

@ -15,56 +15,59 @@ pub enum Content {
Ignore
}
//double recursion? This seems dumb.
fn markdownify(item: &Item) -> String{
match markdown_content(item) {
fn markdownify_child(item: &Item) -> String {
let mut result = "".to_owned();
match markdown_content(&item) {
Content::Markdown(s) => {
s.to_owned()
}
_ => {"".to_owned()}
result = result + &s;
},
_ => {}
}
result
}
/// Renders every child item to Markdown and concatenates the pieces in order.
///
/// Each child is passed through `markdownify_child`, and the resulting strings
/// are joined with no separator. An empty `children` list yields an empty
/// string.
fn process_children(children: &Vec<Item>) -> String {
    children
        .iter()
        .map(|child| markdownify_child(child))
        .collect()
}
fn markdown_content(item: &Item) -> Content {
let mut markdown = String::new();
match item {
Item::Title(n,t) => {
markdown = markdown + &"#".repeat(*n);
let _ = t.iter().map(|i|{
markdown = "".to_owned() + &markdown + &markdownify(i);
});
Item::Title(n,children) => {
markdown = markdown + &"#".repeat(*n) + " " +&process_children(children);
},
Item::BoldedText(b) => {
let _ = b.iter().map(|i|{
markdown = "**".to_owned() + &markdown + &markdownify(i) + "**";
markdown = "**".to_owned() + &markdown + "**";
});
},
Item::EmphasisText(e) => {
let _ = e.iter().map(|i|{
markdown = "*".to_owned() + &markdown + &markdownify(i) + "*";
markdown = "*".to_owned() + &markdown + "*";
});
}
Item::Text(s) => {
markdown = markdown + s;
},
Item::Link(href, children) => {
markdown = markdown + &markdownify(item);
markdown = markdown + &format!("[{}]({})",process_children(children),href);
}
Item::Paragraph(p) => {
let _ = p.iter().map(|i|{
markdown = "".to_owned() + &markdown + &markdownify(i);
});
Item::Paragraph(children) => {
markdown = markdown + &process_children(children);
}
Item::UnorderedList(u) => {
let _ = u.iter().map(|i|{
markdown = "".to_owned() + &markdown + &markdownify(i);
markdown = "".to_owned() + &markdown ;
});
}
Item::OrderedList(o) => {
let _ = o.iter().map(|i|{
markdown = "".to_owned() + &markdown + &markdownify(i);
markdown = "".to_owned() + &markdown;
});
}
_ => {}
@ -77,14 +80,20 @@ fn media_content(item: &Item) -> Content{
}
pub fn process_content(content: &str) -> Vec<Content> {
let items = itemize_content(content);
pub fn process_content(items: Vec<Item>) -> Vec<Content> {
let mut result: Vec<Content> = Vec::new();
let _ = items.iter().map(|i| {
//println!("Converting {} items into Content",items.len());
for i in &items {
match i {
Item::Title(_,_) => {
result.push(markdown_content(i));
}
Item::Paragraph(children) => {
result.push(markdown_content(i));
},
Item::Link(href,children) => {
result.push(markdown_content(i))
}
Item::UnorderedList(children) => {
result.push(markdown_content(i));
}
@ -104,12 +113,12 @@ pub fn process_content(content: &str) -> Vec<Content> {
result.push(Content::Ignore);
}
}
});
}
result
}
#[derive(Debug)]
#[derive(Debug,Clone)]
enum Item {
Ignore,
Title(usize,Vec<Item>),
@ -147,11 +156,9 @@ fn parse_items(n: ego_tree::NodeRef<'_,Node>) -> Item{
if n.value().is_text(){
return Item::Text((&n.value().as_text().unwrap()).to_string())
}
if n.value().is_element(){
let el = ElementRef::wrap(n).unwrap();
let tag_name = el.value().name();
let mut item: Item;
match tag_name {
"h1" => {return Item::Title(1, get_children(&el))},
"h2" => {return Item::Title(2, get_children(&el))},
@ -237,12 +244,33 @@ mod tests {
#[cfg(test)]
mod tests {
mod example_data;
use crate::{itemize_content, process_content, tests::example_data::FEEDS};
use crate::{itemize_content, process_content, tests::example_data::FEEDS,Content,Item};
fn get_feed(u: &str) -> rss::Channel {
rss::Channel::read_from(u.as_bytes()).unwrap()
}
/// Builds one link item wrapping a single text child, runs it through
/// `process_content`, and prints the resulting content for manual inspection.
#[test]
pub fn content_test(){
    let link_label = Item::Text("Example.com".to_owned());
    let link = Item::Link("https://example.com".to_owned(), vec![link_label]);
    let contents = process_content(vec![link]);
    println!("Items to content parse result:\n{:#?}",contents);
}
/// Parses the `GABE_ROCKS` example feed, itemizes the content of its first
/// entry, converts those items with `process_content`, and prints the result.
#[test]
fn content_display() {
    let feed = get_feed(example_data::GABE_ROCKS);
    // Both unwraps are kept as-is: the test panics if the feed has no entries
    // or the first entry carries no content.
    let first_entry = feed.items.first().unwrap();
    let raw_content = first_entry.content().unwrap();
    let items = itemize_content(raw_content);
    let content: Vec<_> = process_content(items);
    println!("Content: {:#?}",content);
}
#[test]
fn itemize_feeds(){
let _ = FEEDS.map(|u|{
@ -259,7 +287,9 @@ mod tests {
let _ = FEEDS.map(|u|{
let feed = get_feed(u);
let results: Vec<_> = feed.items.into_iter().map(|item|{
process_content(&item.content.unwrap());
process_content(
itemize_content(&item.content.unwrap())
);
}).collect();
println!("Processed {} items without errors",results.len())
});

View file

@ -3,6 +3,8 @@ pub const FEEDS: [&str; 2] = [
GABE_ROCKS
];
pub const LSN: &str = r#"
<?xml-stylesheet href="/feed-style.xsl" type="text/xsl"?>
<rss version="2.0" xmlns:dc="http://purl.org/dc/elements/1.1/" xmlns:media="http://search.yahoo.com/mrss/" xmlns:content="http://purl.org/rss/1.0/modules/content/" xmlns:podcast="https://github.com/Podcastindex-org/podcast-namespace/blob/main/docs/1.0.md" xmlns:itunes="http://www.itunes.com/dtds/podcast-1.0.dtd">