Use clokwerk for scheduling

This commit is contained in:
Denis-Cosmin Nutiu 2024-12-21 16:24:31 +02:00
parent 1212120a91
commit 99d91ba303
4 changed files with 26 additions and 27 deletions

View file

@@ -9,3 +9,4 @@ tokio = { version = "1", features = ["full"] }
anyhow = "1.0" anyhow = "1.0"
reqwest = "0.12.9" reqwest = "0.12.9"
scraper = "0.22.0" scraper = "0.22.0"
clokwerk = "0.4.0"

View file

@@ -1,16 +1,27 @@
use crate::scrapper::gfourmedia::G4Media; use crate::scrapper::gfourmedia::G4Media;
use crate::scrapper::WebScrapperEngine; use crate::scrapper::WebScrapperEngine;
use clokwerk::{AsyncScheduler, TimeUnits};
use std::time::Duration;
mod scrapper; mod scrapper;
#[tokio::main] #[tokio::main]
async fn main() -> Result<(), anyhow::Error> { async fn main() -> Result<(), anyhow::Error> {
env_logger::init(); env_logger::init();
println!("Hello, world!"); let mut scheduler = AsyncScheduler::new();
scheduler.every(60.seconds()).run(|| async {
let scrapper = WebScrapperEngine::new(G4Media::default()).await?; let posts = WebScrapperEngine::get_posts(G4Media::default())
let posts = scrapper.get_posts().await?; .await
.expect("failed to get posts");
posts.iter().for_each(|p| println!("{:?}", p)); posts
.iter()
.filter(|p| p.is_complete())
.for_each(|p| println!("{:?}", p));
});
// Manually run the scheduler forever
loop {
scheduler.run_pending().await;
tokio::time::sleep(Duration::from_millis(10)).await;
}
Ok(()) Ok(())
} }

View file

@@ -9,8 +9,6 @@ pub struct NewsPost {
pub title: Option<String>, pub title: Option<String>,
/// A summary of the post. /// A summary of the post.
pub summary: Option<String>, pub summary: Option<String>,
/// The content of the post.
pub content: Option<String>,
/// A link to the post. /// A link to the post.
pub link: Option<String>, pub link: Option<String>,
/// The author of the post. /// The author of the post.
@@ -31,26 +29,16 @@ pub(crate) trait ScrappableWebPage {
} }
/// The web scraper engine is used to scrape web pages. /// The web scraper engine is used to scrape web pages.
pub struct WebScrapperEngine<P> pub struct WebScrapperEngine;
where
P: ScrappableWebPage,
{
web_page: P,
}
impl<P> WebScrapperEngine<P> impl WebScrapperEngine {
where pub async fn get_posts<P>(web_page: P) -> Result<Vec<NewsPost>, anyhow::Error>
P: ScrappableWebPage, where
{ P: ScrappableWebPage,
/// Creates a new instance of WebScrapperEngine {
pub async fn new(web_page: P) -> Result<Self, anyhow::Error> { let body = reqwest::get(web_page.get_url()).await?.text().await?;
Ok(WebScrapperEngine { web_page })
}
pub async fn get_posts(&self) -> Result<Vec<NewsPost>, anyhow::Error> { let results = web_page.get_posts(body)?;
let body = reqwest::get(self.web_page.get_url()).await?.text().await?;
let results = self.web_page.get_posts(body)?;
Ok(results) Ok(results)
} }
} }

View file

@@ -45,7 +45,6 @@ impl ScrappableWebPage for G4Media {
image: None, image: None,
title: None, title: None,
summary: None, summary: None,
content: None,
link: None, link: None,
author: None, author: None,
}; };