From 99d91ba303cde6bd2b75d3380e2d0bc31fa40ee0 Mon Sep 17 00:00:00 2001
From: Denis Nutiu
Date: Sat, 21 Dec 2024 16:24:31 +0200
Subject: [PATCH] Use clokwerk for scheduling

---
 Cargo.toml                 |  1 +
 src/main.rs                | 23 +++++++++++++++++------
 src/scrapper.rs            | 28 ++++++++--------------------
 src/scrapper/gfourmedia.rs |  1 -
 4 files changed, 26 insertions(+), 27 deletions(-)

diff --git a/Cargo.toml b/Cargo.toml
index 4f51417..4e1150f 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,3 +9,4 @@ tokio = { version = "1", features = ["full"] }
 anyhow = "1.0"
 reqwest = "0.12.9"
 scraper = "0.22.0"
+clokwerk = "0.4.0"
diff --git a/src/main.rs b/src/main.rs
index 34d13bb..56dc51d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,16 +1,27 @@
 use crate::scrapper::gfourmedia::G4Media;
 use crate::scrapper::WebScrapperEngine;
+use clokwerk::{AsyncScheduler, TimeUnits};
+use std::time::Duration;
 
 mod scrapper;
 
 #[tokio::main]
 async fn main() -> Result<(), anyhow::Error> {
     env_logger::init();
-    println!("Hello, world!");
-
-    let scrapper = WebScrapperEngine::new(G4Media::default()).await?;
-    let posts = scrapper.get_posts().await?;
-
-    posts.iter().for_each(|p| println!("{:?}", p));
+    let mut scheduler = AsyncScheduler::new();
+    scheduler.every(60.seconds()).run(|| async {
+        let posts = WebScrapperEngine::get_posts(G4Media::default())
+            .await
+            .expect("failed to get posts");
+        posts
+            .iter()
+            .filter(|p| p.is_complete())
+            .for_each(|p| println!("{:?}", p));
+    });
+    // Manually run the scheduler forever
+    loop {
+        scheduler.run_pending().await;
+        tokio::time::sleep(Duration::from_millis(10)).await;
+    }
     Ok(())
 }
diff --git a/src/scrapper.rs b/src/scrapper.rs
index 94be1f9..b65d6c8 100644
--- a/src/scrapper.rs
+++ b/src/scrapper.rs
@@ -9,8 +9,6 @@ pub struct NewsPost {
     pub title: Option<String>,
     /// A summary of the post.
     pub summary: Option<String>,
-    /// The content of the post.
-    pub content: Option<String>,
     /// A link to the post.
     pub link: Option<String>,
     /// The author of the post.
@@ -31,26 +29,16 @@ pub(crate) trait ScrappableWebPage {
 }
 
 /// The web scraper engine is used to scrape web pages.
-pub struct WebScrapperEngine<P>
-where
-    P: ScrappableWebPage,
-{
-    web_page: P,
-}
+pub struct WebScrapperEngine;
 
-impl<P> WebScrapperEngine<P>
-where
-    P: ScrappableWebPage,
-{
-    /// Creates a new instance of WebScrapperEngine
-    pub async fn new(web_page: P) -> Result<Self, anyhow::Error> {
-        Ok(WebScrapperEngine { web_page })
-    }
+impl WebScrapperEngine {
+    pub async fn get_posts<P>(web_page: P) -> Result<Vec<NewsPost>, anyhow::Error>
+    where
+        P: ScrappableWebPage,
+    {
+        let body = reqwest::get(web_page.get_url()).await?.text().await?;
 
-    pub async fn get_posts(&self) -> Result<Vec<NewsPost>, anyhow::Error> {
-        let body = reqwest::get(self.web_page.get_url()).await?.text().await?;
-
-        let results = self.web_page.get_posts(body)?;
+        let results = web_page.get_posts(body)?;
         Ok(results)
     }
 }
diff --git a/src/scrapper/gfourmedia.rs b/src/scrapper/gfourmedia.rs
index d3efdac..0e5fd8c 100644
--- a/src/scrapper/gfourmedia.rs
+++ b/src/scrapper/gfourmedia.rs
@@ -45,7 +45,6 @@ impl ScrappableWebPage for G4Media {
                 image: None,
                 title: None,
                 summary: None,
-                content: None,
                 link: None,
                 author: None,
             };