Use clokwerk for scheduling

2024-12-21 16:24:31 +02:00 · 2024-12-21 16:24:31 +02:00 · 99d91ba303
commit 99d91ba303
parent 1212120a91
4 changed files with 26 additions and 27 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,3 +9,4 @@ tokio = { version = "1", features = ["full"] }
 anyhow = "1.0"
 reqwest = "0.12.9"
 scraper = "0.22.0"
+clokwerk = "0.4.0"
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,16 +1,27 @@
 use crate::scrapper::gfourmedia::G4Media;
 use crate::scrapper::WebScrapperEngine;
+use clokwerk::{AsyncScheduler, TimeUnits};
+use std::time::Duration;
 mod scrapper;

 #[tokio::main]
 async fn main() -> Result<(), anyhow::Error> {
    env_logger::init();
-    println!("Hello, world!");
-
-    let scrapper = WebScrapperEngine::new(G4Media::default()).await?;
-    let posts = scrapper.get_posts().await?;
-
-    posts.iter().for_each(|p| println!("{:?}", p));
+    let mut scheduler = AsyncScheduler::new();
+    scheduler.every(60.seconds()).run(|| async {
+        let posts = WebScrapperEngine::get_posts(G4Media::default())
+            .await
+            .expect("failed to get posts");
+        posts
+            .iter()
+            .filter(|p| p.is_complete())
+            .for_each(|p| println!("{:?}", p));
+    });
+    // Manually run the scheduler forever
+    loop {
+        scheduler.run_pending().await;
+        tokio::time::sleep(Duration::from_millis(10)).await;
+    }

    Ok(())
 }
--- a/src/scrapper.rs
+++ b/src/scrapper.rs
@@ -9,8 +9,6 @@ pub struct NewsPost {
    pub title: Option<String>,
    /// A summary of the post.
    pub summary: Option<String>,
-    /// The content of the post.
-    pub content: Option<String>,
    /// A link to the post.
    pub link: Option<String>,
    /// The author of the post.
@@ -31,26 +29,16 @@ pub(crate) trait ScrappableWebPage {
 }

 /// The web scraper engine is used to scrape web pages.
-pub struct WebScrapperEngine<P>
-where
-    P: ScrappableWebPage,
-{
-    web_page: P,
-}
+pub struct WebScrapperEngine;

-impl<P> WebScrapperEngine<P>
-where
-    P: ScrappableWebPage,
-{
-    /// Creates a new instance of WebScrapperEngine
-    pub async fn new(web_page: P) -> Result<Self, anyhow::Error> {
-        Ok(WebScrapperEngine { web_page })
-    }
+impl WebScrapperEngine {
+    pub async fn get_posts<P>(web_page: P) -> Result<Vec<NewsPost>, anyhow::Error>
+    where
+        P: ScrappableWebPage,
+    {
+        let body = reqwest::get(web_page.get_url()).await?.text().await?;

-    pub async fn get_posts(&self) -> Result<Vec<NewsPost>, anyhow::Error> {
-        let body = reqwest::get(self.web_page.get_url()).await?.text().await?;
-
-        let results = self.web_page.get_posts(body)?;
+        let results = web_page.get_posts(body)?;
        Ok(results)
    }
 }
--- a/src/scrapper/gfourmedia.rs
+++ b/src/scrapper/gfourmedia.rs
@@ -45,7 +45,6 @@ impl ScrappableWebPage for G4Media {
                image: None,
                title: None,
                summary: None,
-                content: None,
                link: None,
                author: None,
            };