Use clokwerk for scheduling

2024-12-21 16:24:31 +02:00 · 2024-12-21 16:24:31 +02:00 · 99d91ba303
commit 99d91ba303
parent 1212120a91
4 changed files with 26 additions and 27 deletions
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -9,3 +9,4 @@ tokio = { version = "1", features = ["full"] }
 anyhow = "1.0"
 reqwest = "0.12.9"
 scraper = "0.22.0"
 clokwerk = "0.4.0"
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,16 +1,27 @@
 use crate::scrapper::gfourmedia::G4Media;
 use crate::scrapper::WebScrapperEngine;
 use clokwerk::{AsyncScheduler, TimeUnits};
 use std::time::Duration;
 mod scrapper;
 #[tokio::main]
 async fn main() -> Result<(), anyhow::Error> {
    env_logger::init();
-    println!("Hello, world!");
+    let mut scheduler = AsyncScheduler::new();
-
+    scheduler.every(60.seconds()).run(|| async {
-    let scrapper = WebScrapperEngine::new(G4Media::default()).await?;
+        let posts = WebScrapperEngine::get_posts(G4Media::default())
-    let posts = scrapper.get_posts().await?;
+            .await
-
+            .expect("failed to get posts");
-    posts.iter().for_each(|p| println!("{:?}", p));
+        posts
            .iter()
            .filter(|p| p.is_complete())
            .for_each(|p| println!("{:?}", p));
    });
    // Manually run the scheduler forever
    loop {
        scheduler.run_pending().await;
        tokio::time::sleep(Duration::from_millis(10)).await;
    }
    Ok(())
 }
--- a/src/scrapper.rs
+++ b/src/scrapper.rs
@@ -9,8 +9,6 @@ pub struct NewsPost {
    pub title: Option<String>,
    /// A summary of the post.
    pub summary: Option<String>,
    /// The content of the post.
    pub content: Option<String>,
    /// A link to the post.
    pub link: Option<String>,
    /// The author of the post.
@@ -31,26 +29,16 @@ pub(crate) trait ScrappableWebPage {
 }
 /// The web scraper engine is used to scrape web pages.
-pub struct WebScrapperEngine<P>
+pub struct WebScrapperEngine;
 where
    P: ScrappableWebPage,
 {
    web_page: P,
 }
-impl<P> WebScrapperEngine<P>
+impl WebScrapperEngine {
-where
+    pub async fn get_posts<P>(web_page: P) -> Result<Vec<NewsPost>, anyhow::Error>
-    P: ScrappableWebPage,
+    where
-{
+        P: ScrappableWebPage,
-    /// Creates a new instance of WebScrapperEngine
+    {
-    pub async fn new(web_page: P) -> Result<Self, anyhow::Error> {
+        let body = reqwest::get(web_page.get_url()).await?.text().await?;
        Ok(WebScrapperEngine { web_page })
    }
-    pub async fn get_posts(&self) -> Result<Vec<NewsPost>, anyhow::Error> {
+        let results = web_page.get_posts(body)?;
        let body = reqwest::get(self.web_page.get_url()).await?.text().await?;
        let results = self.web_page.get_posts(body)?;
        Ok(results)
    }
 }
--- a/src/scrapper/gfourmedia.rs
+++ b/src/scrapper/gfourmedia.rs
@@ -45,7 +45,6 @@ impl ScrappableWebPage for G4Media {
                image: None,
                title: None,
                summary: None,
                content: None,
                link: None,
                author: None,
            };