Use clokwerk for scheduling

This commit is contained in:
Denis-Cosmin Nutiu 2024-12-21 16:24:31 +02:00
parent 1212120a91
commit 99d91ba303
4 changed files with 26 additions and 27 deletions

View file

@ -9,3 +9,4 @@ tokio = { version = "1", features = ["full"] }
anyhow = "1.0"
reqwest = "0.12.9"
scraper = "0.22.0"
clokwerk = "0.4.0"

View file

@ -1,16 +1,27 @@
use crate::scrapper::gfourmedia::G4Media;
use crate::scrapper::WebScrapperEngine;
use clokwerk::{AsyncScheduler, TimeUnits};
use std::time::Duration;
mod scrapper;
#[tokio::main]
async fn main() -> Result<(), anyhow::Error> {
env_logger::init();
println!("Hello, world!");
let scrapper = WebScrapperEngine::new(G4Media::default()).await?;
let posts = scrapper.get_posts().await?;
posts.iter().for_each(|p| println!("{:?}", p));
let mut scheduler = AsyncScheduler::new();
scheduler.every(60.seconds()).run(|| async {
let posts = WebScrapperEngine::get_posts(G4Media::default())
.await
.expect("failed to get posts");
posts
.iter()
.filter(|p| p.is_complete())
.for_each(|p| println!("{:?}", p));
});
// Manually run the scheduler forever
loop {
scheduler.run_pending().await;
tokio::time::sleep(Duration::from_millis(10)).await;
}
Ok(())
}

View file

@ -9,8 +9,6 @@ pub struct NewsPost {
pub title: Option<String>,
/// A summary of the post.
pub summary: Option<String>,
/// The content of the post.
pub content: Option<String>,
/// A link to the post.
pub link: Option<String>,
/// The author of the post.
@ -31,26 +29,16 @@ pub(crate) trait ScrappableWebPage {
}
/// The web scraper engine is used to scrape web pages.
pub struct WebScrapperEngine<P>
where
P: ScrappableWebPage,
{
web_page: P,
}
pub struct WebScrapperEngine;
impl<P> WebScrapperEngine<P>
where
P: ScrappableWebPage,
{
/// Creates a new instance of WebScrapperEngine
pub async fn new(web_page: P) -> Result<Self, anyhow::Error> {
Ok(WebScrapperEngine { web_page })
}
impl WebScrapperEngine {
pub async fn get_posts<P>(web_page: P) -> Result<Vec<NewsPost>, anyhow::Error>
where
P: ScrappableWebPage,
{
let body = reqwest::get(web_page.get_url()).await?.text().await?;
pub async fn get_posts(&self) -> Result<Vec<NewsPost>, anyhow::Error> {
let body = reqwest::get(self.web_page.get_url()).await?.text().await?;
let results = self.web_page.get_posts(body)?;
let results = web_page.get_posts(body)?;
Ok(results)
}
}

View file

@ -45,7 +45,6 @@ impl ScrappableWebPage for G4Media {
image: None,
title: None,
summary: None,
content: None,
link: None,
author: None,
};