Use clokwerk for scheduling

parent 1212120a91
commit 99d91ba303

4 changed files with 26 additions and 27 deletions
Cargo.toml

```diff
@@ -9,3 +9,4 @@ tokio = { version = "1", features = ["full"] }
 anyhow = "1.0"
 reqwest = "0.12.9"
 scraper = "0.22.0"
+clokwerk = "0.4.0"
```
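The new dependency is the scheduling crate used below. As orientation, here is a minimal sketch of the clokwerk 0.4 API this commit relies on: `AsyncScheduler` plus the `TimeUnits` trait, which turns integer literals into intervals. The interval values are illustrative, not from the commit:

```rust
use clokwerk::{AsyncScheduler, TimeUnits};

async fn demo() {
    let mut scheduler = AsyncScheduler::new();
    // TimeUnits converts integer literals into clokwerk Intervals.
    scheduler.every(30.seconds()).run(|| async { /* fast job */ });
    scheduler.every(5.minutes()).run(|| async { /* slower job */ });
    // One pass over due jobs; main() below drives this in a loop.
    scheduler.run_pending().await;
}
```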
src/main.rs (23 changes)

```diff
@@ -1,16 +1,27 @@
 use crate::scrapper::gfourmedia::G4Media;
 use crate::scrapper::WebScrapperEngine;
+use clokwerk::{AsyncScheduler, TimeUnits};
+use std::time::Duration;
 mod scrapper;
 
 #[tokio::main]
 async fn main() -> Result<(), anyhow::Error> {
     env_logger::init();
     println!("Hello, world!");
-
-    let scrapper = WebScrapperEngine::new(G4Media::default()).await?;
-    let posts = scrapper.get_posts().await?;
-
-    posts.iter().for_each(|p| println!("{:?}", p));
-
+    let mut scheduler = AsyncScheduler::new();
+    scheduler.every(60.seconds()).run(|| async {
+        let posts = WebScrapperEngine::get_posts(G4Media::default())
+            .await
+            .expect("failed to get posts");
+        posts
+            .iter()
+            .filter(|p| p.is_complete())
+            .for_each(|p| println!("{:?}", p));
+    });
+    // Manually run the scheduler forever
+    loop {
+        scheduler.run_pending().await;
+        tokio::time::sleep(Duration::from_millis(10)).await;
+    }
     Ok(())
 }
```
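Worth noting: the new `loop` never breaks, so the trailing `Ok(())` is now unreachable (rustc will typically warn about this), and `run_pending()` is polled every 10 ms. A hedged alternative sketch using tokio's own interval timer instead of clokwerk, assuming the same `WebScrapperEngine`/`G4Media` API from this commit, that wakes only once per period:

```rust
use std::time::Duration;

use crate::scrapper::gfourmedia::G4Media;
use crate::scrapper::WebScrapperEngine;

// Alternative sketch, not part of the commit: tokio's interval fires
// once per period, so no 10 ms polling loop is needed.
async fn run_scraper_forever() -> Result<(), anyhow::Error> {
    let mut interval = tokio::time::interval(Duration::from_secs(60));
    loop {
        interval.tick().await; // the first tick completes immediately
        let posts = WebScrapperEngine::get_posts(G4Media::default()).await?;
        posts
            .iter()
            .filter(|p| p.is_complete())
            .for_each(|p| println!("{:?}", p));
    }
}
```

clokwerk earns its keep once several jobs with different intervals share one scheduler, as in the sketch after the Cargo.toml hunk.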
src/scrapper.rs

```diff
@@ -9,8 +9,6 @@ pub struct NewsPost {
     pub title: Option<String>,
     /// A summary of the post.
     pub summary: Option<String>,
-    /// The content of the post.
-    pub content: Option<String>,
     /// A link to the post.
     pub link: Option<String>,
     /// The author of the post.
@@ -31,26 +29,16 @@ pub(crate) trait ScrappableWebPage {
 }
 
 /// The web scraper engine is used to scrape web pages.
-pub struct WebScrapperEngine<P>
-where
-    P: ScrappableWebPage,
-{
-    web_page: P,
-}
+pub struct WebScrapperEngine;
 
-impl<P> WebScrapperEngine<P>
-where
-    P: ScrappableWebPage,
-{
-    /// Creates a new instance of WebScrapperEngine
-    pub async fn new(web_page: P) -> Result<Self, anyhow::Error> {
-        Ok(WebScrapperEngine { web_page })
-    }
+impl WebScrapperEngine {
+    pub async fn get_posts<P>(web_page: P) -> Result<Vec<NewsPost>, anyhow::Error>
+    where
+        P: ScrappableWebPage,
+    {
+        let body = reqwest::get(web_page.get_url()).await?.text().await?;
 
-    pub async fn get_posts(&self) -> Result<Vec<NewsPost>, anyhow::Error> {
-        let body = reqwest::get(self.web_page.get_url()).await?.text().await?;
-
-        let results = self.web_page.get_posts(body)?;
+        let results = web_page.get_posts(body)?;
         Ok(results)
     }
 }
```
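The refactor makes `WebScrapperEngine` a stateless unit struct: rather than constructing an engine that owns the page, callers pass the page to a generic associated function. That is what lets the closure in `main.rs` call it fresh on every tick without holding an engine across the scheduler's lifetime. A usage sketch against the new signature:

```rust
use crate::scrapper::gfourmedia::G4Media;
use crate::scrapper::WebScrapperEngine;

async fn fetch_once() -> Result<(), anyhow::Error> {
    // No constructor any more; the page is passed per call.
    let posts = WebScrapperEngine::get_posts(G4Media::default()).await?;
    for post in &posts {
        println!("{:?}", post);
    }
    Ok(())
}
```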
src/scrapper/gfourmedia.rs

```diff
@@ -45,7 +45,6 @@ impl ScrappableWebPage for G4Media {
            image: None,
            title: None,
            summary: None,
-           content: None,
            link: None,
            author: None,
        };
```
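`main.rs` now filters with `p.is_complete()`, whose definition this diff does not show. A hypothetical sketch of its shape, assuming it merely checks that the scraped `Option` fields were populated (the method name comes from the commit; the body here is guesswork):

```rust
impl NewsPost {
    /// Hypothetical sketch; the real body is not part of this diff.
    pub fn is_complete(&self) -> bool {
        self.title.is_some()
            && self.summary.is_some()
            && self.link.is_some()
            && self.author.is_some()
    }
}
```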