Use clokwerk for scheduling
This commit is contained in:
parent
1212120a91
commit
99d91ba303
4 changed files with 26 additions and 27 deletions
|
@ -9,3 +9,4 @@ tokio = { version = "1", features = ["full"] }
|
||||||
anyhow = "1.0"
|
anyhow = "1.0"
|
||||||
reqwest = "0.12.9"
|
reqwest = "0.12.9"
|
||||||
scraper = "0.22.0"
|
scraper = "0.22.0"
|
||||||
|
clokwerk = "0.4.0"
|
||||||
|
|
23
src/main.rs
23
src/main.rs
|
@ -1,16 +1,27 @@
|
||||||
use crate::scrapper::gfourmedia::G4Media;
|
use crate::scrapper::gfourmedia::G4Media;
|
||||||
use crate::scrapper::WebScrapperEngine;
|
use crate::scrapper::WebScrapperEngine;
|
||||||
|
use clokwerk::{AsyncScheduler, TimeUnits};
|
||||||
|
use std::time::Duration;
|
||||||
mod scrapper;
|
mod scrapper;
|
||||||
|
|
||||||
#[tokio::main]
|
#[tokio::main]
|
||||||
async fn main() -> Result<(), anyhow::Error> {
|
async fn main() -> Result<(), anyhow::Error> {
|
||||||
env_logger::init();
|
env_logger::init();
|
||||||
println!("Hello, world!");
|
let mut scheduler = AsyncScheduler::new();
|
||||||
|
scheduler.every(60.seconds()).run(|| async {
|
||||||
let scrapper = WebScrapperEngine::new(G4Media::default()).await?;
|
let posts = WebScrapperEngine::get_posts(G4Media::default())
|
||||||
let posts = scrapper.get_posts().await?;
|
.await
|
||||||
|
.expect("failed to get posts");
|
||||||
posts.iter().for_each(|p| println!("{:?}", p));
|
posts
|
||||||
|
.iter()
|
||||||
|
.filter(|p| p.is_complete())
|
||||||
|
.for_each(|p| println!("{:?}", p));
|
||||||
|
});
|
||||||
|
// Manually run the scheduler forever
|
||||||
|
loop {
|
||||||
|
scheduler.run_pending().await;
|
||||||
|
tokio::time::sleep(Duration::from_millis(10)).await;
|
||||||
|
}
|
||||||
|
|
||||||
Ok(())
|
Ok(())
|
||||||
}
|
}
|
||||||
|
|
|
@ -9,8 +9,6 @@ pub struct NewsPost {
|
||||||
pub title: Option<String>,
|
pub title: Option<String>,
|
||||||
/// A summary of the post.
|
/// A summary of the post.
|
||||||
pub summary: Option<String>,
|
pub summary: Option<String>,
|
||||||
/// The content of the post.
|
|
||||||
pub content: Option<String>,
|
|
||||||
/// A link to the post.
|
/// A link to the post.
|
||||||
pub link: Option<String>,
|
pub link: Option<String>,
|
||||||
/// The author of the post.
|
/// The author of the post.
|
||||||
|
@ -31,26 +29,16 @@ pub(crate) trait ScrappableWebPage {
|
||||||
}
|
}
|
||||||
|
|
||||||
/// The web scraper engine is used to scrape web pages.
|
/// The web scraper engine is used to scrape web pages.
|
||||||
pub struct WebScrapperEngine<P>
|
pub struct WebScrapperEngine;
|
||||||
|
|
||||||
|
impl WebScrapperEngine {
|
||||||
|
pub async fn get_posts<P>(web_page: P) -> Result<Vec<NewsPost>, anyhow::Error>
|
||||||
where
|
where
|
||||||
P: ScrappableWebPage,
|
P: ScrappableWebPage,
|
||||||
{
|
{
|
||||||
web_page: P,
|
let body = reqwest::get(web_page.get_url()).await?.text().await?;
|
||||||
}
|
|
||||||
|
|
||||||
impl<P> WebScrapperEngine<P>
|
let results = web_page.get_posts(body)?;
|
||||||
where
|
|
||||||
P: ScrappableWebPage,
|
|
||||||
{
|
|
||||||
/// Creates a new instance of WebScrapperEngine
|
|
||||||
pub async fn new(web_page: P) -> Result<Self, anyhow::Error> {
|
|
||||||
Ok(WebScrapperEngine { web_page })
|
|
||||||
}
|
|
||||||
|
|
||||||
pub async fn get_posts(&self) -> Result<Vec<NewsPost>, anyhow::Error> {
|
|
||||||
let body = reqwest::get(self.web_page.get_url()).await?.text().await?;
|
|
||||||
|
|
||||||
let results = self.web_page.get_posts(body)?;
|
|
||||||
Ok(results)
|
Ok(results)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
@ -45,7 +45,6 @@ impl ScrappableWebPage for G4Media {
|
||||||
image: None,
|
image: None,
|
||||||
title: None,
|
title: None,
|
||||||
summary: None,
|
summary: None,
|
||||||
content: None,
|
|
||||||
link: None,
|
link: None,
|
||||||
author: None,
|
author: None,
|
||||||
};
|
};
|
||||||
|
|
Loading…
Reference in a new issue