From 303d8d303ee0a51d20fc9ab55edf9ef28cc7e3ae Mon Sep 17 00:00:00 2001
From: Denis-Cosmin NUTIU
Date: Thu, 26 Dec 2024 12:33:37 +0200
Subject: [PATCH] extract posts into a separate crate

---
 .idea/bluesky-bot.iml                       |  1 +
 Cargo.toml                                  |  3 ++-
 post/Cargo.toml                             |  7 ++++++
 post/readme.md                              |  3 +++
 post/src/lib.rs                             | 23 ++++++++++++++++++++
 scrapper/Cargo.toml                         |  1 +
 scrapper/src/main.rs                        |  7 +++---
 scrapper/src/{redis.rs => redis_service.rs} |  2 +-
 scrapper/src/scrapper.rs                    | 24 +--------------------
 scrapper/src/scrapper/gfourmedia.rs         |  3 ++-
 10 files changed, 45 insertions(+), 29 deletions(-)
 create mode 100644 post/Cargo.toml
 create mode 100644 post/readme.md
 create mode 100644 post/src/lib.rs
 rename scrapper/src/{redis.rs => redis_service.rs} (99%)

diff --git a/.idea/bluesky-bot.iml b/.idea/bluesky-bot.iml
index 077945f..e7ffe8a 100644
--- a/.idea/bluesky-bot.iml
+++ b/.idea/bluesky-bot.iml
@@ -4,6 +4,7 @@
+
diff --git a/Cargo.toml b/Cargo.toml
index a73ac4b..379b2d3 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -1,5 +1,6 @@
 [workspace]
 members = [
+    "post",
     "scrapper",
 ]
-resolver = "2"
\ No newline at end of file
+resolver = "2"
diff --git a/post/Cargo.toml b/post/Cargo.toml
new file mode 100644
index 0000000..184ca53
--- /dev/null
+++ b/post/Cargo.toml
@@ -0,0 +1,7 @@
+[package]
+name = "post"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+serde = { version = "1.0.216", features = ["derive"] }
\ No newline at end of file
diff --git a/post/readme.md b/post/readme.md
new file mode 100644
index 0000000..5b143f4
--- /dev/null
+++ b/post/readme.md
@@ -0,0 +1,3 @@
+# Post
+
+The posts module holds structs related to the data.
\ No newline at end of file
diff --git a/post/src/lib.rs b/post/src/lib.rs
new file mode 100644
index 0000000..43e3b2d
--- /dev/null
+++ b/post/src/lib.rs
@@ -0,0 +1,23 @@
+use serde::{Deserialize, Serialize};
+
+/// NewsPost represents a news post.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct NewsPost {
+    /// A URL containing the image of the post.
+    pub image: Option<String>,
+    /// The title of the post.
+    pub title: Option<String>,
+    /// A summary of the post.
+    pub summary: Option<String>,
+    /// A link to the post.
+    pub link: Option<String>,
+    /// The author of the post.
+    pub author: Option<String>,
+}
+
+impl NewsPost {
+    /// Is complete checks if the news post contains the minimum fields.
+    pub fn is_complete(&self) -> bool {
+        self.title.is_some() && self.summary.is_some() && self.link.is_some()
+    }
+}
diff --git a/scrapper/Cargo.toml b/scrapper/Cargo.toml
index fb03414..ccb8cca 100644
--- a/scrapper/Cargo.toml
+++ b/scrapper/Cargo.toml
@@ -17,6 +17,7 @@ redis = { version = "0.27.6", features = ["tokio-comp"] }
 md5 = "0.7.0"
 serde = { version = "1.0.216", features = ["derive"] }
 serde_json = "1.0.134"
+post = {path = "../post"}
 
 [dev-dependencies]
 rand = "0.8.5"
diff --git a/scrapper/src/main.rs b/scrapper/src/main.rs
index 42ead8a..11adad2 100644
--- a/scrapper/src/main.rs
+++ b/scrapper/src/main.rs
@@ -1,10 +1,11 @@
 use crate::cli::CliArgs;
-use crate::redis::RedisService;
+use crate::redis_service::RedisService;
 use crate::scrapper::gfourmedia::G4Media;
-use crate::scrapper::{NewsPost, WebScrapperEngine};
+use crate::scrapper::WebScrapperEngine;
 use clap::Parser;
 use clokwerk::{AsyncScheduler, Interval, TimeUnits};
 use log::{debug, error, info};
+use post::NewsPost;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::mpsc::{Receiver, Sender};
 use std::sync::{mpsc, Arc};
@@ -12,7 +13,7 @@ use std::time::Duration;
 use tokio::task::JoinHandle;
 
 mod cli;
-mod redis;
+mod redis_service;
 mod scrapper;
 
 /// Runs the scheduler in a separated thread.
diff --git a/scrapper/src/redis.rs b/scrapper/src/redis_service.rs
similarity index 99%
rename from scrapper/src/redis.rs
rename to scrapper/src/redis_service.rs
index cf83fa4..8dfca2a 100644
--- a/scrapper/src/redis.rs
+++ b/scrapper/src/redis_service.rs
@@ -1,5 +1,5 @@
-use crate::scrapper::NewsPost;
 use log::error;
+use post::NewsPost;
 use redis::aio::MultiplexedConnection;
 use redis::{AsyncCommands, RedisError};
 
diff --git a/scrapper/src/scrapper.rs b/scrapper/src/scrapper.rs
index 0c6cbf6..b69dd78 100644
--- a/scrapper/src/scrapper.rs
+++ b/scrapper/src/scrapper.rs
@@ -1,29 +1,7 @@
-use serde::{Deserialize, Serialize};
+use post::NewsPost;
 
 pub(crate) mod gfourmedia;
 
-/// NewsPost represents a news post.
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct NewsPost {
-    /// A URL containing the image of the post.
-    pub image: Option<String>,
-    /// The title of the post.
-    pub title: Option<String>,
-    /// A summary of the post.
-    pub summary: Option<String>,
-    /// A link to the post.
-    pub link: Option<String>,
-    /// The author of the post.
-    pub author: Option<String>,
-}
-
-impl NewsPost {
-    /// Is complete checks if the news post contains the minimum fields.
-    pub fn is_complete(&self) -> bool {
-        self.title.is_some() && self.summary.is_some() && self.link.is_some()
-    }
-}
-
 /// Represents a web scrapper which is can be scraped by the engine.
 pub(crate) trait ScrappableWebPage {
     fn get_url(&self) -> &str;
diff --git a/scrapper/src/scrapper/gfourmedia.rs b/scrapper/src/scrapper/gfourmedia.rs
index 0e5fd8c..cf6a1d1 100644
--- a/scrapper/src/scrapper/gfourmedia.rs
+++ b/scrapper/src/scrapper/gfourmedia.rs
@@ -1,5 +1,6 @@
-use crate::scrapper::{NewsPost, ScrappableWebPage};
+use crate::scrapper::ScrappableWebPage;
 use anyhow::anyhow;
+use post::NewsPost;
 use scraper::{Html, Selector};
 
 #[derive(Debug)]
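
Below is a minimal usage sketch of the extracted `post` crate from a consumer's point of view (for example the `scrapper` binary, which now depends on `post = {path = "../post"}`). The `main` function and the field values are illustrative only; the `NewsPost` struct and its `is_complete` method come from the patch above.

```rust
use post::NewsPost;

fn main() {
    // Build a post with the minimum required fields (title, summary, link) populated.
    let post = NewsPost {
        image: None,
        title: Some("Example title".to_string()),
        summary: Some("Example summary".to_string()),
        link: Some("https://example.com/article".to_string()),
        author: None,
    };

    // is_complete() checks only title, summary, and link; image and author remain optional.
    assert!(post.is_complete());
}
```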