extract posts into a separate crate

Denis-Cosmin NUTIU 2024-12-26 12:33:37 +02:00
parent 7a3e69d572
commit 303d8d303e
10 changed files with 45 additions and 29 deletions


@@ -4,6 +4,7 @@
     <content url="file://$MODULE_DIR$">
       <sourceFolder url="file://$MODULE_DIR$/scrapper/src" isTestSource="false" />
       <sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
+      <sourceFolder url="file://$MODULE_DIR$/post/src" isTestSource="false" />
       <excludeFolder url="file://$MODULE_DIR$/target" />
     </content>
     <orderEntry type="inheritedJdk" />


@@ -1,5 +1,6 @@
 [workspace]
 members = [
+    "post",
     "scrapper",
 ]
-resolver = "2"
+resolver = "2"

post/Cargo.toml (new file, +7)

@@ -0,0 +1,7 @@
+[package]
+name = "post"
+version = "0.1.0"
+edition = "2021"
+
+[dependencies]
+serde = { version = "1.0.216", features = ["derive"] }

post/readme.md (new file, +3)

@@ -0,0 +1,3 @@
+# Post
+
+The posts module holds structs related to the data.

post/src/lib.rs (new file, +23)

@@ -0,0 +1,23 @@
+use serde::{Deserialize, Serialize};
+
+/// NewsPost represents a news post.
+#[derive(Serialize, Deserialize, Debug, Clone)]
+pub struct NewsPost {
+    /// A URL containing the image of the post.
+    pub image: Option<String>,
+    /// The title of the post.
+    pub title: Option<String>,
+    /// A summary of the post.
+    pub summary: Option<String>,
+    /// A link to the post.
+    pub link: Option<String>,
+    /// The author of the post.
+    pub author: Option<String>,
+}
+
+impl NewsPost {
+    /// Is complete checks if the news post contains the minimum fields.
+    pub fn is_complete(&self) -> bool {
+        self.title.is_some() && self.summary.is_some() && self.link.is_some()
+    }
+}
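As an aside (not part of this commit), a minimal sketch of how a dependent crate such as scrapper can use the extracted struct once it declares the post path dependency shown in the hunks below; the field values and the URL here are made up for illustration:

    use post::NewsPost;

    fn main() {
        // Build a post that has the three fields is_complete() requires.
        let post = NewsPost {
            image: None,
            title: Some("Example title".to_string()),
            summary: Some("Example summary".to_string()),
            link: Some("https://example.com/article".to_string()),
            author: None,
        };
        // title, summary, and link are all Some, so the post counts as complete.
        assert!(post.is_complete());
    }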


@@ -17,6 +17,7 @@ redis = { version = "0.27.6", features = ["tokio-comp"] }
 md5 = "0.7.0"
 serde = { version = "1.0.216", features = ["derive"] }
 serde_json = "1.0.134"
+post = {path = "../post"}
 
 [dev-dependencies]
 rand = "0.8.5"


@@ -1,10 +1,11 @@
 use crate::cli::CliArgs;
-use crate::redis::RedisService;
+use crate::redis_service::RedisService;
 use crate::scrapper::gfourmedia::G4Media;
-use crate::scrapper::{NewsPost, WebScrapperEngine};
+use crate::scrapper::WebScrapperEngine;
 use clap::Parser;
 use clokwerk::{AsyncScheduler, Interval, TimeUnits};
 use log::{debug, error, info};
+use post::NewsPost;
 use std::sync::atomic::{AtomicBool, Ordering};
 use std::sync::mpsc::{Receiver, Sender};
 use std::sync::{mpsc, Arc};
@@ -12,7 +13,7 @@ use std::time::Duration;
 use tokio::task::JoinHandle;
 
 mod cli;
-mod redis;
+mod redis_service;
 mod scrapper;
 
 /// Runs the scheduler in a separated thread.


@@ -1,5 +1,5 @@
-use crate::scrapper::NewsPost;
 use log::error;
+use post::NewsPost;
 use redis::aio::MultiplexedConnection;
 use redis::{AsyncCommands, RedisError};


@@ -1,29 +1,7 @@
-use serde::{Deserialize, Serialize};
+use post::NewsPost;
 
 pub(crate) mod gfourmedia;
 
-/// NewsPost represents a news post.
-#[derive(Serialize, Deserialize, Debug, Clone)]
-pub struct NewsPost {
-    /// A URL containing the image of the post.
-    pub image: Option<String>,
-    /// The title of the post.
-    pub title: Option<String>,
-    /// A summary of the post.
-    pub summary: Option<String>,
-    /// A link to the post.
-    pub link: Option<String>,
-    /// The author of the post.
-    pub author: Option<String>,
-}
-
-impl NewsPost {
-    /// Is complete checks if the news post contains the minimum fields.
-    pub fn is_complete(&self) -> bool {
-        self.title.is_some() && self.summary.is_some() && self.link.is_some()
-    }
-}
-
 /// Represents a web scrapper which is can be scraped by the engine.
 pub(crate) trait ScrappableWebPage {
     fn get_url(&self) -> &str;


@@ -1,5 +1,6 @@
-use crate::scrapper::{NewsPost, ScrappableWebPage};
+use crate::scrapper::ScrappableWebPage;
 use anyhow::anyhow;
+use post::NewsPost;
 use scraper::{Html, Selector};
 
 #[derive(Debug)]