extract posts into a separate crate
This commit is contained in:
parent
7a3e69d572
commit
303d8d303e
10 changed files with 45 additions and 29 deletions
|
@ -4,6 +4,7 @@
|
||||||
<content url="file://$MODULE_DIR$">
|
<content url="file://$MODULE_DIR$">
|
||||||
<sourceFolder url="file://$MODULE_DIR$/scrapper/src" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/scrapper/src" isTestSource="false" />
|
||||||
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
||||||
|
<sourceFolder url="file://$MODULE_DIR$/post/src" isTestSource="false" />
|
||||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||||
</content>
|
</content>
|
||||||
<orderEntry type="inheritedJdk" />
|
<orderEntry type="inheritedJdk" />
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
[workspace]
|
[workspace]
|
||||||
members = [
|
members = [
|
||||||
|
"post",
|
||||||
"scrapper",
|
"scrapper",
|
||||||
]
|
]
|
||||||
resolver = "2"
|
resolver = "2"
|
||||||
|
|
7
post/Cargo.toml
Normal file
7
post/Cargo.toml
Normal file
|
@ -0,0 +1,7 @@
|
||||||
|
[package]
|
||||||
|
name = "post"
|
||||||
|
version = "0.1.0"
|
||||||
|
edition = "2021"
|
||||||
|
|
||||||
|
[dependencies]
|
||||||
|
serde = { version = "1.0.216", features = ["derive"] }
|
3
post/readme.md
Normal file
3
post/readme.md
Normal file
|
@ -0,0 +1,3 @@
|
||||||
|
# Post
|
||||||
|
|
||||||
|
The `post` crate holds the data structures that describe news posts.
|
23
post/src/lib.rs
Normal file
23
post/src/lib.rs
Normal file
|
@ -0,0 +1,23 @@
|
||||||
|
use serde::{Deserialize, Serialize};
|
||||||
|
|
||||||
|
/// NewsPost represents a news post.
|
||||||
|
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||||
|
pub struct NewsPost {
|
||||||
|
/// A URL containing the image of the post.
|
||||||
|
pub image: Option<String>,
|
||||||
|
/// The title of the post.
|
||||||
|
pub title: Option<String>,
|
||||||
|
/// A summary of the post.
|
||||||
|
pub summary: Option<String>,
|
||||||
|
/// A link to the post.
|
||||||
|
pub link: Option<String>,
|
||||||
|
/// The author of the post.
|
||||||
|
pub author: Option<String>,
|
||||||
|
}
|
||||||
|
|
||||||
|
impl NewsPost {
|
||||||
|
/// Checks whether the news post contains the minimum required fields (title, summary and link).
|
||||||
|
pub fn is_complete(&self) -> bool {
|
||||||
|
self.title.is_some() && self.summary.is_some() && self.link.is_some()
|
||||||
|
}
|
||||||
|
}
|
|
@ -17,6 +17,7 @@ redis = { version = "0.27.6", features = ["tokio-comp"] }
|
||||||
md5 = "0.7.0"
|
md5 = "0.7.0"
|
||||||
serde = { version = "1.0.216", features = ["derive"] }
|
serde = { version = "1.0.216", features = ["derive"] }
|
||||||
serde_json = "1.0.134"
|
serde_json = "1.0.134"
|
||||||
|
post = {path = "../post"}
|
||||||
|
|
||||||
[dev-dependencies]
|
[dev-dependencies]
|
||||||
rand = "0.8.5"
|
rand = "0.8.5"
|
||||||
|
|
|
@ -1,10 +1,11 @@
|
||||||
use crate::cli::CliArgs;
|
use crate::cli::CliArgs;
|
||||||
use crate::redis::RedisService;
|
use crate::redis_service::RedisService;
|
||||||
use crate::scrapper::gfourmedia::G4Media;
|
use crate::scrapper::gfourmedia::G4Media;
|
||||||
use crate::scrapper::{NewsPost, WebScrapperEngine};
|
use crate::scrapper::WebScrapperEngine;
|
||||||
use clap::Parser;
|
use clap::Parser;
|
||||||
use clokwerk::{AsyncScheduler, Interval, TimeUnits};
|
use clokwerk::{AsyncScheduler, Interval, TimeUnits};
|
||||||
use log::{debug, error, info};
|
use log::{debug, error, info};
|
||||||
|
use post::NewsPost;
|
||||||
use std::sync::atomic::{AtomicBool, Ordering};
|
use std::sync::atomic::{AtomicBool, Ordering};
|
||||||
use std::sync::mpsc::{Receiver, Sender};
|
use std::sync::mpsc::{Receiver, Sender};
|
||||||
use std::sync::{mpsc, Arc};
|
use std::sync::{mpsc, Arc};
|
||||||
|
@ -12,7 +13,7 @@ use std::time::Duration;
|
||||||
use tokio::task::JoinHandle;
|
use tokio::task::JoinHandle;
|
||||||
|
|
||||||
mod cli;
|
mod cli;
|
||||||
mod redis;
|
mod redis_service;
|
||||||
mod scrapper;
|
mod scrapper;
|
||||||
|
|
||||||
/// Runs the scheduler in a separate thread.
|
/// Runs the scheduler in a separate thread.
|
||||||
|
|
|
@ -1,5 +1,5 @@
|
||||||
use crate::scrapper::NewsPost;
|
|
||||||
use log::error;
|
use log::error;
|
||||||
|
use post::NewsPost;
|
||||||
use redis::aio::MultiplexedConnection;
|
use redis::aio::MultiplexedConnection;
|
||||||
use redis::{AsyncCommands, RedisError};
|
use redis::{AsyncCommands, RedisError};
|
||||||
|
|
|
@ -1,29 +1,7 @@
|
||||||
use serde::{Deserialize, Serialize};
|
use post::NewsPost;
|
||||||
|
|
||||||
pub(crate) mod gfourmedia;
|
pub(crate) mod gfourmedia;
|
||||||
|
|
||||||
/// NewsPost represents a news post.
|
|
||||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
|
||||||
pub struct NewsPost {
|
|
||||||
/// A URL containing the image of the post.
|
|
||||||
pub image: Option<String>,
|
|
||||||
/// The title of the post.
|
|
||||||
pub title: Option<String>,
|
|
||||||
/// A summary of the post.
|
|
||||||
pub summary: Option<String>,
|
|
||||||
/// A link to the post.
|
|
||||||
pub link: Option<String>,
|
|
||||||
/// The author of the post.
|
|
||||||
pub author: Option<String>,
|
|
||||||
}
|
|
||||||
|
|
||||||
impl NewsPost {
|
|
||||||
/// Is complete checks if the news post contains the minimum fields.
|
|
||||||
pub fn is_complete(&self) -> bool {
|
|
||||||
self.title.is_some() && self.summary.is_some() && self.link.is_some()
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/// Represents a web page that can be scraped by the engine.
|
/// Represents a web page that can be scraped by the engine.
|
||||||
pub(crate) trait ScrappableWebPage {
|
pub(crate) trait ScrappableWebPage {
|
||||||
fn get_url(&self) -> &str;
|
fn get_url(&self) -> &str;
|
||||||
|
|
|
@ -1,5 +1,6 @@
|
||||||
use crate::scrapper::{NewsPost, ScrappableWebPage};
|
use crate::scrapper::ScrappableWebPage;
|
||||||
use anyhow::anyhow;
|
use anyhow::anyhow;
|
||||||
|
use post::NewsPost;
|
||||||
use scraper::{Html, Selector};
|
use scraper::{Html, Selector};
|
||||||
|
|
||||||
#[derive(Debug)]
|
#[derive(Debug)]
|
||||||
|
|
Loading…
Reference in a new issue