extract posts into a separate crate
This commit is contained in:
parent
7a3e69d572
commit
303d8d303e
10 changed files with 45 additions and 29 deletions
|
@ -4,6 +4,7 @@
|
|||
<content url="file://$MODULE_DIR$">
|
||||
<sourceFolder url="file://$MODULE_DIR$/scrapper/src" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/src" isTestSource="false" />
|
||||
<sourceFolder url="file://$MODULE_DIR$/post/src" isTestSource="false" />
|
||||
<excludeFolder url="file://$MODULE_DIR$/target" />
|
||||
</content>
|
||||
<orderEntry type="inheritedJdk" />
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
[workspace]
|
||||
members = [
|
||||
"post",
|
||||
"scrapper",
|
||||
]
|
||||
resolver = "2"
|
7
post/Cargo.toml
Normal file
7
post/Cargo.toml
Normal file
|
@ -0,0 +1,7 @@
|
|||
[package]
|
||||
name = "post"
|
||||
version = "0.1.0"
|
||||
edition = "2021"
|
||||
|
||||
[dependencies]
|
||||
serde = { version = "1.0.216", features = ["derive"] }
|
3
post/readme.md
Normal file
3
post/readme.md
Normal file
|
@ -0,0 +1,3 @@
|
|||
# Post
|
||||
|
||||
The post crate holds the data structures describing a news post, shared by the other crates in the workspace.
|
23
post/src/lib.rs
Normal file
23
post/src/lib.rs
Normal file
|
@ -0,0 +1,23 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
|
||||
/// NewsPost represents a news post.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct NewsPost {
|
||||
/// A URL containing the image of the post.
|
||||
pub image: Option<String>,
|
||||
/// The title of the post.
|
||||
pub title: Option<String>,
|
||||
/// A summary of the post.
|
||||
pub summary: Option<String>,
|
||||
/// A link to the post.
|
||||
pub link: Option<String>,
|
||||
/// The author of the post.
|
||||
pub author: Option<String>,
|
||||
}
|
||||
|
||||
impl NewsPost {
|
||||
/// Is complete checks if the news post contains the minimum fields.
|
||||
pub fn is_complete(&self) -> bool {
|
||||
self.title.is_some() && self.summary.is_some() && self.link.is_some()
|
||||
}
|
||||
}
|
|
@ -17,6 +17,7 @@ redis = { version = "0.27.6", features = ["tokio-comp"] }
|
|||
md5 = "0.7.0"
|
||||
serde = { version = "1.0.216", features = ["derive"] }
|
||||
serde_json = "1.0.134"
|
||||
post = {path = "../post"}
|
||||
|
||||
[dev-dependencies]
|
||||
rand = "0.8.5"
|
||||
|
|
|
@ -1,10 +1,11 @@
|
|||
use crate::cli::CliArgs;
|
||||
use crate::redis::RedisService;
|
||||
use crate::redis_service::RedisService;
|
||||
use crate::scrapper::gfourmedia::G4Media;
|
||||
use crate::scrapper::{NewsPost, WebScrapperEngine};
|
||||
use crate::scrapper::WebScrapperEngine;
|
||||
use clap::Parser;
|
||||
use clokwerk::{AsyncScheduler, Interval, TimeUnits};
|
||||
use log::{debug, error, info};
|
||||
use post::NewsPost;
|
||||
use std::sync::atomic::{AtomicBool, Ordering};
|
||||
use std::sync::mpsc::{Receiver, Sender};
|
||||
use std::sync::{mpsc, Arc};
|
||||
|
@ -12,7 +13,7 @@ use std::time::Duration;
|
|||
use tokio::task::JoinHandle;
|
||||
|
||||
mod cli;
|
||||
mod redis;
|
||||
mod redis_service;
|
||||
mod scrapper;
|
||||
|
||||
/// Runs the scheduler in a separated thread.
|
||||
|
|
|
@ -1,5 +1,5 @@
|
|||
use crate::scrapper::NewsPost;
|
||||
use log::error;
|
||||
use post::NewsPost;
|
||||
use redis::aio::MultiplexedConnection;
|
||||
use redis::{AsyncCommands, RedisError};
|
||||
|
|
@ -1,29 +1,7 @@
|
|||
use serde::{Deserialize, Serialize};
|
||||
use post::NewsPost;
|
||||
|
||||
pub(crate) mod gfourmedia;
|
||||
|
||||
/// NewsPost represents a news post.
|
||||
#[derive(Serialize, Deserialize, Debug, Clone)]
|
||||
pub struct NewsPost {
|
||||
/// A URL containing the image of the post.
|
||||
pub image: Option<String>,
|
||||
/// The title of the post.
|
||||
pub title: Option<String>,
|
||||
/// A summary of the post.
|
||||
pub summary: Option<String>,
|
||||
/// A link to the post.
|
||||
pub link: Option<String>,
|
||||
/// The author of the post.
|
||||
pub author: Option<String>,
|
||||
}
|
||||
|
||||
impl NewsPost {
|
||||
/// Is complete checks if the news post contains the minimum fields.
|
||||
pub fn is_complete(&self) -> bool {
|
||||
self.title.is_some() && self.summary.is_some() && self.link.is_some()
|
||||
}
|
||||
}
|
||||
|
||||
/// Represents a web page which can be scraped by the engine.
|
||||
pub(crate) trait ScrappableWebPage {
|
||||
fn get_url(&self) -> &str;
|
||||
|
|
|
@ -1,5 +1,6 @@
|
|||
use crate::scrapper::{NewsPost, ScrappableWebPage};
|
||||
use crate::scrapper::ScrappableWebPage;
|
||||
use anyhow::anyhow;
|
||||
use post::NewsPost;
|
||||
use scraper::{Html, Selector};
|
||||
|
||||
#[derive(Debug)]
|
||||
|
|
Loading…
Reference in a new issue