cleanup request.rs file

cleanup-request-rs
Felix Ableitner 2023-10-27 12:33:03 +02:00
parent 95025adaca
commit bf205399e5
9 changed files with 100 additions and 125 deletions

Cargo.lock generated

@@ -2592,12 +2592,12 @@ dependencies = [
  "lemmy_db_views_moderator",
  "lemmy_utils",
  "once_cell",
- "percent-encoding",
  "regex",
  "reqwest",
  "reqwest-middleware",
  "rosetta-i18n",
  "serde",
+ "serde_json",
  "serde_with",
  "serial_test",
  "task-local-extensions",
@@ -2876,7 +2876,6 @@ dependencies = [
  "markdown-it",
  "once_cell",
  "openssl",
- "percent-encoding",
  "regex",
  "reqwest",
  "reqwest-middleware",


@@ -119,7 +119,6 @@ strum_macros = "0.25.3"
 itertools = "0.11.0"
 futures = "0.3.28"
 http = "0.2.9"
-percent-encoding = "2.3.0"
 rosetta-i18n = "0.1.3"
 opentelemetry = { version = "0.19.0", features = ["rt-tokio"] }
 tracing-opentelemetry = { version = "0.19.0" }


@@ -23,7 +23,6 @@ full = [
   "lemmy_db_views_actor/full",
   "lemmy_db_views_moderator/full",
   "activitypub_federation",
-  "percent-encoding",
   "encoding",
   "reqwest-middleware",
   "webpage",
@@ -52,7 +51,6 @@ tracing = { workspace = true, optional = true }
 reqwest-middleware = { workspace = true, optional = true }
 regex = { workspace = true }
 rosetta-i18n = { workspace = true, optional = true }
-percent-encoding = { workspace = true, optional = true }
 anyhow = { workspace = true }
 futures = { workspace = true, optional = true }
 uuid = { workspace = true, optional = true }
@@ -76,3 +74,4 @@ task-local-extensions = "0.1.4"
 [dev-dependencies]
 serial_test = { workspace = true }
 reqwest-middleware = { workspace = true }
+serde_json = { workspace = true }


@@ -7,14 +7,41 @@ use lemmy_utils::{
   version::VERSION,
   REQWEST_TIMEOUT,
 };
-use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
 use reqwest::{Client, ClientBuilder};
 use reqwest_middleware::ClientWithMiddleware;
-use serde::Deserialize;
+use serde::{de::DeserializeOwned, Deserialize, Deserializer};
 use tracing::info;
 use url::Url;
+use urlencoding::encode;
 use webpage::HTML;
 
+/// Both are options, since the URL might be either an html page, or an image
+/// Returns the SiteMetadata, and an image URL, if there is a picture associated
+#[tracing::instrument(skip_all)]
+pub async fn fetch_site_data(
+  url: Option<&Url>,
+  include_image: bool,
+  context: &LemmyContext,
+) -> (Option<SiteMetadata>, Option<DbUrl>) {
+  match &url {
+    Some(url) => {
+      // Fetch metadata
+      // Ignore errors, since it may be an image, or not have the data.
+      // Warning, this may ignore SSL errors
+      let metadata_option = fetch_site_metadata(context.client(), url).await.ok();
+      if !include_image {
+        (metadata_option, None)
+      } else {
+        let thumbnail_url = fetch_pictrs_url_from_site_metadata(&metadata_option, url, &context)
+          .await
+          .ok();
+        (metadata_option, thumbnail_url)
+      }
+    }
+    None => (None, None),
+  }
+}
+
 /// Fetches the post link html tags (like title, description, image, etc)
 #[tracing::instrument(skip_all)]
 pub async fn fetch_site_metadata(
@@ -33,6 +60,19 @@ pub async fn fetch_site_metadata(
   Ok(tags)
 }
 
+pub fn client_builder(settings: &Settings) -> ClientBuilder {
+  let user_agent = format!(
+    "Lemmy/{}; +{}",
+    VERSION,
+    settings.get_protocol_and_hostname()
+  );
+
+  Client::builder()
+    .user_agent(user_agent.clone())
+    .timeout(REQWEST_TIMEOUT)
+    .connect_timeout(REQWEST_TIMEOUT)
+}
+
 fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result<SiteMetadata, LemmyError> {
   let html = String::from_utf8_lossy(html_bytes);
@@ -97,59 +137,24 @@ fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result<SiteMetadata, L
   })
 }
 
-#[derive(Deserialize, Debug, Clone)]
-pub(crate) struct PictrsResponse {
+#[derive(Deserialize, Debug)]
+struct PictrsResponse {
   files: Vec<PictrsFile>,
   msg: String,
 }
 
-#[derive(Deserialize, Debug, Clone)]
-pub(crate) struct PictrsFile {
+#[derive(Deserialize, Debug)]
+struct PictrsFile {
   file: String,
   #[allow(dead_code)]
   delete_token: String,
 }
 
-#[derive(Deserialize, Debug, Clone)]
-pub(crate) struct PictrsPurgeResponse {
+#[derive(Deserialize, Debug)]
+struct PictrsPurgeResponse {
   msg: String,
 }
 
-#[tracing::instrument(skip_all)]
-pub(crate) async fn fetch_pictrs(
-  client: &ClientWithMiddleware,
-  settings: &Settings,
-  image_url: &Url,
-) -> Result<PictrsResponse, LemmyError> {
-  let pictrs_config = settings.pictrs_config()?;
-  is_image_content_type(client, image_url).await?;
-
-  if pictrs_config.cache_remote_thumbnails {
-    // fetch remote non-pictrs images for persistent thumbnail link
-    let fetch_url = format!(
-      "{}image/download?url={}",
-      pictrs_config.url,
-      utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed
-    );
-
-    let response = client
-      .get(&fetch_url)
-      .timeout(REQWEST_TIMEOUT)
-      .send()
-      .await?;
-
-    let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?;
-
-    if response.msg == "ok" {
-      Ok(response)
-    } else {
-      Err(LemmyErrorType::PictrsResponseError(response.msg))?
-    }
-  } else {
-    Err(LemmyErrorType::PictrsCachingDisabled)?
-  }
-}
-
 /// Purges an image from pictrs
 /// Note: This should often be coerced from a Result to .ok() in order to fail softly, because:
 /// - It might fail due to image being not local
@@ -167,13 +172,6 @@ pub async fn purge_image_from_pictrs(
     .next_back()
     .ok_or(LemmyErrorType::ImageUrlMissingLastPathSegment)?;
 
-  purge_image_from_pictrs_by_alias(alias, context).await
-}
-
-pub async fn purge_image_from_pictrs_by_alias(
-  alias: &str,
-  context: &LemmyContext,
-) -> Result<(), LemmyError> {
   let pictrs_config = context.settings().pictrs_config()?;
   let purge_url = format!("{}internal/purge?alias={}", pictrs_config.url, alias);
@@ -190,10 +188,9 @@ pub async fn purge_image_from_pictrs_by_alias(
   let response: PictrsPurgeResponse = response.json().await.map_err(LemmyError::from)?;
 
-  if response.msg == "ok" {
-    Ok(())
-  } else {
-    Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))?
+  match response.msg.as_str() {
+    "ok" => Ok(()),
+    _ => Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))?,
   }
 }
@@ -217,62 +214,67 @@ pub async fn delete_image_from_pictrs(
   Ok(())
 }
 
-/// Both are options, since the URL might be either an html page, or an image
-/// Returns the SiteMetadata, and an image URL, if there is a picture associated
-#[tracing::instrument(skip_all)]
-pub async fn fetch_site_data(
-  client: &ClientWithMiddleware,
-  settings: &Settings,
-  url: Option<&Url>,
-  include_image: bool,
-) -> (Option<SiteMetadata>, Option<DbUrl>) {
-  match &url {
-    Some(url) => {
-      // Fetch metadata
-      // Ignore errors, since it may be an image, or not have the data.
-      // Warning, this may ignore SSL errors
-      let metadata_option = fetch_site_metadata(client, url).await.ok();
-      if !include_image {
-        (metadata_option, None)
-      } else {
-        let thumbnail_url =
-          fetch_pictrs_url_from_site_metadata(client, &metadata_option, settings, url)
-            .await
-            .ok();
-        (metadata_option, thumbnail_url)
-      }
-    }
-    None => (None, None),
-  }
-}
-
 async fn fetch_pictrs_url_from_site_metadata(
-  client: &ClientWithMiddleware,
   metadata_option: &Option<SiteMetadata>,
-  settings: &Settings,
   url: &Url,
+  context: &LemmyContext,
 ) -> Result<DbUrl, LemmyError> {
   let pictrs_res = match metadata_option {
     Some(metadata_res) => match &metadata_res.image {
       // Metadata, with image
       // Try to generate a small thumbnail if there's a full sized one from post-links
-      Some(metadata_image) => fetch_pictrs(client, settings, metadata_image).await,
+      Some(metadata_image) => fetch_pictrs(metadata_image, &context).await,
       // Metadata, but no image
-      None => fetch_pictrs(client, settings, url).await,
+      None => fetch_pictrs(url, &context).await,
     },
     // No metadata, try to fetch the URL as an image
-    None => fetch_pictrs(client, settings, url).await,
+    None => fetch_pictrs(url, &context).await,
   }?;
 
   Url::parse(&format!(
     "{}/pictrs/image/{}",
-    settings.get_protocol_and_hostname(),
+    context.settings().get_protocol_and_hostname(),
     pictrs_res.files.first().expect("missing pictrs file").file
   ))
   .map(Into::into)
   .map_err(Into::into)
 }
 
+#[tracing::instrument(skip_all)]
+async fn fetch_pictrs(
+  image_url: &Url,
+  context: &LemmyContext,
+) -> Result<PictrsResponse, LemmyError> {
+  let pictrs_config = context.settings().pictrs_config()?;
+  is_image_content_type(context.client(), image_url).await?;
+
+  if pictrs_config.cache_remote_thumbnails {
+    // fetch remote non-pictrs images for persistent thumbnail link
+    let fetch_url = format!(
+      "{}image/download?url={}",
+      pictrs_config.url,
+      encode(image_url.as_str())
+    );
+
+    let response = context
+      .client()
+      .get(&fetch_url)
+      .timeout(REQWEST_TIMEOUT)
+      .send()
+      .await?;
+
+    let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?;
+
+    if response.msg == "ok" {
+      Ok(response)
+    } else {
+      Err(LemmyErrorType::PictrsResponseError(response.msg))?
+    }
+  } else {
+    Err(LemmyErrorType::PictrsCachingDisabled)?
+  }
+}
+
 #[tracing::instrument(skip_all)]
 async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Result<(), LemmyError> {
   let response = client.get(url.as_str()).send().await?;
@@ -289,19 +291,6 @@ async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Resu
   }
 }
 
-pub fn client_builder(settings: &Settings) -> ClientBuilder {
-  let user_agent = format!(
-    "Lemmy/{}; +{}",
-    VERSION,
-    settings.get_protocol_and_hostname()
-  );
-
-  Client::builder()
-    .user_agent(user_agent.clone())
-    .timeout(REQWEST_TIMEOUT)
-    .connect_timeout(REQWEST_TIMEOUT)
-}
-
 #[cfg(test)]
 mod tests {
   #![allow(clippy::unwrap_used)]
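
For context, a rough sketch of what a call site looks like after this refactor. It is illustrative only and not part of the diff; the paths lemmy_api_common::context::LemmyContext and lemmy_api_common::request::fetch_site_data are assumed from the crate layout. The explicit client and settings arguments are gone, and the LemmyContext is threaded through instead:

use lemmy_api_common::{context::LemmyContext, request::fetch_site_data};
use url::Url;

// Hypothetical helper mirroring the create_post/update_post call sites below.
async fn example_call_site(data_url: Option<&Url>, context: &LemmyContext) {
  // One call covers the metadata fetch and the optional pictrs thumbnail;
  // errors are swallowed into None, as documented on fetch_site_data.
  let (metadata_res, thumbnail_url) = fetch_site_data(data_url, true, context).await;
  let (embed_title, embed_description, embed_video_url) = metadata_res
    .map(|m| (m.title, m.description, m.embed_video_url))
    .unwrap_or_default();
  // These values (plus thumbnail_url) would then be written to the post row.
  let _ = (embed_title, embed_description, embed_video_url, thumbnail_url);
}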


@@ -83,8 +83,7 @@ pub async fn create_post(
   }
 
   // Fetch post links and pictrs cached image
-  let (metadata_res, thumbnail_url) =
-    fetch_site_data(context.client(), context.settings(), data_url, true).await;
+  let (metadata_res, thumbnail_url) = fetch_site_data(data_url, true, &context).await;
   let (embed_title, embed_description, embed_video_url) = metadata_res
     .map(|u| (u.title, u.description, u.embed_video_url))
     .unwrap_or_default();


@@ -69,8 +69,7 @@ pub async fn update_post(
   // Fetch post links and Pictrs cached image
   let data_url = data.url.as_ref();
-  let (metadata_res, thumbnail_url) =
-    fetch_site_data(context.client(), context.settings(), data_url, true).await;
+  let (metadata_res, thumbnail_url) = fetch_site_data(data_url, true, &context).await;
   let (embed_title, embed_description, embed_video_url) = metadata_res
     .map(|u| (Some(u.title), Some(u.description), Some(u.embed_video_url)))
     .unwrap_or_default();


@@ -217,15 +217,7 @@ impl Object for ApubPost {
     // waste resources by fetching metadata for the same post multiple times.
     // Additionally, only fetch image if content is not sensitive or is allowed on local site.
     let (metadata_res, thumbnail) = match &url {
-      Some(url) if old_post.is_err() => {
-        fetch_site_data(
-          context.client(),
-          context.settings(),
-          Some(url),
-          include_image,
-        )
-        .await
-      }
+      Some(url) if old_post.is_err() => fetch_site_data(Some(url), include_image, &context).await,
       _ => (None, None),
     };
     // If no image was included with metadata, use post image instead when available.


@@ -38,7 +38,6 @@ doku = { workspace = true, features = ["url-2"] }
 uuid = { workspace = true, features = ["serde", "v4"] }
 rosetta-i18n = { workspace = true }
 typed-builder = { workspace = true }
-percent-encoding = { workspace = true }
 tokio = { workspace = true }
 urlencoding = { workspace = true }
 openssl = "0.10.57"


@@ -6,9 +6,9 @@ use crate::{
 use anyhow::{anyhow, Context};
 use deser_hjson::from_str;
 use once_cell::sync::Lazy;
-use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
 use regex::Regex;
 use std::{env, fs, io::Error};
+use urlencoding::encode;
 
 pub mod structs;
@@ -53,11 +51,11 @@ impl Settings {
       DatabaseConnection::Parts(parts) => {
         format!(
           "postgres://{}:{}@{}:{}/{}",
-          utf8_percent_encode(&parts.user, NON_ALPHANUMERIC),
-          utf8_percent_encode(&parts.password, NON_ALPHANUMERIC),
+          encode(&parts.user),
+          encode(&parts.password),
           parts.host,
           parts.port,
-          utf8_percent_encode(&parts.database, NON_ALPHANUMERIC),
+          encode(&parts.database),
         )
       }
     }
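
As a footnote on the escaping swap in the connection string above, a small standalone sketch (it assumes only the urlencoding crate; the credential values are invented). urlencoding::encode percent-encodes everything except ASCII alphanumerics and the unreserved characters - _ . ~, while the old utf8_percent_encode(…, NON_ALPHANUMERIC) escaped those four as well; both produce valid, equivalent URLs:

// Build a postgres URL from parts, escaping credentials the way Settings now does.
fn main() {
  let (user, password, host, port, database) = ("lemmy", "p@ss:w/rd", "localhost", 5432, "lemmy");

  let url = format!(
    "postgres://{}:{}@{}:{}/{}",
    urlencoding::encode(user),
    urlencoding::encode(password),
    host,
    port,
    urlencoding::encode(database),
  );

  // Reserved characters in the password no longer break URL parsing.
  assert_eq!(url, "postgres://lemmy:p%40ss%3Aw%2Frd@localhost:5432/lemmy");
}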