From bf205399e5c5c6c3aa54972abf35595108cfa4d6 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Fri, 27 Oct 2023 12:33:03 +0200 Subject: [PATCH] cleanup request.rs file --- Cargo.lock | 3 +- Cargo.toml | 1 - crates/api_common/Cargo.toml | 3 +- crates/api_common/src/request.rs | 193 ++++++++++++++--------------- crates/api_crud/src/post/create.rs | 3 +- crates/api_crud/src/post/update.rs | 3 +- crates/apub/src/objects/post.rs | 10 +- crates/utils/Cargo.toml | 1 - crates/utils/src/settings/mod.rs | 8 +- 9 files changed, 100 insertions(+), 125 deletions(-) diff --git a/Cargo.lock b/Cargo.lock index 50150287e..5f649541c 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2592,12 +2592,12 @@ dependencies = [ "lemmy_db_views_moderator", "lemmy_utils", "once_cell", - "percent-encoding", "regex", "reqwest", "reqwest-middleware", "rosetta-i18n", "serde", + "serde_json", "serde_with", "serial_test", "task-local-extensions", @@ -2876,7 +2876,6 @@ dependencies = [ "markdown-it", "once_cell", "openssl", - "percent-encoding", "regex", "reqwest", "reqwest-middleware", diff --git a/Cargo.toml b/Cargo.toml index 62c5f14f5..9b365a95f 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -119,7 +119,6 @@ strum_macros = "0.25.3" itertools = "0.11.0" futures = "0.3.28" http = "0.2.9" -percent-encoding = "2.3.0" rosetta-i18n = "0.1.3" opentelemetry = { version = "0.19.0", features = ["rt-tokio"] } tracing-opentelemetry = { version = "0.19.0" } diff --git a/crates/api_common/Cargo.toml b/crates/api_common/Cargo.toml index 53a1093aa..6a6264804 100644 --- a/crates/api_common/Cargo.toml +++ b/crates/api_common/Cargo.toml @@ -23,7 +23,6 @@ full = [ "lemmy_db_views_actor/full", "lemmy_db_views_moderator/full", "activitypub_federation", - "percent-encoding", "encoding", "reqwest-middleware", "webpage", @@ -52,7 +51,6 @@ tracing = { workspace = true, optional = true } reqwest-middleware = { workspace = true, optional = true } regex = { workspace = true } rosetta-i18n = { workspace = true, optional = true } -percent-encoding = { workspace = true, optional = true } anyhow = { workspace = true } futures = { workspace = true, optional = true } uuid = { workspace = true, optional = true } @@ -76,3 +74,4 @@ task-local-extensions = "0.1.4" [dev-dependencies] serial_test = { workspace = true } reqwest-middleware = { workspace = true } +serde_json = { workspace = true } diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index 6ed1173ce..436e34c78 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -7,14 +7,41 @@ use lemmy_utils::{ version::VERSION, REQWEST_TIMEOUT, }; -use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use reqwest::{Client, ClientBuilder}; use reqwest_middleware::ClientWithMiddleware; -use serde::Deserialize; +use serde::{de::DeserializeOwned, Deserialize, Deserializer}; use tracing::info; use url::Url; +use urlencoding::encode; use webpage::HTML; +/// Both are options, since the URL might be either an html page, or an image +/// Returns the SiteMetadata, and an image URL, if there is a picture associated +#[tracing::instrument(skip_all)] +pub async fn fetch_site_data( + url: Option<&Url>, + include_image: bool, + context: &LemmyContext, +) -> (Option, Option) { + match &url { + Some(url) => { + // Fetch metadata + // Ignore errors, since it may be an image, or not have the data. + // Warning, this may ignore SSL errors + let metadata_option = fetch_site_metadata(context.client(), url).await.ok(); + if !include_image { + (metadata_option, None) + } else { + let thumbnail_url = fetch_pictrs_url_from_site_metadata(&metadata_option, url, &context) + .await + .ok(); + (metadata_option, thumbnail_url) + } + } + None => (None, None), + } +} + /// Fetches the post link html tags (like title, description, image, etc) #[tracing::instrument(skip_all)] pub async fn fetch_site_metadata( @@ -33,6 +60,19 @@ pub async fn fetch_site_metadata( Ok(tags) } +pub fn client_builder(settings: &Settings) -> ClientBuilder { + let user_agent = format!( + "Lemmy/{}; +{}", + VERSION, + settings.get_protocol_and_hostname() + ); + + Client::builder() + .user_agent(user_agent.clone()) + .timeout(REQWEST_TIMEOUT) + .connect_timeout(REQWEST_TIMEOUT) +} + fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result { let html = String::from_utf8_lossy(html_bytes); @@ -97,59 +137,24 @@ fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result, msg: String, } -#[derive(Deserialize, Debug, Clone)] -pub(crate) struct PictrsFile { +#[derive(Deserialize, Debug)] +struct PictrsFile { file: String, #[allow(dead_code)] delete_token: String, } -#[derive(Deserialize, Debug, Clone)] -pub(crate) struct PictrsPurgeResponse { +#[derive(Deserialize, Debug)] +struct PictrsPurgeResponse { msg: String, } -#[tracing::instrument(skip_all)] -pub(crate) async fn fetch_pictrs( - client: &ClientWithMiddleware, - settings: &Settings, - image_url: &Url, -) -> Result { - let pictrs_config = settings.pictrs_config()?; - is_image_content_type(client, image_url).await?; - - if pictrs_config.cache_remote_thumbnails { - // fetch remote non-pictrs images for persistent thumbnail link - let fetch_url = format!( - "{}image/download?url={}", - pictrs_config.url, - utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC) // TODO this might not be needed - ); - - let response = client - .get(&fetch_url) - .timeout(REQWEST_TIMEOUT) - .send() - .await?; - - let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?; - - if response.msg == "ok" { - Ok(response) - } else { - Err(LemmyErrorType::PictrsResponseError(response.msg))? - } - } else { - Err(LemmyErrorType::PictrsCachingDisabled)? - } -} - /// Purges an image from pictrs /// Note: This should often be coerced from a Result to .ok() in order to fail softly, because: /// - It might fail due to image being not local @@ -167,13 +172,6 @@ pub async fn purge_image_from_pictrs( .next_back() .ok_or(LemmyErrorType::ImageUrlMissingLastPathSegment)?; - purge_image_from_pictrs_by_alias(alias, context).await -} - -pub async fn purge_image_from_pictrs_by_alias( - alias: &str, - context: &LemmyContext, -) -> Result<(), LemmyError> { let pictrs_config = context.settings().pictrs_config()?; let purge_url = format!("{}internal/purge?alias={}", pictrs_config.url, alias); @@ -190,10 +188,9 @@ pub async fn purge_image_from_pictrs_by_alias( let response: PictrsPurgeResponse = response.json().await.map_err(LemmyError::from)?; - if response.msg == "ok" { - Ok(()) - } else { - Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))? + match response.msg.as_str() { + "ok" => Ok(()), + _ => Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))?, } } @@ -217,62 +214,67 @@ pub async fn delete_image_from_pictrs( Ok(()) } -/// Both are options, since the URL might be either an html page, or an image -/// Returns the SiteMetadata, and an image URL, if there is a picture associated -#[tracing::instrument(skip_all)] -pub async fn fetch_site_data( - client: &ClientWithMiddleware, - settings: &Settings, - url: Option<&Url>, - include_image: bool, -) -> (Option, Option) { - match &url { - Some(url) => { - // Fetch metadata - // Ignore errors, since it may be an image, or not have the data. - // Warning, this may ignore SSL errors - let metadata_option = fetch_site_metadata(client, url).await.ok(); - if !include_image { - (metadata_option, None) - } else { - let thumbnail_url = - fetch_pictrs_url_from_site_metadata(client, &metadata_option, settings, url) - .await - .ok(); - (metadata_option, thumbnail_url) - } - } - None => (None, None), - } -} - async fn fetch_pictrs_url_from_site_metadata( - client: &ClientWithMiddleware, metadata_option: &Option, - settings: &Settings, url: &Url, + context: &LemmyContext, ) -> Result { let pictrs_res = match metadata_option { Some(metadata_res) => match &metadata_res.image { // Metadata, with image // Try to generate a small thumbnail if there's a full sized one from post-links - Some(metadata_image) => fetch_pictrs(client, settings, metadata_image).await, + Some(metadata_image) => fetch_pictrs(metadata_image, &context).await, // Metadata, but no image - None => fetch_pictrs(client, settings, url).await, + None => fetch_pictrs(url, &context).await, }, // No metadata, try to fetch the URL as an image - None => fetch_pictrs(client, settings, url).await, + None => fetch_pictrs(url, &context).await, }?; Url::parse(&format!( "{}/pictrs/image/{}", - settings.get_protocol_and_hostname(), + context.settings().get_protocol_and_hostname(), pictrs_res.files.first().expect("missing pictrs file").file )) .map(Into::into) .map_err(Into::into) } +#[tracing::instrument(skip_all)] +async fn fetch_pictrs( + image_url: &Url, + context: &LemmyContext, +) -> Result { + let pictrs_config = context.settings().pictrs_config()?; + is_image_content_type(context.client(), image_url).await?; + + if pictrs_config.cache_remote_thumbnails { + // fetch remote non-pictrs images for persistent thumbnail link + let fetch_url = format!( + "{}image/download?url={}", + pictrs_config.url, + encode(image_url.as_str()) + ); + + let response = context + .client() + .get(&fetch_url) + .timeout(REQWEST_TIMEOUT) + .send() + .await?; + + let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?; + + if response.msg == "ok" { + Ok(response) + } else { + Err(LemmyErrorType::PictrsResponseError(response.msg))? + } + } else { + Err(LemmyErrorType::PictrsCachingDisabled)? + } +} + #[tracing::instrument(skip_all)] async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Result<(), LemmyError> { let response = client.get(url.as_str()).send().await?; @@ -289,19 +291,6 @@ async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Resu } } -pub fn client_builder(settings: &Settings) -> ClientBuilder { - let user_agent = format!( - "Lemmy/{}; +{}", - VERSION, - settings.get_protocol_and_hostname() - ); - - Client::builder() - .user_agent(user_agent.clone()) - .timeout(REQWEST_TIMEOUT) - .connect_timeout(REQWEST_TIMEOUT) -} - #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] diff --git a/crates/api_crud/src/post/create.rs b/crates/api_crud/src/post/create.rs index 3ffa54685..3537738ef 100644 --- a/crates/api_crud/src/post/create.rs +++ b/crates/api_crud/src/post/create.rs @@ -83,8 +83,7 @@ pub async fn create_post( } // Fetch post links and pictrs cached image - let (metadata_res, thumbnail_url) = - fetch_site_data(context.client(), context.settings(), data_url, true).await; + let (metadata_res, thumbnail_url) = fetch_site_data(data_url, true, &context).await; let (embed_title, embed_description, embed_video_url) = metadata_res .map(|u| (u.title, u.description, u.embed_video_url)) .unwrap_or_default(); diff --git a/crates/api_crud/src/post/update.rs b/crates/api_crud/src/post/update.rs index 73a4962f9..1c47cb3f4 100644 --- a/crates/api_crud/src/post/update.rs +++ b/crates/api_crud/src/post/update.rs @@ -69,8 +69,7 @@ pub async fn update_post( // Fetch post links and Pictrs cached image let data_url = data.url.as_ref(); - let (metadata_res, thumbnail_url) = - fetch_site_data(context.client(), context.settings(), data_url, true).await; + let (metadata_res, thumbnail_url) = fetch_site_data(data_url, true, &context).await; let (embed_title, embed_description, embed_video_url) = metadata_res .map(|u| (Some(u.title), Some(u.description), Some(u.embed_video_url))) .unwrap_or_default(); diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index b8953c984..674674681 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -217,15 +217,7 @@ impl Object for ApubPost { // waste resources by fetching metadata for the same post multiple times. // Additionally, only fetch image if content is not sensitive or is allowed on local site. let (metadata_res, thumbnail) = match &url { - Some(url) if old_post.is_err() => { - fetch_site_data( - context.client(), - context.settings(), - Some(url), - include_image, - ) - .await - } + Some(url) if old_post.is_err() => fetch_site_data(Some(url), include_image, &context).await, _ => (None, None), }; // If no image was included with metadata, use post image instead when available. diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index ac094dc4d..1809318ed 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -38,7 +38,6 @@ doku = { workspace = true, features = ["url-2"] } uuid = { workspace = true, features = ["serde", "v4"] } rosetta-i18n = { workspace = true } typed-builder = { workspace = true } -percent-encoding = { workspace = true } tokio = { workspace = true } urlencoding = { workspace = true } openssl = "0.10.57" diff --git a/crates/utils/src/settings/mod.rs b/crates/utils/src/settings/mod.rs index 25aa7206d..ca320164b 100644 --- a/crates/utils/src/settings/mod.rs +++ b/crates/utils/src/settings/mod.rs @@ -6,9 +6,9 @@ use crate::{ use anyhow::{anyhow, Context}; use deser_hjson::from_str; use once_cell::sync::Lazy; -use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC}; use regex::Regex; use std::{env, fs, io::Error}; +use urlencoding::encode; pub mod structs; @@ -53,11 +53,11 @@ impl Settings { DatabaseConnection::Parts(parts) => { format!( "postgres://{}:{}@{}:{}/{}", - utf8_percent_encode(&parts.user, NON_ALPHANUMERIC), - utf8_percent_encode(&parts.password, NON_ALPHANUMERIC), + encode(&parts.user), + encode(&parts.password), parts.host, parts.port, - utf8_percent_encode(&parts.database, NON_ALPHANUMERIC), + encode(&parts.database), ) } }