cleanup request.rs file

cleanup-request-rs
Felix Ableitner 2023-10-27 12:33:03 +02:00
parent 95025adaca
commit bf205399e5
9 changed files with 100 additions and 125 deletions

3
Cargo.lock generated
View File

@ -2592,12 +2592,12 @@ dependencies = [
"lemmy_db_views_moderator",
"lemmy_utils",
"once_cell",
"percent-encoding",
"regex",
"reqwest",
"reqwest-middleware",
"rosetta-i18n",
"serde",
"serde_json",
"serde_with",
"serial_test",
"task-local-extensions",
@ -2876,7 +2876,6 @@ dependencies = [
"markdown-it",
"once_cell",
"openssl",
"percent-encoding",
"regex",
"reqwest",
"reqwest-middleware",

View File

@ -119,7 +119,6 @@ strum_macros = "0.25.3"
itertools = "0.11.0"
futures = "0.3.28"
http = "0.2.9"
percent-encoding = "2.3.0"
rosetta-i18n = "0.1.3"
opentelemetry = { version = "0.19.0", features = ["rt-tokio"] }
tracing-opentelemetry = { version = "0.19.0" }

View File

@ -23,7 +23,6 @@ full = [
"lemmy_db_views_actor/full",
"lemmy_db_views_moderator/full",
"activitypub_federation",
"percent-encoding",
"encoding",
"reqwest-middleware",
"webpage",
@ -52,7 +51,6 @@ tracing = { workspace = true, optional = true }
reqwest-middleware = { workspace = true, optional = true }
regex = { workspace = true }
rosetta-i18n = { workspace = true, optional = true }
percent-encoding = { workspace = true, optional = true }
anyhow = { workspace = true }
futures = { workspace = true, optional = true }
uuid = { workspace = true, optional = true }
@ -76,3 +74,4 @@ task-local-extensions = "0.1.4"
[dev-dependencies]
serial_test = { workspace = true }
reqwest-middleware = { workspace = true }
serde_json = { workspace = true }

View File

@ -7,14 +7,41 @@ use lemmy_utils::{
version::VERSION,
REQWEST_TIMEOUT,
};
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use reqwest::{Client, ClientBuilder};
use reqwest_middleware::ClientWithMiddleware;
use serde::Deserialize;
use serde::{de::DeserializeOwned, Deserialize, Deserializer};
use tracing::info;
use url::Url;
use urlencoding::encode;
use webpage::HTML;
/// Fetches the metadata for a post link and, optionally, a thumbnail URL for it.
///
/// Both returned values are options, since the URL might be either an html page, or an image.
/// Returns the `SiteMetadata`, and an image URL, if there is a picture associated.
///
/// * `url` - the link to inspect; `None` yields `(None, None)` without any request.
/// * `include_image` - when `false`, no thumbnail is looked up and the second element is `None`.
/// * `context` - provides the HTTP client for the metadata request and is forwarded to the
///   thumbnail lookup.
///
/// Errors from either step are discarded with `.ok()` and show up as `None`.
#[tracing::instrument(skip_all)]
pub async fn fetch_site_data(
  url: Option<&Url>,
  include_image: bool,
  context: &LemmyContext,
) -> (Option<SiteMetadata>, Option<DbUrl>) {
  // `url` already holds a reference, so it is matched by value instead of being re-borrowed.
  match url {
    Some(url) => {
      // Fetch metadata
      // Ignore errors, since it may be an image, or not have the data.
      // Warning, this may ignore SSL errors
      let metadata_option = fetch_site_metadata(context.client(), url).await.ok();

      let thumbnail_url = if include_image {
        // `context` is already a reference; pass it on as-is rather than as `&&LemmyContext`.
        fetch_pictrs_url_from_site_metadata(&metadata_option, url, context)
          .await
          .ok()
      } else {
        None
      };

      (metadata_option, thumbnail_url)
    }
    None => (None, None),
  }
}
/// Fetches the post link html tags (like title, description, image, etc)
#[tracing::instrument(skip_all)]
pub async fn fetch_site_metadata(
@ -33,6 +60,19 @@ pub async fn fetch_site_metadata(
Ok(tags)
}
/// Creates a `reqwest` client builder preconfigured from the given settings.
///
/// The user agent has the form `Lemmy/<VERSION>; +<protocol and hostname>`, and both the
/// request timeout and the connect timeout are set to `REQWEST_TIMEOUT`.
pub fn client_builder(settings: &Settings) -> ClientBuilder {
  let user_agent = format!(
    "Lemmy/{}; +{}",
    VERSION,
    settings.get_protocol_and_hostname()
  );

  // The string is not used again, so it is moved into the builder instead of being cloned.
  Client::builder()
    .user_agent(user_agent)
    .timeout(REQWEST_TIMEOUT)
    .connect_timeout(REQWEST_TIMEOUT)
}
fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result<SiteMetadata, LemmyError> {
let html = String::from_utf8_lossy(html_bytes);
@ -97,59 +137,24 @@ fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result<SiteMetadata, L
})
}
#[derive(Deserialize, Debug, Clone)]
pub(crate) struct PictrsResponse {
#[derive(Deserialize, Debug)]
struct PictrsResponse {
files: Vec<PictrsFile>,
msg: String,
}
#[derive(Deserialize, Debug, Clone)]
pub(crate) struct PictrsFile {
#[derive(Deserialize, Debug)]
struct PictrsFile {
file: String,
#[allow(dead_code)]
delete_token: String,
}
#[derive(Deserialize, Debug, Clone)]
pub(crate) struct PictrsPurgeResponse {
#[derive(Deserialize, Debug)]
struct PictrsPurgeResponse {
msg: String,
}
/// Requests the pictrs `image/download` endpoint for `image_url` and returns the parsed response.
///
/// # Errors
///
/// Fails, in this order, when:
/// - the pictrs config cannot be read from `settings`,
/// - `is_image_content_type` rejects `image_url`,
/// - `cache_remote_thumbnails` is disabled (`PictrsCachingDisabled`),
/// - the request or the JSON decoding of its body fails,
/// - the response `msg` is anything other than `"ok"` (`PictrsResponseError`).
#[tracing::instrument(skip_all)]
pub(crate) async fn fetch_pictrs(
  client: &ClientWithMiddleware,
  settings: &Settings,
  image_url: &Url,
) -> Result<PictrsResponse, LemmyError> {
  let pictrs_config = settings.pictrs_config()?;
  is_image_content_type(client, image_url).await?;

  if !pictrs_config.cache_remote_thumbnails {
    return Err(LemmyErrorType::PictrsCachingDisabled.into());
  }

  // fetch remote non-pictrs images for persistent thumbnail link
  // TODO the percent-encoding of the image url might not be needed
  let encoded_image_url = utf8_percent_encode(image_url.as_str(), NON_ALPHANUMERIC);
  let fetch_url = format!(
    "{}image/download?url={}",
    pictrs_config.url, encoded_image_url
  );

  let pictrs_response: PictrsResponse = client
    .get(&fetch_url)
    .timeout(REQWEST_TIMEOUT)
    .send()
    .await?
    .json()
    .await
    .map_err(LemmyError::from)?;

  if pictrs_response.msg != "ok" {
    return Err(LemmyErrorType::PictrsResponseError(pictrs_response.msg).into());
  }
  Ok(pictrs_response)
}
/// Purges an image from pictrs
/// Note: This should often be coerced from a Result to .ok() in order to fail softly, because:
/// - It might fail due to image being not local
@ -167,13 +172,6 @@ pub async fn purge_image_from_pictrs(
.next_back()
.ok_or(LemmyErrorType::ImageUrlMissingLastPathSegment)?;
purge_image_from_pictrs_by_alias(alias, context).await
}
pub async fn purge_image_from_pictrs_by_alias(
alias: &str,
context: &LemmyContext,
) -> Result<(), LemmyError> {
let pictrs_config = context.settings().pictrs_config()?;
let purge_url = format!("{}internal/purge?alias={}", pictrs_config.url, alias);
@ -190,10 +188,9 @@ pub async fn purge_image_from_pictrs_by_alias(
let response: PictrsPurgeResponse = response.json().await.map_err(LemmyError::from)?;
if response.msg == "ok" {
Ok(())
} else {
Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))?
match response.msg.as_str() {
"ok" => Ok(()),
_ => Err(LemmyErrorType::PictrsPurgeResponseError(response.msg))?,
}
}
@ -217,62 +214,67 @@ pub async fn delete_image_from_pictrs(
Ok(())
}
/// Looks up the metadata of a post link and, if requested, a thumbnail URL for it.
///
/// Both results are options, since the URL might be either an html page, or an image.
/// The first element is the `SiteMetadata`, the second an image URL if there is a picture
/// associated. A missing `url` gives `(None, None)`; failures of either lookup become `None`.
#[tracing::instrument(skip_all)]
pub async fn fetch_site_data(
  client: &ClientWithMiddleware,
  settings: &Settings,
  url: Option<&Url>,
  include_image: bool,
) -> (Option<SiteMetadata>, Option<DbUrl>) {
  // Nothing to fetch without a link.
  let link = match url {
    Some(link) => link,
    None => return (None, None),
  };

  // Fetch metadata
  // Ignore errors, since it may be an image, or not have the data.
  // Warning, this may ignore SSL errors
  let site_metadata = fetch_site_metadata(client, link).await.ok();

  let thumbnail = if include_image {
    fetch_pictrs_url_from_site_metadata(client, &site_metadata, settings, link)
      .await
      .ok()
  } else {
    None
  };

  (site_metadata, thumbnail)
}
async fn fetch_pictrs_url_from_site_metadata(
client: &ClientWithMiddleware,
metadata_option: &Option<SiteMetadata>,
settings: &Settings,
url: &Url,
context: &LemmyContext,
) -> Result<DbUrl, LemmyError> {
let pictrs_res = match metadata_option {
Some(metadata_res) => match &metadata_res.image {
// Metadata, with image
// Try to generate a small thumbnail if there's a full sized one from post-links
Some(metadata_image) => fetch_pictrs(client, settings, metadata_image).await,
Some(metadata_image) => fetch_pictrs(metadata_image, &context).await,
// Metadata, but no image
None => fetch_pictrs(client, settings, url).await,
None => fetch_pictrs(url, &context).await,
},
// No metadata, try to fetch the URL as an image
None => fetch_pictrs(client, settings, url).await,
None => fetch_pictrs(url, &context).await,
}?;
Url::parse(&format!(
"{}/pictrs/image/{}",
settings.get_protocol_and_hostname(),
context.settings().get_protocol_and_hostname(),
pictrs_res.files.first().expect("missing pictrs file").file
))
.map(Into::into)
.map_err(Into::into)
}
/// Requests the pictrs `image/download` endpoint for `image_url` and returns the parsed response.
///
/// The pictrs config and the HTTP client are both taken from `context`.
///
/// # Errors
///
/// Fails, in this order, when:
/// - the pictrs config cannot be read from the settings,
/// - `is_image_content_type` rejects `image_url`,
/// - `cache_remote_thumbnails` is disabled (`PictrsCachingDisabled`),
/// - the request or the JSON decoding of its body fails,
/// - the response `msg` is anything other than `"ok"` (`PictrsResponseError`).
#[tracing::instrument(skip_all)]
async fn fetch_pictrs(
  image_url: &Url,
  context: &LemmyContext,
) -> Result<PictrsResponse, LemmyError> {
  let pictrs_config = context.settings().pictrs_config()?;
  is_image_content_type(context.client(), image_url).await?;

  if !pictrs_config.cache_remote_thumbnails {
    return Err(LemmyErrorType::PictrsCachingDisabled.into());
  }

  // fetch remote non-pictrs images for persistent thumbnail link
  let encoded_image_url = encode(image_url.as_str());
  let fetch_url = format!(
    "{}image/download?url={}",
    pictrs_config.url, encoded_image_url
  );

  let pictrs_response: PictrsResponse = context
    .client()
    .get(&fetch_url)
    .timeout(REQWEST_TIMEOUT)
    .send()
    .await?
    .json()
    .await
    .map_err(LemmyError::from)?;

  if pictrs_response.msg != "ok" {
    return Err(LemmyErrorType::PictrsResponseError(pictrs_response.msg).into());
  }
  Ok(pictrs_response)
}
#[tracing::instrument(skip_all)]
async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Result<(), LemmyError> {
let response = client.get(url.as_str()).send().await?;
@ -289,19 +291,6 @@ async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Resu
}
}
/// Creates a `reqwest` client builder preconfigured from the given settings.
///
/// The user agent has the form `Lemmy/<VERSION>; +<protocol and hostname>`, and both the
/// request timeout and the connect timeout are set to `REQWEST_TIMEOUT`.
pub fn client_builder(settings: &Settings) -> ClientBuilder {
  let user_agent = format!(
    "Lemmy/{}; +{}",
    VERSION,
    settings.get_protocol_and_hostname()
  );

  // The string is not used again, so it is moved into the builder instead of being cloned.
  Client::builder()
    .user_agent(user_agent)
    .timeout(REQWEST_TIMEOUT)
    .connect_timeout(REQWEST_TIMEOUT)
}
#[cfg(test)]
mod tests {
#![allow(clippy::unwrap_used)]

View File

@ -83,8 +83,7 @@ pub async fn create_post(
}
// Fetch post links and pictrs cached image
let (metadata_res, thumbnail_url) =
fetch_site_data(context.client(), context.settings(), data_url, true).await;
let (metadata_res, thumbnail_url) = fetch_site_data(data_url, true, &context).await;
let (embed_title, embed_description, embed_video_url) = metadata_res
.map(|u| (u.title, u.description, u.embed_video_url))
.unwrap_or_default();

View File

@ -69,8 +69,7 @@ pub async fn update_post(
// Fetch post links and Pictrs cached image
let data_url = data.url.as_ref();
let (metadata_res, thumbnail_url) =
fetch_site_data(context.client(), context.settings(), data_url, true).await;
let (metadata_res, thumbnail_url) = fetch_site_data(data_url, true, &context).await;
let (embed_title, embed_description, embed_video_url) = metadata_res
.map(|u| (Some(u.title), Some(u.description), Some(u.embed_video_url)))
.unwrap_or_default();

View File

@ -217,15 +217,7 @@ impl Object for ApubPost {
// waste resources by fetching metadata for the same post multiple times.
// Additionally, only fetch image if content is not sensitive or is allowed on local site.
let (metadata_res, thumbnail) = match &url {
Some(url) if old_post.is_err() => {
fetch_site_data(
context.client(),
context.settings(),
Some(url),
include_image,
)
.await
}
Some(url) if old_post.is_err() => fetch_site_data(Some(url), include_image, &context).await,
_ => (None, None),
};
// If no image was included with metadata, use post image instead when available.

View File

@ -38,7 +38,6 @@ doku = { workspace = true, features = ["url-2"] }
uuid = { workspace = true, features = ["serde", "v4"] }
rosetta-i18n = { workspace = true }
typed-builder = { workspace = true }
percent-encoding = { workspace = true }
tokio = { workspace = true }
urlencoding = { workspace = true }
openssl = "0.10.57"

View File

@ -6,9 +6,9 @@ use crate::{
use anyhow::{anyhow, Context};
use deser_hjson::from_str;
use once_cell::sync::Lazy;
use percent_encoding::{utf8_percent_encode, NON_ALPHANUMERIC};
use regex::Regex;
use std::{env, fs, io::Error};
use urlencoding::encode;
pub mod structs;
@ -53,11 +53,11 @@ impl Settings {
DatabaseConnection::Parts(parts) => {
format!(
"postgres://{}:{}@{}:{}/{}",
utf8_percent_encode(&parts.user, NON_ALPHANUMERIC),
utf8_percent_encode(&parts.password, NON_ALPHANUMERIC),
encode(&parts.user),
encode(&parts.password),
parts.host,
parts.port,
utf8_percent_encode(&parts.database, NON_ALPHANUMERIC),
encode(&parts.database),
)
}
}