mirror of https://github.com/LemmyNet/lemmy.git
more cleanup (fixes #2611)
parent
bf205399e5
commit
c8c355dbe2
|
@ -2591,6 +2591,7 @@ dependencies = [
|
|||
"lemmy_db_views_actor",
|
||||
"lemmy_db_views_moderator",
|
||||
"lemmy_utils",
|
||||
"mime",
|
||||
"once_cell",
|
||||
"regex",
|
||||
"reqwest",
|
||||
|
|
|
@ -2,7 +2,7 @@ use actix_web::web::{Data, Json};
|
|||
use lemmy_api_common::{
|
||||
context::LemmyContext,
|
||||
post::{GetSiteMetadata, GetSiteMetadataResponse},
|
||||
request::fetch_site_metadata,
|
||||
request::fetch_link_metadata,
|
||||
};
|
||||
use lemmy_utils::error::LemmyError;
|
||||
|
||||
|
@ -11,7 +11,7 @@ pub async fn get_link_metadata(
|
|||
data: Json<GetSiteMetadata>,
|
||||
context: Data<LemmyContext>,
|
||||
) -> Result<Json<GetSiteMetadataResponse>, LemmyError> {
|
||||
let metadata = fetch_site_metadata(context.client(), &data.url).await?;
|
||||
let metadata = fetch_link_metadata(&data.url, false, &context).await?;
|
||||
|
||||
Ok(Json(GetSiteMetadataResponse { metadata }))
|
||||
}
|
||||
|
|
|
@ -70,6 +70,7 @@ jsonwebtoken = { version = "8.3.0", optional = true }
|
|||
# necessary for wasm compilation
|
||||
getrandom = { version = "0.2.10", features = ["js"] }
|
||||
task-local-extensions = "0.1.4"
|
||||
mime = "0.3.17"
|
||||
|
||||
[dev-dependencies]
|
||||
serial_test = { workspace = true }
|
||||
|
|
|
@ -238,17 +238,20 @@ pub struct GetSiteMetadata {
|
|||
#[cfg_attr(feature = "full", ts(export))]
|
||||
/// The site metadata response.
|
||||
pub struct GetSiteMetadataResponse {
|
||||
pub metadata: SiteMetadata,
|
||||
pub metadata: LinkMetadata,
|
||||
}
|
||||
|
||||
#[skip_serializing_none]
|
||||
#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, Clone)]
|
||||
#[derive(Debug, Deserialize, Serialize, PartialEq, Eq, Clone, Default)]
|
||||
#[cfg_attr(feature = "full", derive(TS))]
|
||||
#[cfg_attr(feature = "full", ts(export))]
|
||||
/// Site metadata, from its opengraph tags.
|
||||
pub struct SiteMetadata {
|
||||
pub struct LinkMetadata {
|
||||
pub title: Option<String>,
|
||||
pub description: Option<String>,
|
||||
pub(crate) image: Option<DbUrl>,
|
||||
pub embed_video_url: Option<DbUrl>,
|
||||
pub content_type: Option<String>,
|
||||
#[serde(skip)]
|
||||
pub thumbnail: Option<DbUrl>,
|
||||
}
|
||||
|
|
|
@ -1,65 +1,20 @@
|
|||
use crate::{context::LemmyContext, post::SiteMetadata};
|
||||
use crate::{context::LemmyContext, post::LinkMetadata};
|
||||
use encoding::{all::encodings, DecoderTrap};
|
||||
use lemmy_db_schema::newtypes::DbUrl;
|
||||
use lemmy_utils::{
|
||||
error::{LemmyError, LemmyErrorType},
|
||||
settings::structs::Settings,
|
||||
version::VERSION,
|
||||
REQWEST_TIMEOUT,
|
||||
};
|
||||
use reqwest::{Client, ClientBuilder};
|
||||
use mime::Mime;
|
||||
use reqwest::{header::CONTENT_TYPE, Client, ClientBuilder};
|
||||
use reqwest_middleware::ClientWithMiddleware;
|
||||
use serde::{de::DeserializeOwned, Deserialize, Deserializer};
|
||||
use serde::Deserialize;
|
||||
use tracing::info;
|
||||
use url::Url;
|
||||
use urlencoding::encode;
|
||||
use webpage::HTML;
|
||||
|
||||
/// Both are options, since the URL might be either an html page, or an image
|
||||
/// Returns the SiteMetadata, and an image URL, if there is a picture associated
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn fetch_site_data(
|
||||
url: Option<&Url>,
|
||||
include_image: bool,
|
||||
context: &LemmyContext,
|
||||
) -> (Option<SiteMetadata>, Option<DbUrl>) {
|
||||
match &url {
|
||||
Some(url) => {
|
||||
// Fetch metadata
|
||||
// Ignore errors, since it may be an image, or not have the data.
|
||||
// Warning, this may ignore SSL errors
|
||||
let metadata_option = fetch_site_metadata(context.client(), url).await.ok();
|
||||
if !include_image {
|
||||
(metadata_option, None)
|
||||
} else {
|
||||
let thumbnail_url = fetch_pictrs_url_from_site_metadata(&metadata_option, url, &context)
|
||||
.await
|
||||
.ok();
|
||||
(metadata_option, thumbnail_url)
|
||||
}
|
||||
}
|
||||
None => (None, None),
|
||||
}
|
||||
}
|
||||
|
||||
/// Fetches the post link html tags (like title, description, image, etc)
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn fetch_site_metadata(
|
||||
client: &ClientWithMiddleware,
|
||||
url: &Url,
|
||||
) -> Result<SiteMetadata, LemmyError> {
|
||||
info!("Fetching site metadata for url: {}", url);
|
||||
let response = client.get(url.as_str()).send().await?;
|
||||
|
||||
// Can't use .text() here, because it only checks the content header, not the actual bytes
|
||||
// https://github.com/LemmyNet/lemmy/issues/1964
|
||||
let html_bytes = response.bytes().await.map_err(LemmyError::from)?.to_vec();
|
||||
|
||||
let tags = html_to_site_metadata(&html_bytes, url)?;
|
||||
|
||||
Ok(tags)
|
||||
}
|
||||
|
||||
pub fn client_builder(settings: &Settings) -> ClientBuilder {
|
||||
let user_agent = format!(
|
||||
"Lemmy/{}; +{}",
|
||||
|
@ -73,7 +28,47 @@ pub fn client_builder(settings: &Settings) -> ClientBuilder {
|
|||
.connect_timeout(REQWEST_TIMEOUT)
|
||||
}
|
||||
|
||||
fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result<SiteMetadata, LemmyError> {
|
||||
/// Fetches metadata for the given link and optionally generates a thumbnail.
|
||||
#[tracing::instrument(skip_all)]
|
||||
pub async fn fetch_link_metadata(
|
||||
url: &Url,
|
||||
generate_thumbnail: bool,
|
||||
context: &LemmyContext,
|
||||
) -> Result<LinkMetadata, LemmyError> {
|
||||
info!("Fetching site metadata for url: {}", url);
|
||||
let response = context.client().get(url.as_str()).send().await?;
|
||||
|
||||
let content_type: Option<Mime> = response
|
||||
.headers()
|
||||
.get(CONTENT_TYPE)
|
||||
.and_then(|h| h.to_str().ok())
|
||||
.and_then(|h| h.parse().ok());
|
||||
let is_image = content_type.as_ref().unwrap_or(&mime::TEXT_PLAIN).type_() == mime::IMAGE;
|
||||
|
||||
// Can't use .text() here, because it only checks the content header, not the actual bytes
|
||||
// https://github.com/LemmyNet/lemmy/issues/1964
|
||||
let html_bytes = response.bytes().await.map_err(LemmyError::from)?.to_vec();
|
||||
|
||||
let mut metadata = extract_opengraph_data(&html_bytes, url)?;
|
||||
|
||||
metadata.content_type = content_type.map(|c| c.to_string());
|
||||
if generate_thumbnail && is_image {
|
||||
let image_url = metadata
|
||||
.image
|
||||
.as_ref()
|
||||
.map(lemmy_db_schema::newtypes::DbUrl::inner)
|
||||
.unwrap_or(url);
|
||||
metadata.thumbnail = generate_pictrs_thumbnail(image_url, context)
|
||||
.await
|
||||
.ok()
|
||||
.map(Into::into);
|
||||
}
|
||||
|
||||
Ok(metadata)
|
||||
}
|
||||
|
||||
/// Extract site metadata from HTML Opengraph attributes.
|
||||
fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result<LinkMetadata, LemmyError> {
|
||||
let html = String::from_utf8_lossy(html_bytes);
|
||||
|
||||
// Make sure the first line is doctype html
|
||||
|
@ -129,11 +124,13 @@ fn html_to_site_metadata(html_bytes: &[u8], url: &Url) -> Result<SiteMetadata, L
|
|||
// join also works if the target URL is absolute
|
||||
.and_then(|v| url.join(&v.url).ok());
|
||||
|
||||
Ok(SiteMetadata {
|
||||
Ok(LinkMetadata {
|
||||
title: og_title.or(page_title),
|
||||
description: og_description.or(page_description),
|
||||
image: og_image.map(Into::into),
|
||||
embed_video_url: og_embed_url.map(Into::into),
|
||||
content_type: None,
|
||||
thumbnail: None,
|
||||
})
|
||||
}
|
||||
|
||||
|
@ -214,42 +211,17 @@ pub async fn delete_image_from_pictrs(
|
|||
Ok(())
|
||||
}
|
||||
|
||||
async fn fetch_pictrs_url_from_site_metadata(
|
||||
metadata_option: &Option<SiteMetadata>,
|
||||
url: &Url,
|
||||
context: &LemmyContext,
|
||||
) -> Result<DbUrl, LemmyError> {
|
||||
let pictrs_res = match metadata_option {
|
||||
Some(metadata_res) => match &metadata_res.image {
|
||||
// Metadata, with image
|
||||
// Try to generate a small thumbnail if there's a full sized one from post-links
|
||||
Some(metadata_image) => fetch_pictrs(metadata_image, &context).await,
|
||||
// Metadata, but no image
|
||||
None => fetch_pictrs(url, &context).await,
|
||||
},
|
||||
// No metadata, try to fetch the URL as an image
|
||||
None => fetch_pictrs(url, &context).await,
|
||||
}?;
|
||||
|
||||
Url::parse(&format!(
|
||||
"{}/pictrs/image/{}",
|
||||
context.settings().get_protocol_and_hostname(),
|
||||
pictrs_res.files.first().expect("missing pictrs file").file
|
||||
))
|
||||
.map(Into::into)
|
||||
.map_err(Into::into)
|
||||
}
|
||||
|
||||
/// Retrieves the image with local pict-rs and generates a thumbnail. Returns the thumbnail url.
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn fetch_pictrs(
|
||||
async fn generate_pictrs_thumbnail(
|
||||
image_url: &Url,
|
||||
context: &LemmyContext,
|
||||
) -> Result<PictrsResponse, LemmyError> {
|
||||
) -> Result<Url, LemmyError> {
|
||||
let pictrs_config = context.settings().pictrs_config()?;
|
||||
is_image_content_type(context.client(), image_url).await?;
|
||||
|
||||
if pictrs_config.cache_remote_thumbnails {
|
||||
// fetch remote non-pictrs images for persistent thumbnail link
|
||||
// TODO: should limit size once supported by pictrs
|
||||
let fetch_url = format!(
|
||||
"{}image/download?url={}",
|
||||
pictrs_config.url,
|
||||
|
@ -263,18 +235,25 @@ async fn fetch_pictrs(
|
|||
.send()
|
||||
.await?;
|
||||
|
||||
let response: PictrsResponse = response.json().await.map_err(LemmyError::from)?;
|
||||
let response: PictrsResponse = response.json().await?;
|
||||
|
||||
if response.msg == "ok" {
|
||||
Ok(response)
|
||||
let thumbnail_url = Url::parse(&format!(
|
||||
"{}/pictrs/image/{}",
|
||||
context.settings().get_protocol_and_hostname(),
|
||||
response.files.first().expect("missing pictrs file").file
|
||||
))?;
|
||||
Ok(thumbnail_url)
|
||||
} else {
|
||||
Err(LemmyErrorType::PictrsResponseError(response.msg))?
|
||||
}
|
||||
} else {
|
||||
Err(LemmyErrorType::PictrsCachingDisabled)?
|
||||
// return the original image as "thumbnail"
|
||||
Ok(image_url.clone())
|
||||
}
|
||||
}
|
||||
|
||||
// TODO: get rid of this
|
||||
#[tracing::instrument(skip_all)]
|
||||
async fn is_image_content_type(client: &ClientWithMiddleware, url: &Url) -> Result<(), LemmyError> {
|
||||
let response = client.get(url.as_str()).send().await?;
|
||||
|
@ -296,32 +275,39 @@ mod tests {
|
|||
#![allow(clippy::unwrap_used)]
|
||||
#![allow(clippy::indexing_slicing)]
|
||||
|
||||
use crate::request::{client_builder, fetch_site_metadata, html_to_site_metadata, SiteMetadata};
|
||||
use lemmy_utils::settings::SETTINGS;
|
||||
use crate::{
|
||||
context::LemmyContext,
|
||||
request::{extract_opengraph_data, fetch_link_metadata},
|
||||
};
|
||||
use url::Url;
|
||||
|
||||
// These helped with testing
|
||||
#[tokio::test]
|
||||
async fn test_site_metadata() {
|
||||
let settings = &SETTINGS.clone();
|
||||
let client = client_builder(settings).build().unwrap().into();
|
||||
async fn test_link_metadata() {
|
||||
let context = LemmyContext::init_test_context().await;
|
||||
let sample_url = Url::parse("https://gitlab.com/IzzyOnDroid/repo/-/wikis/FAQ").unwrap();
|
||||
let sample_res = fetch_site_metadata(&client, &sample_url).await.unwrap();
|
||||
let sample_res = fetch_link_metadata(&sample_url, false, &context)
|
||||
.await
|
||||
.unwrap();
|
||||
assert_eq!(
|
||||
SiteMetadata {
|
||||
title: Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()),
|
||||
description: Some(
|
||||
"The F-Droid compatible repo at https://apt.izzysoft.de/fdroid/".to_string()
|
||||
),
|
||||
image: Some(
|
||||
Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()),
|
||||
sample_res.title
|
||||
);
|
||||
assert_eq!(
|
||||
Some("The F-Droid compatible repo at https://apt.izzysoft.de/fdroid/".to_string()),
|
||||
sample_res.description
|
||||
);
|
||||
assert_eq!(
|
||||
Some(
|
||||
Url::parse("https://gitlab.com/uploads/-/system/project/avatar/4877469/iod_logo.png")
|
||||
.unwrap()
|
||||
.into()
|
||||
),
|
||||
embed_video_url: None,
|
||||
},
|
||||
sample_res
|
||||
sample_res.image
|
||||
);
|
||||
assert_eq!(None, sample_res.embed_video_url);
|
||||
assert_eq!(None, sample_res.content_type);
|
||||
assert_eq!(None, sample_res.thumbnail);
|
||||
}
|
||||
|
||||
// #[test]
|
||||
|
@ -339,7 +325,7 @@ mod tests {
|
|||
|
||||
// root relative url
|
||||
let html_bytes = b"<!DOCTYPE html><html><head><meta property='og:image' content='/image.jpg'></head><body></body></html>";
|
||||
let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata");
|
||||
let metadata = extract_opengraph_data(html_bytes, &url).expect("Unable to parse metadata");
|
||||
assert_eq!(
|
||||
metadata.image,
|
||||
Some(Url::parse("https://example.com/image.jpg").unwrap().into())
|
||||
|
@ -347,7 +333,7 @@ mod tests {
|
|||
|
||||
// base relative url
|
||||
let html_bytes = b"<!DOCTYPE html><html><head><meta property='og:image' content='image.jpg'></head><body></body></html>";
|
||||
let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata");
|
||||
let metadata = extract_opengraph_data(html_bytes, &url).expect("Unable to parse metadata");
|
||||
assert_eq!(
|
||||
metadata.image,
|
||||
Some(
|
||||
|
@ -359,7 +345,7 @@ mod tests {
|
|||
|
||||
// absolute url
|
||||
let html_bytes = b"<!DOCTYPE html><html><head><meta property='og:image' content='https://cdn.host.com/image.jpg'></head><body></body></html>";
|
||||
let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata");
|
||||
let metadata = extract_opengraph_data(html_bytes, &url).expect("Unable to parse metadata");
|
||||
assert_eq!(
|
||||
metadata.image,
|
||||
Some(Url::parse("https://cdn.host.com/image.jpg").unwrap().into())
|
||||
|
@ -367,7 +353,7 @@ mod tests {
|
|||
|
||||
// protocol relative url
|
||||
let html_bytes = b"<!DOCTYPE html><html><head><meta property='og:image' content='//example.com/image.jpg'></head><body></body></html>";
|
||||
let metadata = html_to_site_metadata(html_bytes, &url).expect("Unable to parse metadata");
|
||||
let metadata = extract_opengraph_data(html_bytes, &url).expect("Unable to parse metadata");
|
||||
assert_eq!(
|
||||
metadata.image,
|
||||
Some(Url::parse("https://example.com/image.jpg").unwrap().into())
|
||||
|
|
|
@ -4,7 +4,7 @@ use lemmy_api_common::{
|
|||
build_response::build_post_response,
|
||||
context::LemmyContext,
|
||||
post::{CreatePost, PostResponse},
|
||||
request::fetch_site_data,
|
||||
request::fetch_link_metadata,
|
||||
send_activity::{ActivityChannel, SendActivityData},
|
||||
utils::{
|
||||
check_community_user_action,
|
||||
|
@ -83,10 +83,12 @@ pub async fn create_post(
|
|||
}
|
||||
|
||||
// Fetch post links and pictrs cached image
|
||||
let (metadata_res, thumbnail_url) = fetch_site_data(data_url, true, &context).await;
|
||||
let (embed_title, embed_description, embed_video_url) = metadata_res
|
||||
.map(|u| (u.title, u.description, u.embed_video_url))
|
||||
.unwrap_or_default();
|
||||
let metadata = match data_url {
|
||||
Some(url) => fetch_link_metadata(url, true, &context)
|
||||
.await
|
||||
.unwrap_or_default(),
|
||||
_ => Default::default(),
|
||||
};
|
||||
|
||||
// Only need to check if language is allowed in case user set it explicitly. When using default
|
||||
// language, it already only returns allowed languages.
|
||||
|
@ -117,11 +119,11 @@ pub async fn create_post(
|
|||
.community_id(data.community_id)
|
||||
.creator_id(local_user_view.person.id)
|
||||
.nsfw(data.nsfw)
|
||||
.embed_title(embed_title)
|
||||
.embed_description(embed_description)
|
||||
.embed_video_url(embed_video_url)
|
||||
.embed_title(metadata.title)
|
||||
.embed_description(metadata.description)
|
||||
.embed_video_url(metadata.embed_video_url)
|
||||
.language_id(language_id)
|
||||
.thumbnail_url(thumbnail_url)
|
||||
.thumbnail_url(metadata.thumbnail)
|
||||
.build();
|
||||
|
||||
let inserted_post = Post::create(&mut context.pool(), &post_form)
|
||||
|
|
|
@ -4,7 +4,7 @@ use lemmy_api_common::{
|
|||
build_response::build_post_response,
|
||||
context::LemmyContext,
|
||||
post::{EditPost, PostResponse},
|
||||
request::fetch_site_data,
|
||||
request::fetch_link_metadata,
|
||||
send_activity::{ActivityChannel, SendActivityData},
|
||||
utils::{check_community_user_action, local_site_to_slur_regex, process_markdown_opt},
|
||||
};
|
||||
|
@ -68,11 +68,12 @@ pub async fn update_post(
|
|||
}
|
||||
|
||||
// Fetch post links and Pictrs cached image
|
||||
let data_url = data.url.as_ref();
|
||||
let (metadata_res, thumbnail_url) = fetch_site_data(data_url, true, &context).await;
|
||||
let (embed_title, embed_description, embed_video_url) = metadata_res
|
||||
.map(|u| (Some(u.title), Some(u.description), Some(u.embed_video_url)))
|
||||
.unwrap_or_default();
|
||||
let metadata = match data_url {
|
||||
Some(url) => fetch_link_metadata(url, true, &context)
|
||||
.await
|
||||
.unwrap_or_default(),
|
||||
_ => Default::default(),
|
||||
};
|
||||
|
||||
let language_id = data.language_id;
|
||||
CommunityLanguage::is_allowed_community_language(
|
||||
|
@ -87,11 +88,11 @@ pub async fn update_post(
|
|||
url,
|
||||
body: diesel_option_overwrite(body),
|
||||
nsfw: data.nsfw,
|
||||
embed_title,
|
||||
embed_description,
|
||||
embed_video_url,
|
||||
embed_title: Some(metadata.title),
|
||||
embed_description: Some(metadata.description),
|
||||
embed_video_url: Some(metadata.embed_video_url),
|
||||
language_id: data.language_id,
|
||||
thumbnail_url: Some(thumbnail_url),
|
||||
thumbnail_url: Some(metadata.thumbnail),
|
||||
updated: Some(Some(naive_now())),
|
||||
..Default::default()
|
||||
};
|
||||
|
|
|
@ -24,7 +24,7 @@ use chrono::{DateTime, Utc};
|
|||
use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator};
|
||||
use lemmy_api_common::{
|
||||
context::LemmyContext,
|
||||
request::fetch_site_data,
|
||||
request::fetch_link_metadata,
|
||||
utils::{
|
||||
is_mod_or_admin,
|
||||
local_site_opt_to_sensitive,
|
||||
|
@ -216,16 +216,12 @@ impl Object for ApubPost {
|
|||
// Only fetch metadata if the post has a url and was not seen previously. We don't want to
|
||||
// waste resources by fetching metadata for the same post multiple times.
|
||||
// Additionally, only fetch image if content is not sensitive or is allowed on local site.
|
||||
let (metadata_res, thumbnail) = match &url {
|
||||
Some(url) if old_post.is_err() => fetch_site_data(Some(url), include_image, &context).await,
|
||||
_ => (None, None),
|
||||
let metadata = match &url {
|
||||
Some(url) => fetch_link_metadata(url, include_image, context)
|
||||
.await
|
||||
.unwrap_or_default(),
|
||||
_ => Default::default(),
|
||||
};
|
||||
// If no image was included with metadata, use post image instead when available.
|
||||
let thumbnail_url = thumbnail.or_else(|| page.image.map(|i| i.url.into()));
|
||||
|
||||
let (embed_title, embed_description, embed_video_url) = metadata_res
|
||||
.map(|u| (u.title, u.description, u.embed_video_url))
|
||||
.unwrap_or_default();
|
||||
let slur_regex = &local_site_opt_to_slur_regex(&local_site);
|
||||
|
||||
let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source);
|
||||
|
@ -245,10 +241,10 @@ impl Object for ApubPost {
|
|||
updated: page.updated.map(Into::into),
|
||||
deleted: Some(false),
|
||||
nsfw: page.sensitive,
|
||||
embed_title,
|
||||
embed_description,
|
||||
embed_video_url,
|
||||
thumbnail_url,
|
||||
embed_title: metadata.title,
|
||||
embed_description: metadata.description,
|
||||
embed_video_url: metadata.embed_video_url,
|
||||
thumbnail_url: metadata.thumbnail,
|
||||
ap_id: Some(page.id.clone().into()),
|
||||
local: Some(false),
|
||||
language_id,
|
||||
|
|
Loading…
Reference in New Issue