Untangle thumbnail generation logic (ref #4604) (#4615)

* Untangle thumbnail generation logic (ref #4604)

* prettier

* test cleanup

* fix tests

* also consider opengraph image for local thumbnail generation
pull/4638/head
Nutomic 2024-04-17 16:36:45 +02:00 committed by GitHub
parent b0370ae2fd
commit 31829b6c05
No known key found for this signature in database
GPG Key ID: B5690EEEBB952194
8 changed files with 65 additions and 120 deletions

View File

@ -27,9 +27,10 @@ import {
setupLogins,
waitForPost,
unfollows,
editPostThumbnail,
getPost,
waitUntil,
randomString,
createPostWithThumbnail,
} from "./shared";
const downloadFileSync = require("download-file-sync");
@ -269,10 +270,11 @@ test("Make regular post, and give it a custom thumbnail", async () => {
// Use wikipedia since it has an opengraph image
const wikipediaUrl = "https://wikipedia.org/";
let post = await createPost(
let post = await createPostWithThumbnail(
alphaImage,
community.community_view.community.id,
wikipediaUrl,
upload1.url!,
);
// Wait for the metadata to get fetched, since this is backgrounded now
@ -281,17 +283,7 @@ test("Make regular post, and give it a custom thumbnail", async () => {
p => p.post_view.post.thumbnail_url != undefined,
);
expect(post.post_view.post.url).toBe(wikipediaUrl);
expect(post.post_view.post.thumbnail_url).toBeDefined();
// Edit the thumbnail
await editPostThumbnail(alphaImage, post.post_view.post, upload1.url!);
post = await waitUntil(
() => getPost(alphaImage, post.post_view.post.id),
p => p.post_view.post.thumbnail_url == upload1.url,
);
// Make sure the thumbnail got edited.
// Make sure it uses custom thumbnail
expect(post.post_view.post.thumbnail_url).toBe(upload1.url);
});
@ -308,23 +300,17 @@ test("Create an image post, and make sure a custom thumbnail doesn't overwrite i
const community = await createCommunity(alphaImage);
let post = await createPost(
let post = await createPostWithThumbnail(
alphaImage,
community.community_view.community.id,
upload1.url,
upload1.url!,
upload2.url!,
);
expect(post.post_view.post.url).toBe(upload1.url);
// Edit the post
await editPostThumbnail(alphaImage, post.post_view.post, upload2.url!);
// Wait for the metadata to get fetched
post = await waitUntil(
() => getPost(alphaImage, post.post_view.post.id),
p => p.post_view.post.thumbnail_url == upload1.url,
p => p.post_view.post.thumbnail_url != undefined,
);
// Make sure the new custom thumbnail is ignored, and doesn't overwrite the image post
expect(post.post_view.post.url).toBe(upload1.url);
expect(post.post_view.post.thumbnail_url).toBe(upload1.url);
// Make sure the custom thumbnail is ignored
expect(post.post_view.post.thumbnail_url == upload2.url).toBe(false);
});

View File

@ -203,6 +203,7 @@ export async function createPost(
// use example.com for consistent title and embed description
name: string = randomString(5),
alt_text = randomString(10),
custom_thumbnail: string | undefined = undefined,
): Promise<PostResponse> {
let form: CreatePost = {
name,
@ -210,6 +211,7 @@ export async function createPost(
body,
alt_text,
community_id,
custom_thumbnail,
};
return api.createPost(form);
}
@ -226,16 +228,19 @@ export async function editPost(
return api.editPost(form);
}
export async function editPostThumbnail(
export async function createPostWithThumbnail(
api: LemmyHttp,
post: Post,
customThumbnail: string,
community_id: number,
url: string,
custom_thumbnail: string,
): Promise<PostResponse> {
let form: EditPost = {
post_id: post.id,
custom_thumbnail: customThumbnail,
let form: CreatePost = {
name: randomString(10),
url,
community_id,
custom_thumbnail,
};
return api.editPost(form);
return api.createPost(form);
}
export async function deletePost(

View File

@ -11,7 +11,7 @@ pub async fn get_link_metadata(
data: Query<GetSiteMetadata>,
context: Data<LemmyContext>,
) -> LemmyResult<Json<GetSiteMetadataResponse>> {
let metadata = fetch_link_metadata(&data.url, false, &context).await?;
let metadata = fetch_link_metadata(&data.url, &context).await?;
Ok(Json(GetSiteMetadataResponse { metadata }))
}

View File

@ -270,8 +270,6 @@ pub struct LinkMetadata {
#[serde(flatten)]
pub opengraph_data: OpenGraphData,
pub content_type: Option<String>,
#[serde(skip)]
pub thumbnail: Option<DbUrl>,
}
#[skip_serializing_none]

View File

@ -42,11 +42,7 @@ pub fn client_builder(settings: &Settings) -> ClientBuilder {
/// Fetches metadata for the given link and optionally generates thumbnail.
#[tracing::instrument(skip_all)]
pub async fn fetch_link_metadata(
url: &Url,
generate_thumbnail: bool,
context: &LemmyContext,
) -> LemmyResult<LinkMetadata> {
pub async fn fetch_link_metadata(url: &Url, context: &LemmyContext) -> LemmyResult<LinkMetadata> {
info!("Fetching site metadata for url: {}", url);
let response = context.client().get(url.as_str()).send().await?;
@ -63,71 +59,61 @@ pub async fn fetch_link_metadata(
let opengraph_data = extract_opengraph_data(&html_bytes, url)
.map_err(|e| info!("{e}"))
.unwrap_or_default();
let thumbnail =
extract_thumbnail_from_opengraph_data(url, &opengraph_data, generate_thumbnail, context).await;
Ok(LinkMetadata {
opengraph_data,
content_type: content_type.map(|c| c.to_string()),
thumbnail,
})
}
#[tracing::instrument(skip_all)]
pub async fn fetch_link_metadata_opt(
url: Option<&Url>,
generate_thumbnail: bool,
context: &LemmyContext,
) -> LinkMetadata {
match &url {
Some(url) => fetch_link_metadata(url, generate_thumbnail, context)
.await
.unwrap_or_default(),
_ => Default::default(),
}
}
/// Generate post thumbnail in background task, because some sites can be very slow to respond.
///
/// Takes a callback to generate a send activity task, so that post can be federated with metadata.
///
/// TODO: `federated_thumbnail` param can be removed once we federate full metadata and can
/// write it to db directly, without calling this function.
/// https://github.com/LemmyNet/lemmy/issues/4598
pub fn generate_post_link_metadata(
post: Post,
custom_thumbnail: Option<Url>,
federated_thumbnail: Option<Url>,
send_activity: impl FnOnce(Post) -> Option<SendActivityData> + Send + 'static,
local_site: Option<LocalSite>,
context: Data<LemmyContext>,
) {
spawn_try_task(async move {
// Decide if the thumbnail should be generated
let allow_sensitive = local_site_opt_to_sensitive(&local_site);
let page_is_sensitive = post.nsfw;
let allow_generate_thumbnail = allow_sensitive || !page_is_sensitive;
let do_generate_thumbnail =
allow_generate_thumbnail && custom_thumbnail.is_none() && post.thumbnail_url.is_none();
// Generate local thumbnail only if no thumbnail was federated and 'sensitive' attributes allow it.
let metadata = fetch_link_metadata_opt(
post.url.as_ref().map(DbUrl::inner),
do_generate_thumbnail,
&context,
)
.await;
// If its an image post, it needs to overwrite the thumbnail, and take precedence
let image_url = if metadata
.content_type
.as_ref()
.is_some_and(|content_type| content_type.starts_with("image"))
{
post.url.map(Into::into)
} else {
None
let metadata = match &post.url {
Some(url) => fetch_link_metadata(url, &context).await.unwrap_or_default(),
_ => Default::default(),
};
// Build the thumbnail url based on either the post image url, custom thumbnail, metadata fetch, or existing thumbnail.
let thumbnail_url = image_url
.or(custom_thumbnail)
.or(metadata.thumbnail.map(Into::into))
.or(post.thumbnail_url.map(Into::into));
let is_image_post = metadata
.content_type
.as_ref()
.is_some_and(|content_type| content_type.starts_with("image"));
// Decide if we are allowed to generate local thumbnail
let allow_sensitive = local_site_opt_to_sensitive(&local_site);
let allow_generate_thumbnail = allow_sensitive || !post.nsfw;
// Use custom thumbnail if available and its not an image post
let thumbnail_url = if !is_image_post && custom_thumbnail.is_some() {
custom_thumbnail
}
// Use federated thumbnail if available
else if federated_thumbnail.is_some() {
federated_thumbnail
}
// Generate local thumbnail if allowed
else if allow_generate_thumbnail {
match post.url.or(metadata.opengraph_data.image) {
Some(url) => generate_pictrs_thumbnail(&url, &context).await.ok(),
None => None,
}
}
// Otherwise use opengraph preview image directly
else {
metadata.opengraph_data.image.map(Into::into)
};
// Proxy the image fetch if necessary
let proxied_thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, &context).await?;
@ -213,28 +199,6 @@ fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> LemmyResult<OpenGraph
})
}
#[tracing::instrument(skip_all)]
pub async fn extract_thumbnail_from_opengraph_data(
url: &Url,
opengraph_data: &OpenGraphData,
generate_thumbnail: bool,
context: &LemmyContext,
) -> Option<DbUrl> {
if generate_thumbnail {
let image_url = opengraph_data
.image
.as_ref()
.map(DbUrl::inner)
.unwrap_or(url);
generate_pictrs_thumbnail(image_url, context)
.await
.ok()
.map(Into::into)
} else {
opengraph_data.image.clone()
}
}
#[derive(Deserialize, Debug)]
struct PictrsResponse {
files: Vec<PictrsFile>,
@ -414,9 +378,7 @@ mod tests {
async fn test_link_metadata() {
let context = LemmyContext::init_test_context().await;
let sample_url = Url::parse("https://gitlab.com/IzzyOnDroid/repo/-/wikis/FAQ").unwrap();
let sample_res = fetch_link_metadata(&sample_url, false, &context)
.await
.unwrap();
let sample_res = fetch_link_metadata(&sample_url, &context).await.unwrap();
assert_eq!(
Some("FAQ · Wiki · IzzyOnDroid / repo · GitLab".to_string()),
sample_res.opengraph_data.title
@ -438,17 +400,8 @@ mod tests {
Some(mime::TEXT_HTML_UTF_8.to_string()),
sample_res.content_type
);
assert!(sample_res.thumbnail.is_some());
}
// #[test]
// fn test_pictshare() {
// let res = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpg");
// assert!(res.is_ok());
// let res_other = fetch_pictshare("https://upload.wikimedia.org/wikipedia/en/2/27/The_Mandalorian_logo.jpgaoeu");
// assert!(res_other.is_err());
// }
#[test]
fn test_resolve_image_url() {
// url that lists the opengraph fields

View File

@ -159,6 +159,7 @@ pub async fn create_post(
generate_post_link_metadata(
updated_post.clone(),
custom_thumbnail,
None,
|post| Some(SendActivityData::CreatePost(post)),
Some(local_site),
context.reset_request_count(),

View File

@ -118,6 +118,7 @@ pub async fn update_post(
generate_post_link_metadata(
updated_post.clone(),
custom_thumbnail,
None,
|post| Some(SendActivityData::UpdatePost(post)),
Some(local_site),
context.reset_request_count(),

View File

@ -280,6 +280,7 @@ impl Object for ApubPost {
generate_post_link_metadata(
post.clone(),
None,
page.image.map(|i| i.url),
|_| None,
local_site,