From 97697aa4131a8665499bbde6499c9ee9939ed711 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Fri, 27 Oct 2023 16:42:43 +0200 Subject: [PATCH] store post url content type in db --- crates/api_common/Cargo.toml | 2 +- crates/api_common/src/request.rs | 16 +++++++++++++-- crates/api_crud/src/post/create.rs | 13 ++++-------- crates/api_crud/src/post/update.rs | 15 ++++---------- crates/apub/src/objects/post.rs | 20 +++++-------------- crates/db_schema/src/impls/post.rs | 1 + crates/db_schema/src/schema.rs | 1 + crates/db_schema/src/source/post.rs | 3 +++ crates/db_views/src/comment_view.rs | 1 + crates/db_views/src/post_view.rs | 1 + .../down.sql | 3 +++ .../up.sql | 3 +++ 12 files changed, 41 insertions(+), 38 deletions(-) create mode 100644 migrations/2023-10-27-142514_post_url_content_type/down.sql create mode 100644 migrations/2023-10-27-142514_post_url_content_type/up.sql diff --git a/crates/api_common/Cargo.toml b/crates/api_common/Cargo.toml index 16a0e6bc6..9912b294a 100644 --- a/crates/api_common/Cargo.toml +++ b/crates/api_common/Cargo.toml @@ -34,7 +34,7 @@ full = [ "futures", "once_cell", "jsonwebtoken", - "mime" + "mime", ] [dependencies] diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index 70912ba48..1fe9a9a98 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -29,8 +29,6 @@ pub fn client_builder(settings: &Settings) -> ClientBuilder { } /// Fetches metadata for the given link and optionally generates thumbnail. -/// -/// TODO: consider caching the results as it will be called by api then apub send right after each other #[tracing::instrument(skip_all)] pub async fn fetch_link_metadata( url: &Url, @@ -68,6 +66,20 @@ pub async fn fetch_link_metadata( Ok(metadata) } +#[tracing::instrument(skip_all)] +pub async fn fetch_link_metadata_opt( + url: Option<&Url>, + generate_thumbnail: bool, + context: &LemmyContext, +) -> Result { + let metadata = match &url { + Some(url) => fetch_link_metadata(url, generate_thumbnail, context) + .await + .unwrap_or_default(), + _ => Default::default(), + }; + Ok(metadata) +} /// Extract site metadata from HTML Opengraph attributes. fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result { diff --git a/crates/api_crud/src/post/create.rs b/crates/api_crud/src/post/create.rs index 211e3b55d..344d54641 100644 --- a/crates/api_crud/src/post/create.rs +++ b/crates/api_crud/src/post/create.rs @@ -4,7 +4,7 @@ use lemmy_api_common::{ build_response::build_post_response, context::LemmyContext, post::{CreatePost, PostResponse}, - request::fetch_link_metadata, + request::fetch_link_metadata_opt, send_activity::{ActivityChannel, SendActivityData}, utils::{ check_community_user_action, @@ -54,7 +54,7 @@ pub async fn create_post( honeypot_check(&data.honeypot)?; let data_url = data.url.as_ref(); - let url = data_url.map(clean_url_params).map(Into::into); // TODO no good way to handle a "clear" + let url = data_url.map(clean_url_params); // TODO no good way to handle a "clear" is_valid_post_title(&data.name)?; is_valid_body_field(&body, true)?; @@ -83,12 +83,7 @@ pub async fn create_post( } // Fetch post links and pictrs cached image - let metadata = match data_url { - Some(url) => fetch_link_metadata(url, true, &context) - .await - .unwrap_or_default(), - _ => Default::default(), - }; + let metadata = fetch_link_metadata_opt(url.as_ref(), true, &context).await?; // Only need to check if language is allowed in case user set it explicitly. When using default // language, it already only returns allowed languages. @@ -114,7 +109,7 @@ pub async fn create_post( let post_form = PostInsertForm::builder() .name(data.name.trim().to_string()) - .url(url) + .url(url.map(Into::into)) .body(body) .community_id(data.community_id) .creator_id(local_user_view.person.id) diff --git a/crates/api_crud/src/post/update.rs b/crates/api_crud/src/post/update.rs index b0d1c6c43..b0f41951d 100644 --- a/crates/api_crud/src/post/update.rs +++ b/crates/api_crud/src/post/update.rs @@ -4,7 +4,7 @@ use lemmy_api_common::{ build_response::build_post_response, context::LemmyContext, post::{EditPost, PostResponse}, - request::fetch_link_metadata, + request::fetch_link_metadata_opt, send_activity::{ActivityChannel, SendActivityData}, utils::{check_community_user_action, local_site_to_slur_regex, process_markdown_opt}, }; @@ -35,11 +35,9 @@ pub async fn update_post( ) -> Result, LemmyError> { let local_site = LocalSite::read(&mut context.pool()).await?; - let data_url = data.url.as_ref(); - // TODO No good way to handle a clear. // Issue link: https://github.com/LemmyNet/lemmy/issues/2287 - let url = Some(data_url.map(clean_url_params).map(Into::into)); + let url = data.url.as_ref().map(clean_url_params); let slur_regex = local_site_to_slur_regex(&local_site); check_slurs_opt(&data.name, &slur_regex)?; @@ -68,12 +66,7 @@ pub async fn update_post( } // Fetch post links and Pictrs cached image - let metadata = match data_url { - Some(url) => fetch_link_metadata(url, true, &context) - .await - .unwrap_or_default(), - _ => Default::default(), - }; + let metadata = fetch_link_metadata_opt(url.as_ref(), true, &context).await?; let language_id = data.language_id; CommunityLanguage::is_allowed_community_language( @@ -85,7 +78,7 @@ pub async fn update_post( let post_form = PostUpdateForm { name: data.name.clone(), - url, + url: Some(url.map(Into::into)), body: diesel_option_overwrite(body), nsfw: data.nsfw, embed_title: Some(metadata.title), diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index d87eaad9f..204d49021 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -24,7 +24,7 @@ use chrono::{DateTime, Utc}; use html2text::{from_read_with_decorator, render::text_renderer::TrivialDecorator}; use lemmy_api_common::{ context::LemmyContext, - request::fetch_link_metadata, + request::fetch_link_metadata_opt, utils::{ is_mod_or_admin, local_site_opt_to_sensitive, @@ -112,16 +112,10 @@ impl Object for ApubPost { let community = Community::read(&mut context.pool(), community_id).await?; let language = LanguageTag::new_single(self.language_id, &mut context.pool()).await?; - let metadata = match &self.url { - Some(url) => fetch_link_metadata(url, false, &context) - .await - .unwrap_or_default(), - _ => Default::default(), - }; let attachment = self .url .clone() - .map(|url| Attachment::new(url, metadata.content_type)) + .map(|url| Attachment::new(url, self.url_content_type.clone())) .into_iter() .collect(); @@ -224,17 +218,12 @@ impl Object for ApubPost { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let allow_sensitive = local_site_opt_to_sensitive(&local_site); let page_is_sensitive = page.sensitive.unwrap_or(false); - let include_image = allow_sensitive || !page_is_sensitive; + let generate_thumbnail = allow_sensitive || !page_is_sensitive; // Only fetch metadata if the post has a url and was not seen previously. We dont want to // waste resources by fetching metadata for the same post multiple times. // Additionally, only fetch image if content is not sensitive or is allowed on local site. - let metadata = match &url { - Some(url) => fetch_link_metadata(url, include_image, context) - .await - .unwrap_or_default(), - _ => Default::default(), - }; + let metadata = fetch_link_metadata_opt(url.as_ref(), generate_thumbnail, context).await?; let slur_regex = &local_site_opt_to_slur_regex(&local_site); let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source); @@ -263,6 +252,7 @@ impl Object for ApubPost { language_id, featured_community: None, featured_local: None, + url_content_type: metadata.content_type, } } else { // if is mod action, only update locked/stickied fields, nothing else diff --git a/crates/db_schema/src/impls/post.rs b/crates/db_schema/src/impls/post.rs index 4a719415a..f8dab1b3e 100644 --- a/crates/db_schema/src/impls/post.rs +++ b/crates/db_schema/src/impls/post.rs @@ -432,6 +432,7 @@ mod tests { language_id: Default::default(), featured_community: false, featured_local: false, + url_content_type: None, }; // Post Like diff --git a/crates/db_schema/src/schema.rs b/crates/db_schema/src/schema.rs index bf16d6efd..ccbfc78b5 100644 --- a/crates/db_schema/src/schema.rs +++ b/crates/db_schema/src/schema.rs @@ -712,6 +712,7 @@ diesel::table! { language_id -> Int4, featured_community -> Bool, featured_local -> Bool, + url_content_type -> Nullable, } } diff --git a/crates/db_schema/src/source/post.rs b/crates/db_schema/src/source/post.rs index 72c32d4af..f36335b9c 100644 --- a/crates/db_schema/src/source/post.rs +++ b/crates/db_schema/src/source/post.rs @@ -54,6 +54,7 @@ pub struct Post { pub featured_community: bool, /// Whether the post is featured to its site. pub featured_local: bool, + pub url_content_type: Option, } #[derive(Debug, Clone, TypedBuilder)] @@ -84,6 +85,7 @@ pub struct PostInsertForm { pub language_id: Option, pub featured_community: Option, pub featured_local: Option, + pub url_content_type: Option, } #[derive(Debug, Clone, Default)] @@ -108,6 +110,7 @@ pub struct PostUpdateForm { pub language_id: Option, pub featured_community: Option, pub featured_local: Option, + pub url_content_type: Option, } #[derive(PartialEq, Eq, Debug)] diff --git a/crates/db_views/src/comment_view.rs b/crates/db_views/src/comment_view.rs index 99f046cdf..b6d79567e 100644 --- a/crates/db_views/src/comment_view.rs +++ b/crates/db_views/src/comment_view.rs @@ -919,6 +919,7 @@ mod tests { language_id: Default::default(), featured_community: false, featured_local: false, + url_content_type: None, }, community: Community { id: data.inserted_community.id, diff --git a/crates/db_views/src/post_view.rs b/crates/db_views/src/post_view.rs index 51213abb2..fc74ba23b 100644 --- a/crates/db_views/src/post_view.rs +++ b/crates/db_views/src/post_view.rs @@ -1407,6 +1407,7 @@ mod tests { language_id: LanguageId(47), featured_community: false, featured_local: false, + url_content_type: None, }, my_vote: None, unread_comments: 0, diff --git a/migrations/2023-10-27-142514_post_url_content_type/down.sql b/migrations/2023-10-27-142514_post_url_content_type/down.sql new file mode 100644 index 000000000..df7e2c3bb --- /dev/null +++ b/migrations/2023-10-27-142514_post_url_content_type/down.sql @@ -0,0 +1,3 @@ +ALTER TABLE post + DROP COLUMN url_content_type; + diff --git a/migrations/2023-10-27-142514_post_url_content_type/up.sql b/migrations/2023-10-27-142514_post_url_content_type/up.sql new file mode 100644 index 000000000..ce78109f0 --- /dev/null +++ b/migrations/2023-10-27-142514_post_url_content_type/up.sql @@ -0,0 +1,3 @@ +ALTER TABLE post + ADD COLUMN url_content_type text; +