diff --git a/api_tests/src/post.spec.ts b/api_tests/src/post.spec.ts index 02080c4cc..59a0557bf 100644 --- a/api_tests/src/post.spec.ts +++ b/api_tests/src/post.spec.ts @@ -18,6 +18,7 @@ import { resolveBetaCommunity, createComment, deletePost, + delay, removePost, getPost, unfollowRemotes, @@ -710,3 +711,25 @@ test("Fetch post via redirect", async () => { expect(gammaPost.post?.post.ap_id).toBe(alphaPost.post_view.post.ap_id); await unfollowRemotes(alpha); }); + +test("Block post that contains banned URL", async () => { + let editSiteForm: EditSite = { + blocked_urls: ["https://evil.com/"], + }; + + await epsilon.editSite(editSiteForm); + + await delay(500); + + if (!betaCommunity) { + throw "Missing beta community"; + } + + expect( + createPost(epsilon, betaCommunity.community.id, "https://evil.com"), + ).rejects.toStrictEqual(Error("blocked_url")); + + // Later tests need this to be empty + editSiteForm.blocked_urls = []; + await epsilon.editSite(editSiteForm); +}); diff --git a/api_tests/src/user.spec.ts b/api_tests/src/user.spec.ts index ccfc5e1fe..4846d60f7 100644 --- a/api_tests/src/user.spec.ts +++ b/api_tests/src/user.spec.ts @@ -45,7 +45,7 @@ test("Create user", async () => { if (!site.my_user) { throw "Missing site user"; } - apShortname = `@${site.my_user.local_user_view.person.name}@lemmy-alpha:8541`; + apShortname = `${site.my_user.local_user_view.person.name}@lemmy-alpha:8541`; }); test("Set some user settings, check that they are federated", async () => { @@ -68,7 +68,7 @@ test("Delete user", async () => { let user = await registerUser(alpha, alphaUrl); // make a local post and comment - let alphaCommunity = (await resolveCommunity(user, "!main@lemmy-alpha:8541")) + let alphaCommunity = (await resolveCommunity(user, "main@lemmy-alpha:8541")) .community; if (!alphaCommunity) { throw "Missing alpha community"; @@ -134,7 +134,7 @@ test("Create user with Arabic name", async () => { if (!site.my_user) { throw "Missing site user"; } - apShortname = `@${site.my_user.local_user_view.person.name}@lemmy-alpha:8541`; + apShortname = `${site.my_user.local_user_view.person.name}@lemmy-alpha:8541`; let alphaPerson = (await resolvePerson(alpha, apShortname)).person; expect(alphaPerson).toBeDefined(); diff --git a/crates/api/src/local_user/save_settings.rs b/crates/api/src/local_user/save_settings.rs index d918bdc00..927496416 100644 --- a/crates/api/src/local_user/save_settings.rs +++ b/crates/api/src/local_user/save_settings.rs @@ -3,6 +3,7 @@ use lemmy_api_common::{ context::LemmyContext, person::SaveUserSettings, utils::{ + get_url_blocklist, local_site_to_slur_regex, process_markdown_opt, proxy_image_link_opt_api, @@ -35,7 +36,10 @@ pub async fn save_user_settings( let site_view = SiteView::read_local(&mut context.pool()).await?; let slur_regex = local_site_to_slur_regex(&site_view.local_site); - let bio = diesel_option_overwrite(process_markdown_opt(&data.bio, &slur_regex, &context).await?); + let url_blocklist = get_url_blocklist(&context).await?; + let bio = diesel_option_overwrite( + process_markdown_opt(&data.bio, &slur_regex, &url_blocklist, &context).await?, + ); let avatar = proxy_image_link_opt_api(&data.avatar, &context).await?; let banner = proxy_image_link_opt_api(&data.banner, &context).await?; diff --git a/crates/api/src/site/leave_admin.rs b/crates/api/src/site/leave_admin.rs index 77e28332b..0d149d07d 100644 --- a/crates/api/src/site/leave_admin.rs +++ b/crates/api/src/site/leave_admin.rs @@ -4,6 +4,7 @@ use lemmy_db_schema::{ source::{ actor_language::SiteLanguage, language::Language, + local_site_url_blocklist::LocalSiteUrlBlocklist, local_user::{LocalUser, LocalUserUpdateForm}, moderator::{ModAdd, ModAddForm}, tagline::Tagline, @@ -62,6 +63,7 @@ pub async fn leave_admin( let taglines = Tagline::get_all(&mut context.pool(), site_view.local_site.id).await?; let custom_emojis = CustomEmojiView::get_all(&mut context.pool(), site_view.local_site.id).await?; + let blocked_urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?; Ok(Json(GetSiteResponse { site_view, @@ -72,5 +74,6 @@ pub async fn leave_admin( discussion_languages, taglines, custom_emojis, + blocked_urls, })) } diff --git a/crates/api_common/Cargo.toml b/crates/api_common/Cargo.toml index 3acd7d4ca..47545446f 100644 --- a/crates/api_common/Cargo.toml +++ b/crates/api_common/Cargo.toml @@ -59,6 +59,8 @@ uuid = { workspace = true, optional = true } tokio = { workspace = true, optional = true } reqwest = { workspace = true, optional = true } ts-rs = { workspace = true, optional = true } +moka.workspace = true +anyhow.workspace = true once_cell = { workspace = true, optional = true } actix-web = { workspace = true, optional = true } enum-map = { workspace = true } diff --git a/crates/api_common/src/request.rs b/crates/api_common/src/request.rs index aaae7f866..c17d96a61 100644 --- a/crates/api_common/src/request.rs +++ b/crates/api_common/src/request.rs @@ -59,14 +59,8 @@ pub async fn fetch_link_metadata( let opengraph_data = extract_opengraph_data(&html_bytes, url) .map_err(|e| info!("{e}")) .unwrap_or_default(); - let thumbnail = extract_thumbnail_from_opengraph_data( - url, - &opengraph_data, - &content_type, - generate_thumbnail, - context, - ) - .await; + let thumbnail = + extract_thumbnail_from_opengraph_data(url, &opengraph_data, generate_thumbnail, context).await; Ok(LinkMetadata { opengraph_data, @@ -158,23 +152,21 @@ fn extract_opengraph_data(html_bytes: &[u8], url: &Url) -> Result, generate_thumbnail: bool, context: &LemmyContext, ) -> Option { - let is_image = content_type.as_ref().unwrap_or(&mime::TEXT_PLAIN).type_() == mime::IMAGE; - if generate_thumbnail && is_image { + if generate_thumbnail { let image_url = opengraph_data .image .as_ref() - .map(lemmy_db_schema::newtypes::DbUrl::inner) + .map(DbUrl::inner) .unwrap_or(url); generate_pictrs_thumbnail(image_url, context) .await .ok() .map(Into::into) } else { - None + opengraph_data.image.clone() } } @@ -363,7 +355,7 @@ mod tests { Some(mime::TEXT_HTML_UTF_8.to_string()), sample_res.content_type ); - assert_eq!(None, sample_res.thumbnail); + assert!(sample_res.thumbnail.is_some()); } // #[test] diff --git a/crates/api_common/src/site.rs b/crates/api_common/src/site.rs index bed81c2e4..d87cbdaaf 100644 --- a/crates/api_common/src/site.rs +++ b/crates/api_common/src/site.rs @@ -6,6 +6,7 @@ use lemmy_db_schema::{ federation_queue_state::FederationQueueState, instance::Instance, language::Language, + local_site_url_blocklist::LocalSiteUrlBlocklist, tagline::Tagline, }, ListingType, @@ -268,6 +269,8 @@ pub struct EditSite { pub allowed_instances: Option>, /// A list of blocked instances. pub blocked_instances: Option>, + /// A list of blocked URLs + pub blocked_urls: Option>, /// A list of taglines shown at the top of the front page. pub taglines: Option>, pub registration_mode: Option, @@ -305,6 +308,7 @@ pub struct GetSiteResponse { pub taglines: Vec, /// A list of custom emojis your site supports. pub custom_emojis: Vec, + pub blocked_urls: Vec, } #[skip_serializing_none] diff --git a/crates/api_common/src/utils.rs b/crates/api_common/src/utils.rs index d51751854..b13d21f9f 100644 --- a/crates/api_common/src/utils.rs +++ b/crates/api_common/src/utils.rs @@ -17,6 +17,7 @@ use lemmy_db_schema::{ instance_block::InstanceBlock, local_site::LocalSite, local_site_rate_limit::LocalSiteRateLimit, + local_site_url_blocklist::LocalSiteUrlBlocklist, password_reset_request::PasswordResetRequest, person::{Person, PersonUpdateForm}, person_block::PersonBlock, @@ -38,18 +39,24 @@ use lemmy_utils::{ rate_limit::{ActionType, BucketConfig}, settings::structs::{PictrsImageMode, Settings}, utils::{ - markdown::markdown_rewrite_image_links, + markdown::{markdown_check_for_blocked_urls, markdown_rewrite_image_links}, slurs::{build_slur_regex, remove_slurs}, }, }; -use regex::Regex; +use moka::future::Cache; +use once_cell::sync::Lazy; +use regex::{escape, Regex, RegexSet}; use rosetta_i18n::{Language, LanguageId}; -use std::collections::HashSet; +use std::{collections::HashSet, time::Duration}; use tracing::warn; use url::{ParseError, Url}; use urlencoding::encode; pub static AUTH_COOKIE_NAME: &str = "jwt"; +#[cfg(debug_assertions)] +static URL_BLOCKLIST_RECHECK_DELAY: Duration = Duration::from_millis(500); +#[cfg(not(debug_assertions))] +static URL_BLOCKLIST_RECHECK_DELAY: Duration = Duration::from_secs(60); #[tracing::instrument(skip_all)] pub async fn is_mod_or_admin( @@ -516,6 +523,47 @@ pub fn local_site_opt_to_sensitive(local_site: &Option) -> bool { .unwrap_or(false) } +pub async fn get_url_blocklist(context: &LemmyContext) -> LemmyResult { + static URL_BLOCKLIST: Lazy> = Lazy::new(|| { + Cache::builder() + .max_capacity(1) + .time_to_live(URL_BLOCKLIST_RECHECK_DELAY) + .build() + }); + + Ok( + URL_BLOCKLIST + .try_get_with::<_, LemmyError>((), async { + let urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?; + + let regexes = urls.iter().map(|url| { + let url = &url.url; + let parsed = Url::parse(url).expect("Coundln't parse URL."); + if url.ends_with('/') { + format!( + "({}://)?{}{}?", + parsed.scheme(), + escape(parsed.domain().expect("No domain.")), + escape(parsed.path()) + ) + } else { + format!( + "({}://)?{}{}", + parsed.scheme(), + escape(parsed.domain().expect("No domain.")), + escape(parsed.path()) + ) + } + }); + + let set = RegexSet::new(regexes)?; + Ok(set) + }) + .await + .map_err(|e| anyhow::anyhow!("Failed to build URL blocklist due to `{}`", e))?, + ) +} + pub async fn send_application_approved_email( user: &LocalUserView, settings: &Settings, @@ -867,9 +915,13 @@ fn limit_expire_time(expires: DateTime) -> LemmyResult pub async fn process_markdown( text: &str, slur_regex: &Option, + url_blocklist: &RegexSet, context: &LemmyContext, ) -> LemmyResult { let text = remove_slurs(text, slur_regex); + + markdown_check_for_blocked_urls(&text, url_blocklist)?; + if context.settings().pictrs_config()?.image_mode() == PictrsImageMode::ProxyAllImages { let (text, links) = markdown_rewrite_image_links(text); RemoteImage::create(&mut context.pool(), links).await?; @@ -882,10 +934,13 @@ pub async fn process_markdown( pub async fn process_markdown_opt( text: &Option, slur_regex: &Option, + url_blocklist: &RegexSet, context: &LemmyContext, ) -> LemmyResult> { match text { - Some(t) => process_markdown(t, slur_regex, context).await.map(Some), + Some(t) => process_markdown(t, slur_regex, url_blocklist, context) + .await + .map(Some), None => Ok(None), } } diff --git a/crates/api_crud/src/comment/create.rs b/crates/api_crud/src/comment/create.rs index e5a869223..9269ec382 100644 --- a/crates/api_crud/src/comment/create.rs +++ b/crates/api_crud/src/comment/create.rs @@ -10,6 +10,7 @@ use lemmy_api_common::{ check_post_deleted_or_removed, generate_local_apub_endpoint, get_post, + get_url_blocklist, is_mod_or_admin, local_site_to_slur_regex, process_markdown, @@ -44,7 +45,8 @@ pub async fn create_comment( let local_site = LocalSite::read(&mut context.pool()).await?; let slur_regex = local_site_to_slur_regex(&local_site); - let content = process_markdown(&data.content, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?; is_valid_body_field(&Some(content.clone()), false)?; // Check for a community ban diff --git a/crates/api_crud/src/comment/update.rs b/crates/api_crud/src/comment/update.rs index 2d6bf79be..e814ebd6b 100644 --- a/crates/api_crud/src/comment/update.rs +++ b/crates/api_crud/src/comment/update.rs @@ -5,7 +5,12 @@ use lemmy_api_common::{ comment::{CommentResponse, EditComment}, context::LemmyContext, send_activity::{ActivityChannel, SendActivityData}, - utils::{check_community_user_action, local_site_to_slur_regex, process_markdown_opt}, + utils::{ + check_community_user_action, + get_url_blocklist, + local_site_to_slur_regex, + process_markdown_opt, + }, }; use lemmy_db_schema::{ source::{ @@ -54,7 +59,8 @@ pub async fn update_comment( .await?; let slur_regex = local_site_to_slur_regex(&local_site); - let content = process_markdown_opt(&data.content, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let content = process_markdown_opt(&data.content, &slur_regex, &url_blocklist, &context).await?; is_valid_body_field(&content, false)?; let comment_id = data.comment_id; diff --git a/crates/api_crud/src/community/create.rs b/crates/api_crud/src/community/create.rs index ef2da9ef8..679655078 100644 --- a/crates/api_crud/src/community/create.rs +++ b/crates/api_crud/src/community/create.rs @@ -9,6 +9,7 @@ use lemmy_api_common::{ generate_inbox_url, generate_local_apub_endpoint, generate_shared_inbox_url, + get_url_blocklist, is_admin, local_site_to_slur_regex, process_markdown_opt, @@ -53,9 +54,11 @@ pub async fn create_community( } let slur_regex = local_site_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(&context).await?; check_slurs(&data.name, &slur_regex)?; check_slurs(&data.title, &slur_regex)?; - let description = process_markdown_opt(&data.description, &slur_regex, &context).await?; + let description = + process_markdown_opt(&data.description, &slur_regex, &url_blocklist, &context).await?; let icon = proxy_image_link_api(&data.icon, &context).await?; let banner = proxy_image_link_api(&data.banner, &context).await?; diff --git a/crates/api_crud/src/community/update.rs b/crates/api_crud/src/community/update.rs index 14bb5c326..83ffded13 100644 --- a/crates/api_crud/src/community/update.rs +++ b/crates/api_crud/src/community/update.rs @@ -7,6 +7,7 @@ use lemmy_api_common::{ send_activity::{ActivityChannel, SendActivityData}, utils::{ check_community_mod_action, + get_url_blocklist, local_site_to_slur_regex, process_markdown_opt, proxy_image_link_opt_api, @@ -36,8 +37,10 @@ pub async fn update_community( let local_site = LocalSite::read(&mut context.pool()).await?; let slur_regex = local_site_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(&context).await?; check_slurs_opt(&data.title, &slur_regex)?; - let description = process_markdown_opt(&data.description, &slur_regex, &context).await?; + let description = + process_markdown_opt(&data.description, &slur_regex, &url_blocklist, &context).await?; is_valid_body_field(&data.description, false)?; let description = diesel_option_overwrite(description); diff --git a/crates/api_crud/src/post/create.rs b/crates/api_crud/src/post/create.rs index c52cfaf80..fabab6b09 100644 --- a/crates/api_crud/src/post/create.rs +++ b/crates/api_crud/src/post/create.rs @@ -9,6 +9,7 @@ use lemmy_api_common::{ utils::{ check_community_user_action, generate_local_apub_endpoint, + get_url_blocklist, honeypot_check, local_site_to_slur_regex, mark_post_as_read, @@ -29,7 +30,7 @@ use lemmy_db_schema::{ CommunityVisibility, }; use lemmy_db_views::structs::LocalUserView; -use lemmy_db_views_actor::structs::CommunityView; +use lemmy_db_views_actor::structs::CommunityModeratorView; use lemmy_utils::{ error::{LemmyError, LemmyErrorExt, LemmyErrorType}, spawn_try_task, @@ -38,6 +39,7 @@ use lemmy_utils::{ validation::{ check_url_scheme, clean_url_params, + is_url_blocked, is_valid_alt_text_field, is_valid_body_field, is_valid_post_title, @@ -60,8 +62,9 @@ pub async fn create_post( let slur_regex = local_site_to_slur_regex(&local_site); check_slurs(&data.name, &slur_regex)?; + let url_blocklist = get_url_blocklist(&context).await?; - let body = process_markdown_opt(&data.body, &slur_regex, &context).await?; + let body = process_markdown_opt(&data.body, &slur_regex, &url_blocklist, &context).await?; let data_url = data.url.as_ref(); let url = data_url.map(clean_url_params); // TODO no good way to handle a "clear" let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params); @@ -69,6 +72,7 @@ pub async fn create_post( is_valid_post_title(&data.name)?; is_valid_body_field(&body, true)?; is_valid_alt_text_field(&data.alt_text)?; + is_url_blocked(&url, &url_blocklist)?; check_url_scheme(&url)?; check_url_scheme(&custom_thumbnail)?; @@ -83,10 +87,10 @@ pub async fn create_post( let community = Community::read(&mut context.pool(), community_id).await?; if community.posting_restricted_to_mods { let community_id = data.community_id; - let is_mod = CommunityView::is_mod_or_admin( + let is_mod = CommunityModeratorView::is_community_moderator( &mut context.pool(), - local_user_view.local_user.person_id, community_id, + local_user_view.local_user.person_id, ) .await?; if !is_mod { diff --git a/crates/api_crud/src/post/update.rs b/crates/api_crud/src/post/update.rs index 6db65dffe..08c5425b9 100644 --- a/crates/api_crud/src/post/update.rs +++ b/crates/api_crud/src/post/update.rs @@ -8,6 +8,7 @@ use lemmy_api_common::{ send_activity::{ActivityChannel, SendActivityData}, utils::{ check_community_user_action, + get_url_blocklist, local_site_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub, @@ -30,6 +31,7 @@ use lemmy_utils::{ validation::{ check_url_scheme, clean_url_params, + is_url_blocked, is_valid_alt_text_field, is_valid_body_field, is_valid_post_title, @@ -51,9 +53,11 @@ pub async fn update_post( let url = data.url.as_ref().map(clean_url_params); let custom_thumbnail = data.custom_thumbnail.as_ref().map(clean_url_params); + let url_blocklist = get_url_blocklist(&context).await?; + let slur_regex = local_site_to_slur_regex(&local_site); check_slurs_opt(&data.name, &slur_regex)?; - let body = process_markdown_opt(&data.body, &slur_regex, &context).await?; + let body = process_markdown_opt(&data.body, &slur_regex, &url_blocklist, &context).await?; if let Some(name) = &data.name { is_valid_post_title(name)?; @@ -61,6 +65,7 @@ pub async fn update_post( is_valid_body_field(&body, true)?; is_valid_alt_text_field(&data.alt_text)?; + is_url_blocked(&url, &url_blocklist)?; check_url_scheme(&url)?; check_url_scheme(&custom_thumbnail)?; diff --git a/crates/api_crud/src/private_message/create.rs b/crates/api_crud/src/private_message/create.rs index c4832ec70..32d8b99e6 100644 --- a/crates/api_crud/src/private_message/create.rs +++ b/crates/api_crud/src/private_message/create.rs @@ -8,6 +8,7 @@ use lemmy_api_common::{ check_person_block, generate_local_apub_endpoint, get_interface_language, + get_url_blocklist, local_site_to_slur_regex, process_markdown, send_email_to_user, @@ -36,7 +37,8 @@ pub async fn create_private_message( let local_site = LocalSite::read(&mut context.pool()).await?; let slur_regex = local_site_to_slur_regex(&local_site); - let content = process_markdown(&data.content, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?; is_valid_body_field(&Some(content.clone()), false)?; check_person_block( diff --git a/crates/api_crud/src/private_message/update.rs b/crates/api_crud/src/private_message/update.rs index dfcf522a8..29063fd10 100644 --- a/crates/api_crud/src/private_message/update.rs +++ b/crates/api_crud/src/private_message/update.rs @@ -4,7 +4,7 @@ use lemmy_api_common::{ context::LemmyContext, private_message::{EditPrivateMessage, PrivateMessageResponse}, send_activity::{ActivityChannel, SendActivityData}, - utils::{local_site_to_slur_regex, process_markdown}, + utils::{get_url_blocklist, local_site_to_slur_regex, process_markdown}, }; use lemmy_db_schema::{ source::{ @@ -37,7 +37,8 @@ pub async fn update_private_message( // Doing the update let slur_regex = local_site_to_slur_regex(&local_site); - let content = process_markdown(&data.content, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let content = process_markdown(&data.content, &slur_regex, &url_blocklist, &context).await?; is_valid_body_field(&Some(content.clone()), false)?; let private_message_id = data.private_message_id; diff --git a/crates/api_crud/src/site/create.rs b/crates/api_crud/src/site/create.rs index b5441bffe..76aae405e 100644 --- a/crates/api_crud/src/site/create.rs +++ b/crates/api_crud/src/site/create.rs @@ -6,6 +6,7 @@ use lemmy_api_common::{ site::{CreateSite, SiteResponse}, utils::{ generate_shared_inbox_url, + get_url_blocklist, is_admin, local_site_rate_limit_to_rate_limit_config, local_site_to_slur_regex, @@ -58,7 +59,8 @@ pub async fn create_site( let keypair = generate_actor_keypair()?; let slur_regex = local_site_to_slur_regex(&local_site); - let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &url_blocklist, &context).await?; let icon = proxy_image_link_opt_api(&data.icon, &context).await?; let banner = proxy_image_link_opt_api(&data.banner, &context).await?; diff --git a/crates/api_crud/src/site/read.rs b/crates/api_crud/src/site/read.rs index b64503666..e99a222fa 100644 --- a/crates/api_crud/src/site/read.rs +++ b/crates/api_crud/src/site/read.rs @@ -6,6 +6,7 @@ use lemmy_api_common::{ use lemmy_db_schema::source::{ actor_language::{LocalUserLanguage, SiteLanguage}, language::Language, + local_site_url_blocklist::LocalSiteUrlBlocklist, tagline::Tagline, }; use lemmy_db_views::structs::{CustomEmojiView, LocalUserView, SiteView}; @@ -47,6 +48,7 @@ pub async fn get_site( let taglines = Tagline::get_all(&mut context.pool(), site_view.local_site.id).await?; let custom_emojis = CustomEmojiView::get_all(&mut context.pool(), site_view.local_site.id).await?; + let blocked_urls = LocalSiteUrlBlocklist::get_all(&mut context.pool()).await?; Ok(GetSiteResponse { site_view, admins, @@ -56,6 +58,7 @@ pub async fn get_site( discussion_languages, taglines, custom_emojis, + blocked_urls, }) }) .await diff --git a/crates/api_crud/src/site/update.rs b/crates/api_crud/src/site/update.rs index 17e81937e..809dbe498 100644 --- a/crates/api_crud/src/site/update.rs +++ b/crates/api_crud/src/site/update.rs @@ -4,6 +4,7 @@ use lemmy_api_common::{ context::LemmyContext, site::{EditSite, SiteResponse}, utils::{ + get_url_blocklist, is_admin, local_site_rate_limit_to_rate_limit_config, local_site_to_slur_regex, @@ -18,6 +19,7 @@ use lemmy_db_schema::{ federation_blocklist::FederationBlockList, local_site::{LocalSite, LocalSiteUpdateForm}, local_site_rate_limit::{LocalSiteRateLimit, LocalSiteRateLimitUpdateForm}, + local_site_url_blocklist::LocalSiteUrlBlocklist, local_user::LocalUser, site::{Site, SiteUpdateForm}, tagline::Tagline, @@ -34,6 +36,7 @@ use lemmy_utils::{ validation::{ build_and_check_regex, check_site_visibility_valid, + check_urls_are_valid, is_valid_body_field, site_description_length_check, site_name_length_check, @@ -61,7 +64,8 @@ pub async fn update_site( } let slur_regex = local_site_to_slur_regex(&local_site); - let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &context).await?; + let url_blocklist = get_url_blocklist(&context).await?; + let sidebar = process_markdown_opt(&data.sidebar, &slur_regex, &url_blocklist, &context).await?; let icon = proxy_image_link_opt_api(&data.icon, &context).await?; let banner = proxy_image_link_opt_api(&data.banner, &context).await?; @@ -137,6 +141,11 @@ pub async fn update_site( let blocked = data.blocked_instances.clone(); FederationBlockList::replace(&mut context.pool(), blocked).await?; + if let Some(url_blocklist) = data.blocked_urls.clone() { + let parsed_urls = check_urls_are_valid(&url_blocklist)?; + LocalSiteUrlBlocklist::replace(&mut context.pool(), parsed_urls).await?; + } + // TODO can't think of a better way to do this. // If the server suddenly requires email verification, or required applications, no old users // will be able to log in. It really only wants this to be a requirement for NEW signups. @@ -578,6 +587,7 @@ mod tests { captcha_difficulty: None, allowed_instances: None, blocked_instances: None, + blocked_urls: None, taglines: None, registration_mode: site_registration_mode, reports_email_admins: None, diff --git a/crates/apub/src/api/list_comments.rs b/crates/apub/src/api/list_comments.rs index 3ae85cdcc..c83756f54 100644 --- a/crates/apub/src/api/list_comments.rs +++ b/crates/apub/src/api/list_comments.rs @@ -27,8 +27,11 @@ pub async fn list_comments( check_private_instance(&local_user_view, &local_site)?; let community_id = if let Some(name) = &data.community_name { - Some(resolve_actor_identifier::(name, &context, &None, true).await?) - .map(|c| c.id) + Some( + resolve_actor_identifier::(name, &context, &local_user_view, true) + .await?, + ) + .map(|c| c.id) } else { data.community_id }; diff --git a/crates/apub/src/api/list_posts.rs b/crates/apub/src/api/list_posts.rs index b2ca95648..384f1b60e 100644 --- a/crates/apub/src/api/list_posts.rs +++ b/crates/apub/src/api/list_posts.rs @@ -30,8 +30,11 @@ pub async fn list_posts( let page = data.page; let limit = data.limit; let community_id = if let Some(name) = &data.community_name { - Some(resolve_actor_identifier::(name, &context, &None, true).await?) - .map(|c| c.id) + Some( + resolve_actor_identifier::(name, &context, &local_user_view, true) + .await?, + ) + .map(|c| c.id) } else { data.community_id }; diff --git a/crates/apub/src/api/resolve_object.rs b/crates/apub/src/api/resolve_object.rs index e081377f6..6d672a8cd 100644 --- a/crates/apub/src/api/resolve_object.rs +++ b/crates/apub/src/api/resolve_object.rs @@ -1,7 +1,6 @@ -use crate::fetcher::search::{ - search_query_to_object_id, - search_query_to_object_id_local, - SearchableObjects, +use crate::fetcher::{ + search::{search_query_to_object_id, search_query_to_object_id_local, SearchableObjects}, + user_or_community::UserOrCommunity, }; use activitypub_federation::config::Data; use actix_web::web::{Json, Query}; @@ -31,7 +30,7 @@ pub async fn resolve_object( let res = if is_authenticated { // user is fully authenticated; allow remote lookups as well. - search_query_to_object_id(&data.q, &context).await + search_query_to_object_id(data.q.clone(), &context).await } else { // user isn't authenticated only allow a local search. search_query_to_object_id_local(&data.q, &context).await @@ -52,14 +51,6 @@ async fn convert_response( let removed_or_deleted; let mut res = ResolveObjectResponse::default(); match object { - Person(p) => { - removed_or_deleted = p.deleted; - res.person = Some(PersonView::read(pool, p.id).await?) - } - Community(c) => { - removed_or_deleted = c.deleted || c.removed; - res.community = Some(CommunityView::read(pool, c.id, user_id, false).await?) - } Post(p) => { removed_or_deleted = p.deleted || p.removed; res.post = Some(PostView::read(pool, p.id, user_id, false).await?) @@ -68,6 +59,16 @@ async fn convert_response( removed_or_deleted = c.deleted || c.removed; res.comment = Some(CommentView::read(pool, c.id, user_id).await?) } + PersonOrCommunity(p) => match *p { + UserOrCommunity::User(u) => { + removed_or_deleted = u.deleted; + res.person = Some(PersonView::read(pool, u.id).await?) + } + UserOrCommunity::Community(c) => { + removed_or_deleted = c.deleted || c.removed; + res.community = Some(CommunityView::read(pool, c.id, user_id, false).await?) + } + }, }; // if the object was deleted from database, dont return it if removed_or_deleted { diff --git a/crates/apub/src/fetcher/search.rs b/crates/apub/src/fetcher/search.rs index 54951edd9..74d755da0 100644 --- a/crates/apub/src/fetcher/search.rs +++ b/crates/apub/src/fetcher/search.rs @@ -1,6 +1,7 @@ use crate::{ + fetcher::user_or_community::{PersonOrGroup, UserOrCommunity}, objects::{comment::ApubComment, community::ApubCommunity, person::ApubPerson, post::ApubPost}, - protocol::objects::{group::Group, note::Note, page::Page, person::Person}, + protocol::objects::{note::Note, page::Page}, }; use activitypub_federation::{ config::Data, @@ -9,7 +10,7 @@ use activitypub_federation::{ }; use chrono::{DateTime, Utc}; use lemmy_api_common::context::LemmyContext; -use lemmy_utils::error::{LemmyError, LemmyErrorType}; +use lemmy_utils::error::LemmyError; use serde::Deserialize; use url::Url; @@ -18,28 +19,22 @@ use url::Url; /// which gets resolved to an URL. #[tracing::instrument(skip_all)] pub(crate) async fn search_query_to_object_id( - query: &str, + mut query: String, context: &Data, ) -> Result { - Ok(match Url::parse(query) { + Ok(match Url::parse(&query) { Ok(url) => { // its already an url, just go with it ObjectId::from(url).dereference(context).await? } Err(_) => { // not an url, try to resolve via webfinger - let mut chars = query.chars(); - let kind = chars.next(); - let identifier = chars.as_str(); - match kind { - Some('@') => SearchableObjects::Person( - webfinger_resolve_actor::(identifier, context).await?, - ), - Some('!') => SearchableObjects::Community( - webfinger_resolve_actor::(identifier, context).await?, - ), - _ => return Err(LemmyErrorType::InvalidQuery)?, + if query.starts_with('!') || query.starts_with('@') { + query.remove(0); } + SearchableObjects::PersonOrCommunity(Box::new( + webfinger_resolve_actor::(&query, context).await?, + )) } }) } @@ -59,19 +54,17 @@ pub(crate) async fn search_query_to_object_id_local( /// The types of ActivityPub objects that can be fetched directly by searching for their ID. #[derive(Debug)] pub(crate) enum SearchableObjects { - Person(ApubPerson), - Community(ApubCommunity), Post(ApubPost), Comment(ApubComment), + PersonOrCommunity(Box), } #[derive(Deserialize)] #[serde(untagged)] pub(crate) enum SearchableKinds { - Group(Group), - Person(Person), - Page(Page), + Page(Box), Note(Note), + PersonOrGroup(Box), } #[async_trait::async_trait] @@ -82,10 +75,9 @@ impl Object for SearchableObjects { fn last_refreshed_at(&self) -> Option> { match self { - SearchableObjects::Person(p) => p.last_refreshed_at(), - SearchableObjects::Community(c) => c.last_refreshed_at(), SearchableObjects::Post(p) => p.last_refreshed_at(), SearchableObjects::Comment(c) => c.last_refreshed_at(), + SearchableObjects::PersonOrCommunity(p) => p.last_refreshed_at(), } } @@ -99,13 +91,9 @@ impl Object for SearchableObjects { object_id: Url, context: &Data, ) -> Result, LemmyError> { - let c = ApubCommunity::read_from_id(object_id.clone(), context).await?; - if let Some(c) = c { - return Ok(Some(SearchableObjects::Community(c))); - } - let p = ApubPerson::read_from_id(object_id.clone(), context).await?; - if let Some(p) = p { - return Ok(Some(SearchableObjects::Person(p))); + let uc = UserOrCommunity::read_from_id(object_id.clone(), context).await?; + if let Some(uc) = uc { + return Ok(Some(SearchableObjects::PersonOrCommunity(Box::new(uc)))); } let p = ApubPost::read_from_id(object_id.clone(), context).await?; if let Some(p) = p { @@ -121,10 +109,12 @@ impl Object for SearchableObjects { #[tracing::instrument(skip_all)] async fn delete(self, data: &Data) -> Result<(), LemmyError> { match self { - SearchableObjects::Person(p) => p.delete(data).await, - SearchableObjects::Community(c) => c.delete(data).await, SearchableObjects::Post(p) => p.delete(data).await, SearchableObjects::Comment(c) => c.delete(data).await, + SearchableObjects::PersonOrCommunity(pc) => match *pc { + UserOrCommunity::User(p) => p.delete(data).await, + UserOrCommunity::Community(c) => c.delete(data).await, + }, } } @@ -139,10 +129,12 @@ impl Object for SearchableObjects { data: &Data, ) -> Result<(), LemmyError> { match apub { - SearchableKinds::Group(a) => ApubCommunity::verify(a, expected_domain, data).await, - SearchableKinds::Person(a) => ApubPerson::verify(a, expected_domain, data).await, SearchableKinds::Page(a) => ApubPost::verify(a, expected_domain, data).await, SearchableKinds::Note(a) => ApubComment::verify(a, expected_domain, data).await, + SearchableKinds::PersonOrGroup(pg) => match pg.as_ref() { + PersonOrGroup::Person(a) => ApubPerson::verify(a, expected_domain, data).await, + PersonOrGroup::Group(a) => ApubCommunity::verify(a, expected_domain, data).await, + }, } } @@ -151,10 +143,11 @@ impl Object for SearchableObjects { use SearchableKinds as SAT; use SearchableObjects as SO; Ok(match apub { - SAT::Group(g) => SO::Community(ApubCommunity::from_json(g, context).await?), - SAT::Person(p) => SO::Person(ApubPerson::from_json(p, context).await?), - SAT::Page(p) => SO::Post(ApubPost::from_json(p, context).await?), + SAT::Page(p) => SO::Post(ApubPost::from_json(*p, context).await?), SAT::Note(n) => SO::Comment(ApubComment::from_json(n, context).await?), + SAT::PersonOrGroup(pg) => { + SO::PersonOrCommunity(Box::new(UserOrCommunity::from_json(*pg, context).await?)) + } }) } } diff --git a/crates/apub/src/objects/comment.rs b/crates/apub/src/objects/comment.rs index 6d8d814bf..ba7cc914f 100644 --- a/crates/apub/src/objects/comment.rs +++ b/crates/apub/src/objects/comment.rs @@ -18,7 +18,7 @@ use activitypub_federation::{ use chrono::{DateTime, Utc}; use lemmy_api_common::{ context::LemmyContext, - utils::{is_mod_or_admin, local_site_opt_to_slur_regex, process_markdown}, + utils::{get_url_blocklist, is_mod_or_admin, local_site_opt_to_slur_regex, process_markdown}, }; use lemmy_db_schema::{ source::{ @@ -165,7 +165,8 @@ impl Object for ApubComment { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); - let content = process_markdown(&content, slur_regex, context).await?; + let url_blocklist = get_url_blocklist(context).await?; + let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?; let language_id = LanguageTag::to_language_id_single(note.language, &mut context.pool()).await?; diff --git a/crates/apub/src/objects/community.rs b/crates/apub/src/objects/community.rs index ad8472915..7630d80b2 100644 --- a/crates/apub/src/objects/community.rs +++ b/crates/apub/src/objects/community.rs @@ -21,6 +21,7 @@ use lemmy_api_common::{ generate_featured_url, generate_moderators_url, generate_outbox_url, + get_url_blocklist, local_site_opt_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub, @@ -141,8 +142,10 @@ impl Object for ApubCommunity { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(context).await?; let description = read_from_string_or_source_opt(&group.summary, &None, &group.source); - let description = process_markdown_opt(&description, slur_regex, context).await?; + let description = + process_markdown_opt(&description, slur_regex, &url_blocklist, context).await?; let icon = proxy_image_link_opt_apub(group.icon.map(|i| i.url), context).await?; let banner = proxy_image_link_opt_apub(group.image.map(|i| i.url), context).await?; @@ -177,18 +180,21 @@ impl Object for ApubCommunity { let community: ApubCommunity = community.into(); - // Fetching mods and outbox is not necessary for Lemmy to work, so ignore errors. Besides, - // we need to ignore these errors so that tests can work entirely offline. + // These collections are not necessary for Lemmy to work, so ignore errors. let community_ = community.clone(); let context_ = context.reset_request_count(); spawn_try_task(async move { - group.outbox.dereference(&community_, &context_).await?; - group.followers.dereference(&community_, &context_).await?; + group.outbox.dereference(&community_, &context_).await.ok(); + group + .followers + .dereference(&community_, &context_) + .await + .ok(); if let Some(featured) = group.featured { - featured.dereference(&community_, &context_).await?; + featured.dereference(&community_, &context_).await.ok(); } if let Some(moderators) = group.attributed_to { - moderators.dereference(&community_, &context_).await?; + moderators.dereference(&community_, &context_).await.ok(); } Ok(()) }); diff --git a/crates/apub/src/objects/instance.rs b/crates/apub/src/objects/instance.rs index 8f4f163db..6894643d6 100644 --- a/crates/apub/src/objects/instance.rs +++ b/crates/apub/src/objects/instance.rs @@ -19,7 +19,12 @@ use activitypub_federation::{ use chrono::{DateTime, Utc}; use lemmy_api_common::{ context::LemmyContext, - utils::{local_site_opt_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub}, + utils::{ + get_url_blocklist, + local_site_opt_to_slur_regex, + process_markdown_opt, + proxy_image_link_opt_apub, + }, }; use lemmy_db_schema::{ newtypes::InstanceId, @@ -138,8 +143,9 @@ impl Object for ApubSite { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(context).await?; let sidebar = read_from_string_or_source_opt(&apub.content, &None, &apub.source); - let sidebar = process_markdown_opt(&sidebar, slur_regex, context).await?; + let sidebar = process_markdown_opt(&sidebar, slur_regex, &url_blocklist, context).await?; let icon = proxy_image_link_opt_apub(apub.icon.map(|i| i.url), context).await?; let banner = proxy_image_link_opt_apub(apub.image.map(|i| i.url), context).await?; diff --git a/crates/apub/src/objects/person.rs b/crates/apub/src/objects/person.rs index 7bfb68a04..d4456344f 100644 --- a/crates/apub/src/objects/person.rs +++ b/crates/apub/src/objects/person.rs @@ -22,6 +22,7 @@ use lemmy_api_common::{ context::LemmyContext, utils::{ generate_outbox_url, + get_url_blocklist, local_site_opt_to_slur_regex, process_markdown_opt, proxy_image_link_opt_apub, @@ -152,8 +153,9 @@ impl Object for ApubPerson { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(context).await?; let bio = read_from_string_or_source_opt(&person.summary, &None, &person.source); - let bio = process_markdown_opt(&bio, slur_regex, context).await?; + let bio = process_markdown_opt(&bio, slur_regex, &url_blocklist, context).await?; let avatar = proxy_image_link_opt_apub(person.icon.map(|i| i.url), context).await?; let banner = proxy_image_link_opt_apub(person.image.map(|i| i.url), context).await?; diff --git a/crates/apub/src/objects/post.rs b/crates/apub/src/objects/post.rs index 2b211fff9..15184b622 100644 --- a/crates/apub/src/objects/post.rs +++ b/crates/apub/src/objects/post.rs @@ -26,7 +26,7 @@ use lemmy_api_common::{ context::LemmyContext, request::fetch_link_metadata_opt, utils::{ - is_mod_or_admin, + get_url_blocklist, local_site_opt_to_sensitive, local_site_opt_to_slur_regex, process_markdown_opt, @@ -43,6 +43,7 @@ use lemmy_db_schema::{ }, traits::Crud, }; +use lemmy_db_views_actor::structs::CommunityModeratorView; use lemmy_utils::{ error::LemmyError, utils::{markdown::markdown_to_html, slurs::check_slurs_opt, validation::check_url_scheme}, @@ -185,7 +186,8 @@ impl Object for ApubPost { let creator = page.creator()?.dereference(context).await?; let community = page.community(context).await?; if community.posting_restricted_to_mods { - is_mod_or_admin(&mut context.pool(), &creator, community.id).await?; + CommunityModeratorView::is_community_moderator(&mut context.pool(), community.id, creator.id) + .await?; } let mut name = page .name @@ -245,9 +247,10 @@ impl Object for ApubPost { let thumbnail_url = proxy_image_link_opt_apub(thumbnail_url, context).await?; let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(context).await?; let body = read_from_string_or_source_opt(&page.content, &page.media_type, &page.source); - let body = process_markdown_opt(&body, slur_regex, context).await?; + let body = process_markdown_opt(&body, slur_regex, &url_blocklist, context).await?; let language_id = LanguageTag::to_language_id_single(page.language, &mut context.pool()).await?; diff --git a/crates/apub/src/objects/private_message.rs b/crates/apub/src/objects/private_message.rs index d5c00632f..647510802 100644 --- a/crates/apub/src/objects/private_message.rs +++ b/crates/apub/src/objects/private_message.rs @@ -14,7 +14,7 @@ use activitypub_federation::{ use chrono::{DateTime, Utc}; use lemmy_api_common::{ context::LemmyContext, - utils::{check_person_block, local_site_opt_to_slur_regex, process_markdown}, + utils::{check_person_block, get_url_blocklist, local_site_opt_to_slur_regex, process_markdown}, }; use lemmy_db_schema::{ source::{ @@ -127,8 +127,9 @@ impl Object for ApubPrivateMessage { let local_site = LocalSite::read(&mut context.pool()).await.ok(); let slur_regex = &local_site_opt_to_slur_regex(&local_site); + let url_blocklist = get_url_blocklist(context).await?; let content = read_from_string_or_source(¬e.content, &None, ¬e.source); - let content = process_markdown(&content, slur_regex, context).await?; + let content = process_markdown(&content, slur_regex, &url_blocklist, context).await?; let form = PrivateMessageInsertForm { creator_id: creator.id, diff --git a/crates/db_schema/src/impls/local_site_url_blocklist.rs b/crates/db_schema/src/impls/local_site_url_blocklist.rs new file mode 100644 index 000000000..73dedabce --- /dev/null +++ b/crates/db_schema/src/impls/local_site_url_blocklist.rs @@ -0,0 +1,49 @@ +use crate::{ + schema::local_site_url_blocklist, + source::local_site_url_blocklist::{LocalSiteUrlBlocklist, LocalSiteUrlBlocklistForm}, + utils::{get_conn, DbPool}, +}; +use diesel::{dsl::insert_into, result::Error}; +use diesel_async::{AsyncPgConnection, RunQueryDsl}; + +impl LocalSiteUrlBlocklist { + pub async fn replace(pool: &mut DbPool<'_>, url_blocklist: Vec) -> Result<(), Error> { + let conn = &mut get_conn(pool).await?; + + conn + .build_transaction() + .run(|conn| { + Box::pin(async move { + use crate::schema::local_site_url_blocklist::dsl::local_site_url_blocklist; + + Self::clear(conn).await?; + + let forms = url_blocklist + .into_iter() + .map(|url| LocalSiteUrlBlocklistForm { url, updated: None }) + .collect::>(); + + insert_into(local_site_url_blocklist) + .values(forms) + .execute(conn) + .await?; + + Ok(()) + }) as _ + }) + .await + } + + async fn clear(conn: &mut AsyncPgConnection) -> Result { + diesel::delete(local_site_url_blocklist::table) + .execute(conn) + .await + } + + pub async fn get_all(pool: &mut DbPool<'_>) -> Result, Error> { + let conn = &mut get_conn(pool).await?; + local_site_url_blocklist::table + .get_results::(conn) + .await + } +} diff --git a/crates/db_schema/src/impls/mod.rs b/crates/db_schema/src/impls/mod.rs index 711a6c4e6..3a4e71307 100644 --- a/crates/db_schema/src/impls/mod.rs +++ b/crates/db_schema/src/impls/mod.rs @@ -17,6 +17,7 @@ pub mod instance_block; pub mod language; pub mod local_site; pub mod local_site_rate_limit; +pub mod local_site_url_blocklist; pub mod local_user; pub mod local_user_vote_display_mode; pub mod login_token; diff --git a/crates/db_schema/src/schema.rs b/crates/db_schema/src/schema.rs index a61b2d24f..408ed0540 100644 --- a/crates/db_schema/src/schema.rs +++ b/crates/db_schema/src/schema.rs @@ -409,6 +409,15 @@ diesel::table! { } } +diesel::table! { + local_site_url_blocklist (id) { + id -> Int4, + url -> Text, + published -> Timestamptz, + updated -> Nullable, + } +} + diesel::table! { use diesel::sql_types::*; use super::sql_types::SortTypeEnum; @@ -1052,6 +1061,7 @@ diesel::allow_tables_to_appear_in_same_query!( local_image, local_site, local_site_rate_limit, + local_site_url_blocklist, local_user, local_user_language, local_user_vote_display_mode, diff --git a/crates/db_schema/src/source/local_site_url_blocklist.rs b/crates/db_schema/src/source/local_site_url_blocklist.rs new file mode 100644 index 000000000..4ac0893ec --- /dev/null +++ b/crates/db_schema/src/source/local_site_url_blocklist.rs @@ -0,0 +1,28 @@ +#[cfg(feature = "full")] +use crate::schema::local_site_url_blocklist; +use chrono::{DateTime, Utc}; +use serde::{Deserialize, Serialize}; +use serde_with::skip_serializing_none; +#[cfg(feature = "full")] +use ts_rs::TS; + +#[skip_serializing_none] +#[derive(Clone, PartialEq, Eq, Debug, Serialize, Deserialize)] +#[cfg_attr(feature = "full", derive(Queryable, Selectable, Identifiable, TS))] +#[cfg_attr(feature = "full", diesel(table_name = local_site_url_blocklist))] +#[cfg_attr(feature = "full", diesel(check_for_backend(diesel::pg::Pg)))] +#[cfg_attr(feature = "full", ts(export))] +pub struct LocalSiteUrlBlocklist { + pub id: i32, + pub url: String, + pub published: DateTime, + pub updated: Option>, +} + +#[derive(Default, Clone)] +#[cfg_attr(feature = "full", derive(Insertable, AsChangeset))] +#[cfg_attr(feature = "full", diesel(table_name = local_site_url_blocklist))] +pub struct LocalSiteUrlBlocklistForm { + pub url: String, + pub updated: Option>, +} diff --git a/crates/db_schema/src/source/mod.rs b/crates/db_schema/src/source/mod.rs index ab82a114c..3a6501717 100644 --- a/crates/db_schema/src/source/mod.rs +++ b/crates/db_schema/src/source/mod.rs @@ -22,6 +22,7 @@ pub mod instance_block; pub mod language; pub mod local_site; pub mod local_site_rate_limit; +pub mod local_site_url_blocklist; pub mod local_user; pub mod local_user_vote_display_mode; pub mod login_token; diff --git a/crates/utils/src/error.rs b/crates/utils/src/error.rs index 9da018960..d25845894 100644 --- a/crates/utils/src/error.rs +++ b/crates/utils/src/error.rs @@ -135,6 +135,7 @@ pub enum LemmyErrorType { CouldntSetAllRegistrationsAccepted, CouldntSetAllEmailVerified, Banned, + BlockedUrl, CouldntGetComments, CouldntGetPosts, InvalidUrl, diff --git a/crates/utils/src/utils/markdown/mod.rs b/crates/utils/src/utils/markdown/mod.rs index bee2dcb94..c3def13a7 100644 --- a/crates/utils/src/utils/markdown/mod.rs +++ b/crates/utils/src/utils/markdown/mod.rs @@ -1,6 +1,7 @@ -use crate::settings::SETTINGS; +use crate::{error::LemmyResult, settings::SETTINGS, LemmyErrorType}; use markdown_it::{plugins::cmark::inline::image::Image, MarkdownIt}; use once_cell::sync::Lazy; +use regex::RegexSet; use url::Url; use urlencoding::encode; @@ -98,6 +99,13 @@ pub fn markdown_rewrite_image_links(mut src: String) -> (String, Vec) { (src, links) } +pub fn markdown_check_for_blocked_urls(text: &str, blocklist: &RegexSet) -> LemmyResult<()> { + if blocklist.is_match(text) { + Err(LemmyErrorType::BlockedUrl)? + } + Ok(()) +} + #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] @@ -109,65 +117,65 @@ mod tests { #[test] fn test_basic_markdown() { let tests: Vec<_> = vec![ - ( - "headings", - "# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6", - "

h1

\n

h2

\n

h3

\n

h4

\n
h5
\n
h6
\n" - ), - ( - "line breaks", - "First\rSecond", - "

First\nSecond

\n"), - ( - "emphasis", - "__bold__ **bold** *italic* ***bold+italic***", - "

bold bold italic bold+italic

\n" - ), - ( - "blockquotes", - "> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n", - "
\n

Hello

\n
    \n
  • Hola
  • \n
  • 안영
  • \n
\n
\n

Goodbye

\n
\n
\n" - ), - ( - "lists (ordered, unordered)", - "1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen", - "
    \n
  1. pen
  2. \n
  3. apple
  4. \n
  5. apple pen
  6. \n
\n
    \n
  • pen
  • \n
  • pineapple
  • \n
  • pineapple pen
  • \n
\n" - ), - ( - "code and code blocks", - "this is my amazing `code snippet` and my amazing ```code block```", - "

this is my amazing code snippet and my amazing code block

\n" - ), - // Links with added nofollow attribute - ( - "links", - "[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")", - "

Lemmy

\n" - ), - // Remote images with proxy - ( - "images", - "![My linked image](https://example.com/image.png \"image alt text\")", - "

\"My

\n" - ), - // Local images without proxy - ( - "images", - "![My linked image](https://lemmy-alpha/image.png \"image alt text\")", - "

\"My

\n" - ), - // Ensure spoiler plugin is added - ( - "basic spoiler", - "::: spoiler click to see more\nhow spicy!\n:::\n", - "
click to see more

how spicy!\n

\n" - ), - ( - "escape html special chars", - " hello &\"", - "

<script>alert(‘xss’);</script> hello &"

\n" - ) - ]; + ( + "headings", + "# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6", + "

h1

\n

h2

\n

h3

\n

h4

\n
h5
\n
h6
\n" + ), + ( + "line breaks", + "First\rSecond", + "

First\nSecond

\n"), + ( + "emphasis", + "__bold__ **bold** *italic* ***bold+italic***", + "

bold bold italic bold+italic

\n" + ), + ( + "blockquotes", + "> #### Hello\n > \n > - Hola\n > - 안영 \n>> Goodbye\n", + "
\n

Hello

\n
    \n
  • Hola
  • \n
  • 안영
  • \n
\n
\n

Goodbye

\n
\n
\n" + ), + ( + "lists (ordered, unordered)", + "1. pen\n2. apple\n3. apple pen\n- pen\n- pineapple\n- pineapple pen", + "
    \n
  1. pen
  2. \n
  3. apple
  4. \n
  5. apple pen
  6. \n
\n
    \n
  • pen
  • \n
  • pineapple
  • \n
  • pineapple pen
  • \n
\n" + ), + ( + "code and code blocks", + "this is my amazing `code snippet` and my amazing ```code block```", + "

this is my amazing code snippet and my amazing code block

\n" + ), + // Links with added nofollow attribute + ( + "links", + "[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")", + "

Lemmy

\n" + ), + // Remote images with proxy + ( + "images", + "![My linked image](https://example.com/image.png \"image alt text\")", + "

\"My

\n" + ), + // Local images without proxy + ( + "images", + "![My linked image](https://lemmy-alpha/image.png \"image alt text\")", + "

\"My

\n" + ), + // Ensure spoiler plugin is added + ( + "basic spoiler", + "::: spoiler click to see more\nhow spicy!\n:::\n", + "
click to see more

how spicy!\n

\n" + ), + ( + "escape html special chars", + " hello &\"", + "

<script>alert(‘xss’);</script> hello &"

\n" + ) + ]; tests.iter().for_each(|&(msg, input, expected)| { let result = markdown_to_html(input); @@ -184,46 +192,46 @@ mod tests { fn test_markdown_proxy_images() { let tests: Vec<_> = vec![ - ( - "remote image proxied", - "![link](http://example.com/image.jpg)", - "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)", - ), - ( - "local image unproxied", - "![link](http://lemmy-alpha/image.jpg)", - "![link](http://lemmy-alpha/image.jpg)", - ), - ( - "multiple image links", - "![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)", - "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)", - ), - ( - "empty link handled", - "![image]()", - "![image]()" - ), - ( - "empty label handled", - "![](http://example.com/image.jpg)", - "![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" - ), - ( - "invalid image link removed", - "![image](http-not-a-link)", - "![image]()" - ), - ( - "label with nested markdown handled", - "![a *b* c](http://example.com/image.jpg)", - "![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" - ), - ( - "custom emoji support", - r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#, - r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"# - ) + ( + "remote image proxied", + "![link](http://example.com/image.jpg)", + "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)", + ), + ( + "local image unproxied", + "![link](http://lemmy-alpha/image.jpg)", + "![link](http://lemmy-alpha/image.jpg)", + ), + ( + "multiple image links", + "![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)", + "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)", + ), + ( + "empty link handled", + "![image]()", + "![image]()" + ), + ( + "empty label handled", + "![](http://example.com/image.jpg)", + "![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" + ), + ( + "invalid image link removed", + "![image](http-not-a-link)", + "![image]()" + ), + ( + "label with nested markdown handled", + "![a *b* c](http://example.com/image.jpg)", + "![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" + ), + ( + "custom emoji support", + r#"![party-blob](https://www.hexbear.net/pictrs/image/83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"#, + r#"![party-blob](https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fwww.hexbear.net%2Fpictrs%2Fimage%2F83405746-0620-4728-9358-5f51b040ffee.gif "emoji party-blob")"# + ) ]; tests.iter().for_each(|&(msg, input, expected)| { @@ -237,6 +245,69 @@ mod tests { }); } + #[test] + fn test_url_blocking() { + let set = RegexSet::new(vec![r"(https://)?example\.com/?"]).unwrap(); + + assert!( + markdown_check_for_blocked_urls(&String::from("[](https://example.com)"), &set).is_err() + ); + + assert!(markdown_check_for_blocked_urls( + &String::from("Go to https://example.com to get free Robux"), + &set + ) + .is_err()); + + assert!( + markdown_check_for_blocked_urls(&String::from("[](https://example.blog)"), &set).is_ok() + ); + + assert!(markdown_check_for_blocked_urls(&String::from("example.com"), &set).is_err()); + + assert!(markdown_check_for_blocked_urls( + "Odio exercitationem culpa sed sunt + et. Sit et similique tempora deserunt doloremque. Cupiditate iusto + repellat et quis qui. Cum veritatis facere quasi repellendus sunt + eveniet nemo sint. Cumque sit unde est. https://example.com Alias + repellendus at quos.", + &set + ) + .is_err()); + + let set = RegexSet::new(vec![r"(https://)?example\.com/spam\.jpg"]).unwrap(); + assert!(markdown_check_for_blocked_urls( + &String::from("![](https://example.com/spam.jpg)"), + &set + ) + .is_err()); + + let set = RegexSet::new(vec![ + r"(https://)?quo\.example\.com/?", + r"(https://)?foo\.example\.com/?", + r"(https://)?bar\.example\.com/?", + ]) + .unwrap(); + + assert!( + markdown_check_for_blocked_urls(&String::from("https://baz.example.com"), &set).is_ok() + ); + + assert!( + markdown_check_for_blocked_urls(&String::from("https://bar.example.com"), &set).is_err() + ); + + let set = RegexSet::new(vec![r"(https://)?example\.com/banned_page"]).unwrap(); + + assert!( + markdown_check_for_blocked_urls(&String::from("https://example.com/page"), &set).is_ok() + ); + + let set = RegexSet::new(vec![r"(https://)?ex\.mple\.com/?"]).unwrap(); + + assert!(markdown_check_for_blocked_urls("example.com", &set).is_ok()); + } + #[test] fn test_sanitize_html() { let sanitized = sanitize_html(" hello &\"'"); diff --git a/crates/utils/src/utils/validation.rs b/crates/utils/src/utils/validation.rs index 23ef9744a..f9b2a87e5 100644 --- a/crates/utils/src/utils/validation.rs +++ b/crates/utils/src/utils/validation.rs @@ -1,8 +1,8 @@ use crate::error::{LemmyErrorExt, LemmyErrorType, LemmyResult}; use itertools::Itertools; use once_cell::sync::Lazy; -use regex::{Regex, RegexBuilder}; -use url::Url; +use regex::{Regex, RegexBuilder, RegexSet}; +use url::{ParseError, Url}; // From here: https://github.com/vector-im/element-android/blob/develop/matrix-sdk-android/src/main/java/org/matrix/android/sdk/api/MatrixPatterns.kt#L35 static VALID_MATRIX_ID_REGEX: Lazy = Lazy::new(|| { @@ -299,6 +299,33 @@ pub fn check_url_scheme(url: &Option) -> LemmyResult<()> { } } +pub fn is_url_blocked(url: &Option, blocklist: &RegexSet) -> LemmyResult<()> { + if let Some(url) = url { + if blocklist.is_match(url.as_str()) { + Err(LemmyErrorType::BlockedUrl)? + } + } + + Ok(()) +} + +pub fn check_urls_are_valid(urls: &Vec) -> LemmyResult> { + let mut parsed_urls = vec![]; + for url in urls { + let url = Url::parse(url).or_else(|e| { + if e == ParseError::RelativeUrlWithoutBase { + Url::parse(&format!("https://{url}")) + } else { + Err(e) + } + })?; + + parsed_urls.push(url.to_string()); + } + + Ok(parsed_urls) +} + #[cfg(test)] mod tests { #![allow(clippy::unwrap_used)] @@ -310,7 +337,9 @@ mod tests { build_and_check_regex, check_site_visibility_valid, check_url_scheme, + check_urls_are_valid, clean_url_params, + is_url_blocked, is_valid_actor_name, is_valid_bio_field, is_valid_display_name, @@ -550,4 +579,38 @@ mod tests { let magnet_link="magnet:?xt=urn:btih:4b390af3891e323778959d5abfff4b726510f14c&dn=Ravel%20Complete%20Piano%20Sheet%20Music%20-%20Public%20Domain&tr=udp%3A%2F%2Fopen.tracker.cl%3A1337%2Fannounce"; assert!(check_url_scheme(&Some(Url::parse(magnet_link).unwrap())).is_ok()); } + + #[test] + fn test_url_block() { + let set = regex::RegexSet::new(vec![ + r"(https://)?example\.org/page/to/article", + r"(https://)?example\.net/?", + r"(https://)?example\.com/?", + ]) + .unwrap(); + + assert!(is_url_blocked(&Some(Url::parse("https://example.blog").unwrap()), &set).is_ok()); + + assert!(is_url_blocked(&Some(Url::parse("https://example.org").unwrap()), &set).is_ok()); + + assert!(is_url_blocked(&None, &set).is_ok()); + + assert!(is_url_blocked(&Some(Url::parse("https://example.com").unwrap()), &set).is_err()); + } + + #[test] + fn test_url_parsed() { + assert_eq!( + vec![String::from("https://example.com/")], + check_urls_are_valid(&vec![String::from("example.com")]).unwrap() + ); + + assert!(check_urls_are_valid(&vec![ + String::from("example.com"), + String::from("https://example.blog") + ]) + .is_ok()); + + assert!(check_urls_are_valid(&vec![String::from("https://example .com"),]).is_err()); + } } diff --git a/migrations/2024-03-06-201637_url_blocklist/down.sql b/migrations/2024-03-06-201637_url_blocklist/down.sql new file mode 100644 index 000000000..442f3c922 --- /dev/null +++ b/migrations/2024-03-06-201637_url_blocklist/down.sql @@ -0,0 +1,3 @@ +-- This file should undo anything in `up.sql` +DROP TABLE local_site_url_blocklist; + diff --git a/migrations/2024-03-06-201637_url_blocklist/up.sql b/migrations/2024-03-06-201637_url_blocklist/up.sql new file mode 100644 index 000000000..bb9b704b4 --- /dev/null +++ b/migrations/2024-03-06-201637_url_blocklist/up.sql @@ -0,0 +1,7 @@ +CREATE TABLE local_site_url_blocklist ( + id serial NOT NULL PRIMARY KEY, + url text NOT NULL UNIQUE, + published timestamp with time zone NOT NULL DEFAULT now(), + updated timestamp with time zone +); +