From 3be2a55dd01352bd7a16c58fbda15535de6e8fb4 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Wed, 11 Oct 2023 11:50:19 +0200 Subject: [PATCH] Add markdown image rule to add local image proxy (fixes #1036) --- Cargo.lock | 6 +- Cargo.toml | 1 + crates/routes/Cargo.toml | 1 + crates/routes/src/image_proxy.rs | 32 ++++++++++ crates/routes/src/images.rs | 17 ++---- crates/routes/src/lib.rs | 1 + crates/utils/Cargo.toml | 1 + crates/utils/src/utils/markdown/image_rule.rs | 53 ++++++++++++++++ crates/utils/src/utils/markdown/link_rule.rs | 41 ++++++------- crates/utils/src/utils/markdown/mod.rs | 60 +++++++++++-------- src/lib.rs | 11 +--- 11 files changed, 156 insertions(+), 68 deletions(-) create mode 100644 crates/routes/src/image_proxy.rs create mode 100644 crates/utils/src/utils/markdown/image_rule.rs diff --git a/Cargo.lock b/Cargo.lock index 41f132612..f163c30dc 100644 --- a/Cargo.lock +++ b/Cargo.lock @@ -2876,6 +2876,7 @@ dependencies = [ "tokio", "tracing", "url", + "urlencoding", ] [[package]] @@ -2962,6 +2963,7 @@ dependencies = [ "ts-rs", "typed-builder", "url", + "urlencoding", "uuid", ] @@ -5757,9 +5759,9 @@ dependencies = [ [[package]] name = "urlencoding" -version = "2.1.2" +version = "2.1.3" source = "registry+https://github.com/rust-lang/crates.io-index" -checksum = "e8db7427f936968176eaa7cdf81b7f98b980b18495ec28f1b5791ac3bfe3eea9" +checksum = "daf8dba3b7eb870caf1ddeed7bc9d2a049f3cfdfae7cb521b087cc33ae4c49da" [[package]] name = "utf-8" diff --git a/Cargo.toml b/Cargo.toml index 5e5cc654f..235a78210 100644 --- a/Cargo.toml +++ b/Cargo.toml @@ -127,6 +127,7 @@ rustls = { version = "0.21.3", features = ["dangerous_configuration"] } futures-util = "0.3.28" tokio-postgres = "0.7.8" tokio-postgres-rustls = "0.10.0" +urlencoding = "2.1.3" [dependencies] lemmy_api = { workspace = true } diff --git a/crates/routes/Cargo.toml b/crates/routes/Cargo.toml index 88672a1ec..aaa4191d6 100644 --- a/crates/routes/Cargo.toml +++ b/crates/routes/Cargo.toml @@ -30,4 
+30,5 @@ strum = { workspace = true } once_cell = { workspace = true } tracing = { workspace = true } tokio = { workspace = true } +urlencoding = { workspace = true } rss = "2.0.4" diff --git a/crates/routes/src/image_proxy.rs b/crates/routes/src/image_proxy.rs new file mode 100644 index 000000000..cd9932f28 --- /dev/null +++ b/crates/routes/src/image_proxy.rs @@ -0,0 +1,32 @@ +use actix_web::{ + web, + web::{Query, ServiceConfig}, + HttpResponse, +}; +use lemmy_api_common::context::LemmyContext; +use lemmy_utils::{error::LemmyResult, rate_limit::RateLimitCell}; +use serde::Deserialize; +use urlencoding::decode; + +pub fn config(cfg: &mut ServiceConfig, rate_limit: &RateLimitCell) { + cfg.service( + web::resource("/api/v3/image_proxy") + .wrap(rate_limit.message()) + .route(web::post().to(image_proxy)), + ); +} + +#[derive(Deserialize)] +struct ImageProxyParams { + url: String, +} + +async fn image_proxy( + Query(params): Query, + context: web::Data, +) -> LemmyResult { + let url = decode(&params.url)?.into_owned(); + let image_response = context.client().get(url).send().await?; + + Ok(HttpResponse::Ok().streaming(image_response.bytes_stream())) +} diff --git a/crates/routes/src/images.rs b/crates/routes/src/images.rs index a537300d2..2b4c9b579 100644 --- a/crates/routes/src/images.rs +++ b/crates/routes/src/images.rs @@ -23,13 +23,8 @@ use reqwest_middleware::{ClientWithMiddleware, RequestBuilder}; use serde::{Deserialize, Serialize}; use std::time::Duration; -pub fn config( - cfg: &mut web::ServiceConfig, - client: ClientWithMiddleware, - rate_limit: &RateLimitCell, -) { +pub fn config(cfg: &mut web::ServiceConfig, rate_limit: &RateLimitCell) { cfg - .app_data(web::Data::new(client)) .service( web::resource("/pictrs/image") .wrap(rate_limit.image()) @@ -135,7 +130,6 @@ async fn full_res( filename: web::Path, web::Query(params): web::Query, req: HttpRequest, - client: web::Data, context: web::Data, local_user_view: Option, ) -> Result { @@ -166,15 +160,15 @@ async fn 
full_res( url }; - image(url, req, client).await + image(url, req, context.client()).await } async fn image( url: String, req: HttpRequest, - client: web::Data, + client: &ClientWithMiddleware, ) -> Result { - let mut client_req = adapt_request(&req, &client, url); + let mut client_req = adapt_request(&req, client, url); if let Some(addr) = req.head().peer_addr { client_req = client_req.header("X-Forwarded-For", addr.to_string()); @@ -202,7 +196,6 @@ async fn image( async fn delete( components: web::Path<(String, String)>, req: HttpRequest, - client: web::Data, context: web::Data, // require login _local_user_view: LocalUserView, @@ -212,7 +205,7 @@ async fn delete( let pictrs_config = context.settings().pictrs_config()?; let url = format!("{}image/delete/{}/{}", pictrs_config.url, &token, &file); - let mut client_req = adapt_request(&req, &client, url); + let mut client_req = adapt_request(&req, context.client(), url); if let Some(addr) = req.head().peer_addr { client_req = client_req.header("X-Forwarded-For", addr.to_string()); diff --git a/crates/routes/src/lib.rs b/crates/routes/src/lib.rs index 28da113ef..5ad010af5 100644 --- a/crates/routes/src/lib.rs +++ b/crates/routes/src/lib.rs @@ -3,6 +3,7 @@ use lemmy_db_views::structs::LocalUserView; use lemmy_utils::error::LemmyError; pub mod feeds; +pub mod image_proxy; pub mod images; pub mod nodeinfo; pub mod webfinger; diff --git a/crates/utils/Cargo.toml b/crates/utils/Cargo.toml index 20611702e..b215a6662 100644 --- a/crates/utils/Cargo.toml +++ b/crates/utils/Cargo.toml @@ -40,6 +40,7 @@ rosetta-i18n = { workspace = true } typed-builder = { workspace = true } percent-encoding = { workspace = true } tokio = { workspace = true } +urlencoding = { workspace = true } openssl = "0.10.55" html2text = "0.6.0" deser-hjson = "1.2.0" diff --git a/crates/utils/src/utils/markdown/image_rule.rs b/crates/utils/src/utils/markdown/image_rule.rs new file mode 100644 index 000000000..3a13c7703 --- /dev/null +++ 
b/crates/utils/src/utils/markdown/image_rule.rs @@ -0,0 +1,53 @@ +use crate::settings::SETTINGS; +use markdown_it::{generics::inline::full_link, MarkdownIt, Node, NodeValue, Renderer}; +use url::Url; +use urlencoding::encode; + +/// Renders markdown images. Copied directly from markdown-it source. It rewrites remote image URLs +/// to go through local proxy. +/// +/// https://github.com/markdown-it-rust/markdown-it/blob/master/src/plugins/cmark/inline/image.rs +#[derive(Debug)] +pub struct Image { + pub url: String, + pub title: Option, +} + +impl NodeValue for Image { + fn render(&self, node: &Node, fmt: &mut dyn Renderer) { + let mut attrs = node.attrs.clone(); + + // TODO: error handling + + let url = Url::parse(&self.url).unwrap(); + + // Rewrite remote links to go through proxy + let url = if url.domain().unwrap() != SETTINGS.hostname { + let url = encode(&self.url); + format!( + "{}/api/v3/image_proxy?url={}", + SETTINGS.get_protocol_and_hostname(), + url + ) + } else { + self.url.clone() + }; + attrs.push(("src", url)); + attrs.push(("alt", node.collect_text())); + + if let Some(title) = &self.title { + attrs.push(("title", title.clone())); + } + + fmt.self_close("img", &attrs); + } +} + +pub fn add(md: &mut MarkdownIt) { + full_link::add_prefix::<'!', true>(md, |href, title| { + Node::new(Image { + url: href.unwrap_or_default(), + title, + }) + }); +} diff --git a/crates/utils/src/utils/markdown/link_rule.rs b/crates/utils/src/utils/markdown/link_rule.rs index bd231d5ef..15edcd7b1 100644 --- a/crates/utils/src/utils/markdown/link_rule.rs +++ b/crates/utils/src/utils/markdown/link_rule.rs @@ -1,5 +1,4 @@ -use markdown_it::generics::inline::full_link; -use markdown_it::{MarkdownIt, Node, NodeValue, Renderer}; +use markdown_it::{generics::inline::full_link, MarkdownIt, Node, NodeValue, Renderer}; /// Renders markdown links. Copied directly from markdown-it source, unlike original code it also /// sets `rel=nofollow` attribute. 
@@ -9,29 +8,31 @@ use markdown_it::{MarkdownIt, Node, NodeValue, Renderer}; /// https://github.com/markdown-it-rust/markdown-it/blob/master/src/plugins/cmark/inline/link.rs #[derive(Debug)] pub struct Link { - pub url: String, - pub title: Option, + pub url: String, + pub title: Option, } impl NodeValue for Link { - fn render(&self, node: &Node, fmt: &mut dyn Renderer) { - let mut attrs = node.attrs.clone(); - attrs.push(("href", self.url.clone())); - attrs.push(("rel", "nofollow".to_string())); + fn render(&self, node: &Node, fmt: &mut dyn Renderer) { + let mut attrs = node.attrs.clone(); + attrs.push(("href", self.url.clone())); + attrs.push(("rel", "nofollow".to_string())); - if let Some(title) = &self.title { - attrs.push(("title", title.clone())); - } - - fmt.open("a", &attrs); - fmt.contents(&node.children); - fmt.close("a"); + if let Some(title) = &self.title { + attrs.push(("title", title.clone())); } + + fmt.open("a", &attrs); + fmt.contents(&node.children); + fmt.close("a"); + } } pub fn add(md: &mut MarkdownIt) { - full_link::add::(md, |href, title| Node::new(Link { - url: href.unwrap_or_default(), - title, - })); -} \ No newline at end of file + full_link::add::(md, |href, title| { + Node::new(Link { + url: href.unwrap_or_default(), + title, + }) + }); +} diff --git a/crates/utils/src/utils/markdown/mod.rs b/crates/utils/src/utils/markdown/mod.rs index 3c1cd4c03..62dec52f3 100644 --- a/crates/utils/src/utils/markdown/mod.rs +++ b/crates/utils/src/utils/markdown/mod.rs @@ -1,33 +1,35 @@ use markdown_it::MarkdownIt; use once_cell::sync::Lazy; -mod spoiler_rule; +pub mod image_rule; mod link_rule; +mod spoiler_rule; static MARKDOWN_PARSER: Lazy = Lazy::new(|| { - let mut parser = MarkdownIt::new(); - markdown_it::plugins::cmark::add(&mut parser); - markdown_it::plugins::extra::add(&mut parser); - spoiler_rule::add(&mut parser); - link_rule::add(&mut parser); + let mut parser = MarkdownIt::new(); + markdown_it::plugins::cmark::add(&mut parser); + 
markdown_it::plugins::extra::add(&mut parser); + spoiler_rule::add(&mut parser); + link_rule::add(&mut parser); + image_rule::add(&mut parser); - parser + parser }); pub fn markdown_to_html(text: &str) -> String { - MARKDOWN_PARSER.parse(text).xrender() + MARKDOWN_PARSER.parse(text).xrender() } #[cfg(test)] mod tests { - #![allow(clippy::unwrap_used)] - #![allow(clippy::indexing_slicing)] + #![allow(clippy::unwrap_used)] + #![allow(clippy::indexing_slicing)] - use crate::utils::markdown::markdown_to_html; + use crate::utils::markdown::markdown_to_html; - #[test] - fn test_basic_markdown() { - let tests: Vec<_> = vec![ + #[test] + fn test_basic_markdown() { + let tests: Vec<_> = vec![ ( "headings", "# h1\n## h2\n### h3\n#### h4\n##### h5\n###### h6", @@ -57,15 +59,23 @@ mod tests { "this is my amazing `code snippet` and my amazing ```code block```", "

this is my amazing code snippet and my amazing code block

\n" ), + // Links with added nofollow attribute ( "links", "[Lemmy](https://join-lemmy.org/ \"Join Lemmy!\")", "

Lemmy

\n" ), + // Remote images with proxy ( "images", - "![My linked image](https://image.com \"image alt text\")", - "

\"My

\n" + "![My linked image](https://example.com/image.png \"image alt text\")", + "

\"My

\n" + ), + // Local images without proxy + ( + "images", + "![My linked image](https://lemmy-alpha/image.png \"image alt text\")", + "

\"My

\n" ), // Ensure any custom plugins are added to 'MARKDOWN_PARSER' implementation. ( @@ -75,14 +85,14 @@ mod tests { ), ]; - tests.iter().for_each(|&(msg, input, expected)| { - let result = markdown_to_html(input); + tests.iter().for_each(|&(msg, input, expected)| { + let result = markdown_to_html(input); - assert_eq!( - result, expected, - "Testing {}, with original input '{}'", - msg, input - ); - }); - } + assert_eq!( + result, expected, + "Testing {}, with original input '{}'", + msg, input + ); + }); + } } diff --git a/src/lib.rs b/src/lib.rs index c093faaca..f7fe66bbe 100644 --- a/src/lib.rs +++ b/src/lib.rs @@ -53,7 +53,7 @@ use lemmy_utils::{ settings::{structs::Settings, SETTINGS}, }; use reqwest::Client; -use reqwest_middleware::{ClientBuilder, ClientWithMiddleware}; +use reqwest_middleware::ClientBuilder; use reqwest_tracing::TracingMiddleware; use serde_json::json; use std::{env, ops::Deref, time::Duration}; @@ -174,11 +174,6 @@ pub async fn start_lemmy_server(args: CmdArgs) -> Result<(), LemmyError> { .with(TracingMiddleware::default()) .build(); - // Pictrs cannot use the retry middleware - let pictrs_client = ClientBuilder::new(reqwest_client.clone()) - .with(TracingMiddleware::default()) - .build(); - let context = LemmyContext::create( pool.clone(), client.clone(), @@ -221,7 +216,6 @@ pub async fn start_lemmy_server(args: CmdArgs) -> Result<(), LemmyError> { federation_config.clone(), settings.clone(), federation_enabled, - pictrs_client, )?) 
} else { None @@ -287,7 +281,6 @@ fn create_http_server( federation_config: FederationConfig, settings: Settings, federation_enabled: bool, - pictrs_client: ClientWithMiddleware, ) -> Result { // this must come before the HttpServer creation // creates a middleware that populates http metrics for each path, method, and status code @@ -342,7 +335,7 @@ fn create_http_server( } }) .configure(feeds::config) - .configure(|cfg| images::config(cfg, pictrs_client.clone(), &rate_limit_cell)) + .configure(|cfg| images::config(cfg, &rate_limit_cell)) .configure(nodeinfo::config) }) .disable_signals()