From fc66badd85801643df0c6adffb6cfea907c04fe6 Mon Sep 17 00:00:00 2001 From: Felix Ableitner Date: Mon, 23 Oct 2023 16:36:05 +0200 Subject: [PATCH] rewrite markdown image links working --- crates/utils/src/utils/markdown/image_rule.rs | 53 --------- crates/utils/src/utils/markdown/mod.rs | 102 +++++++++++++++++- 2 files changed, 98 insertions(+), 57 deletions(-) delete mode 100644 crates/utils/src/utils/markdown/image_rule.rs diff --git a/crates/utils/src/utils/markdown/image_rule.rs b/crates/utils/src/utils/markdown/image_rule.rs deleted file mode 100644 index 3a13c7703..000000000 --- a/crates/utils/src/utils/markdown/image_rule.rs +++ /dev/null @@ -1,53 +0,0 @@ -use crate::settings::SETTINGS; -use markdown_it::{generics::inline::full_link, MarkdownIt, Node, NodeValue, Renderer}; -use url::Url; -use urlencoding::encode; - -/// Renders markdown images. Copied directly from markdown-it source. It rewrites remote image URLs -/// to go through local proxy. -/// -/// https://github.com/markdown-it-rust/markdown-it/blob/master/src/plugins/cmark/inline/image.rs -#[derive(Debug)] -pub struct Image { - pub url: String, - pub title: Option, -} - -impl NodeValue for Image { - fn render(&self, node: &Node, fmt: &mut dyn Renderer) { - let mut attrs = node.attrs.clone(); - - // TODO: error handling - - let url = Url::parse(&self.url).unwrap(); - - // Rewrite remote links to go through proxy - let url = if url.domain().unwrap() != SETTINGS.hostname { - let url = encode(&self.url); - format!( - "{}/api/v3/image_proxy?url={}", - SETTINGS.get_protocol_and_hostname(), - url - ) - } else { - self.url.clone() - }; - attrs.push(("src", url)); - attrs.push(("alt", node.collect_text())); - - if let Some(title) = &self.title { - attrs.push(("title", title.clone())); - } - - fmt.self_close("img", &attrs); - } -} - -pub fn add(md: &mut MarkdownIt) { - full_link::add_prefix::<'!', true>(md, |href, title| { - Node::new(Image { - url: href.unwrap_or_default(), - title, - }) - }); -} diff --git 
a/crates/utils/src/utils/markdown/mod.rs b/crates/utils/src/utils/markdown/mod.rs
index 2528b64e1..251de934f 100644
--- a/crates/utils/src/utils/markdown/mod.rs
+++ b/crates/utils/src/utils/markdown/mod.rs
@@ -1,7 +1,9 @@
-use markdown_it::MarkdownIt;
+use crate::settings::SETTINGS;
+use markdown_it::{plugins::cmark::inline::image::Image, MarkdownIt};
 use once_cell::sync::Lazy;
+use url::Url;
+use urlencoding::encode;
 
-pub mod image_rule;
 mod link_rule;
 mod spoiler_rule;
 
@@ -11,7 +13,6 @@ static MARKDOWN_PARSER: Lazy = Lazy::new(|| {
   markdown_it::plugins::extra::add(&mut parser);
   spoiler_rule::add(&mut parser);
   link_rule::add(&mut parser);
-  image_rule::add(&mut parser);
 
   parser
 });
@@ -33,6 +34,92 @@ pub fn markdown_to_html(text: &str) -> String {
   MARKDOWN_PARSER.parse(text).xrender()
 }
 
+/// Rewrites all links to remote domains in markdown, so they go through `/api/v3/image_proxy`.
+///
+/// Only inline images (`![label](url)` and `![label](url "title")`) are handled. The link
+/// destination is located in the raw source and swapped in place: remote URLs become proxied
+/// URLs, URLs that fail to parse are replaced with empty text, and URLs on the local domain are
+/// kept as they are. Images whose destination cannot be located reliably (reference-style
+/// images, or destinations whose raw text differs from the parsed URL) are left untouched.
+pub fn markdown_rewrite_image_links(mut src: String) -> String {
+  let ast = MARKDOWN_PARSER.parse(&src);
+  let mut links = vec![];
+
+  // Walk the syntax tree to find positions of image links
+  ast.walk(|node, _depth| {
+    if let Some(image) = node.cast::<Image>() {
+      // Skip the node instead of panicking if there is no source position or no exact match
+      let range = node.srcmap.and_then(|srcmap| {
+        let (_, node_end) = srcmap.get_byte_offsets();
+        image_url_range(&src, node_end, &image.url, image.title.as_deref())
+      });
+      links.extend(range);
+    }
+  });
+
+  // Replace from the back of the text to the front, so that the offsets which are still
+  // pending stay valid even when images are nested inside the label of another image
+  links.sort_unstable();
+  links.dedup();
+  while let Some((start, end)) = links.pop() {
+    let url = &src[start..end];
+    match Url::parse(url) {
+      Ok(parsed) => {
+        // If link points to remote domain, replace with proxied link
+        if parsed.domain() != Some(&SETTINGS.hostname) {
+          let proxied = format!(
+            "{}/api/v3/image_proxy?url={}",
+            SETTINGS.get_protocol_and_hostname(),
+            encode(url)
+          );
+          src.replace_range(start..end, &proxied);
+        }
+      }
+      Err(_) => {
+        // If its not a valid url, replace with empty text
+        src.replace_range(start..end, "");
+      }
+    }
+  }
+
+  src
+}
+
+/// Returns the byte range of an inline image's link destination within `src`.
+///
+/// `node_end` is the byte offset just past the image node, `url` and `title` are the values
+/// parsed from it. Returns `None` unless the text before the closing `)` is exactly `url`,
+/// optionally wrapped in `<…>` and followed by the quoted `title`.
+fn image_url_range(
+  src: &str,
+  node_end: usize,
+  url: &str,
+  title: Option<&str>,
+) -> Option<(usize, usize)> {
+  // Nothing to rewrite for `![label]()`
+  if url.is_empty() {
+    return None;
+  }
+  // Inline images end with `)`, reference-style images like `![label][ref]` do not
+  let mut head = src.get(..node_end)?.strip_suffix(')')?.trim_end();
+  if let Some(title) = title {
+    // Titles look like `"title"`, `'title'` or `(title)`
+    head = head
+      .strip_suffix(|c: char| matches!(c, '"' | '\'' | ')'))?
+      .strip_suffix(title)?
+      .strip_suffix(|c: char| matches!(c, '"' | '\'' | '('))?
+      .trim_end();
+  }
+  // The destination may be written as `<url>`
+  let head = if head.ends_with(url) {
+    head
+  } else {
+    head.strip_suffix('>')?
+  };
+  let start = head.strip_suffix(url)?.len();
+  Some((start, head.len()))
+}
+
 #[cfg(test)]
 mod tests {
#![allow(clippy::unwrap_used)] @@ -82,7 +124,7 @@ mod tests { ( "images", "![My linked image](https://example.com/image.png \"image alt text\")", - "

\"My

\n" + "

\"My

\n" ), // Local images without proxy ( @@ -114,6 +156,58 @@ mod tests { }); } + #[test] + fn test_markdown_proxy_images() { + let tests: Vec<_> = + vec![ + ( + "remote image proxied", + "![link](http://example.com/image.jpg)", + "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)", + ), + ( + "local image unproxied", + "![link](http://lemmy-alpha/image.jpg)", + "![link](http://lemmy-alpha/image.jpg)", + ), + ( + "multiple image links", + "![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)", + "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)", + ), + ( + "empty link handled", + "![image]()", + "![image]()" + ), + ( + "empty label handled", + "![](http://example.com/image.jpg)", + "![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" + ), + ( + "invalid image link removed", + "![image](http-not-a-link)", + "![image]()" + ), + ( + "label with nested markdown handled", + "![a *b* c](http://example.com/image.jpg)", + "![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)" + ) + ]; + + tests.iter().for_each(|&(msg, input, expected)| { + let result = markdown_rewrite_image_links(input.to_string()); + + assert_eq!( + result, expected, + "Testing {}, with original input '{}'", + msg, input + ); + }); + } + #[test] fn test_sanitize_html() { let sanitized = sanitize_html(" hello &\"'");