rewrite markdown image links working

cleanup-request-rs
Felix Ableitner 2023-10-23 16:36:05 +02:00
parent 650e3a71d7
commit fc66badd85
2 changed files with 98 additions and 57 deletions

View File

@ -1,53 +0,0 @@
use crate::settings::SETTINGS;
use markdown_it::{generics::inline::full_link, MarkdownIt, Node, NodeValue, Renderer};
use url::Url;
use urlencoding::encode;
/// AST node for a markdown image (`![alt](url "title")`).
///
/// Adapted from the upstream markdown-it image plugin; the difference is in the
/// `NodeValue::render` implementation below, which rewrites remote image URLs so
/// that they are fetched through the local `/api/v3/image_proxy` endpoint.
///
/// https://github.com/markdown-it-rust/markdown-it/blob/master/src/plugins/cmark/inline/image.rs
#[derive(Debug)]
pub struct Image {
// Image destination as handed over by the link parser (empty when none was given).
pub url: String,
// Optional title; rendered as the `title` attribute when present.
pub title: Option<String>,
}
impl NodeValue for Image {
    /// Renders the node as a self-closing `<img>` tag.
    ///
    /// The `src` attribute is chosen as follows:
    /// * URL parses and its domain equals `SETTINGS.hostname`: emitted unchanged.
    /// * URL parses but points anywhere else (including hosts without a domain
    ///   name, e.g. bare IP addresses): rewritten to
    ///   `<protocol-and-hostname>/api/v3/image_proxy?url=<percent-encoded url>`.
    /// * URL does not parse as an absolute URL: emitted as an empty string, the
    ///   same policy `markdown_rewrite_image_links` applies to invalid links.
    ///
    /// The URL comes from user-written markdown, so this method must never panic.
    fn render(&self, node: &Node, fmt: &mut dyn Renderer) {
        let mut attrs = node.attrs.clone();

        let src = match Url::parse(&self.url) {
            // Local image: no proxying needed.
            Ok(parsed) if parsed.domain() == Some(SETTINGS.hostname.as_str()) => self.url.clone(),
            // Remote image (or a host without a domain name): go through the proxy.
            Ok(_) => format!(
                "{}/api/v3/image_proxy?url={}",
                SETTINGS.get_protocol_and_hostname(),
                encode(&self.url)
            ),
            // Not a valid absolute URL: drop it instead of panicking.
            Err(_) => String::new(),
        };

        attrs.push(("src", src));
        attrs.push(("alt", node.collect_text()));
        if let Some(title) = &self.title {
            attrs.push(("title", title.clone()));
        }

        fmt.self_close("img", &attrs);
    }
}
/// Registers the image rule on `md`.
///
/// Hooks into the generic full-link parser with the `!` prefix so that
/// `![label](destination "title")` produces an [`Image`] node. A missing
/// destination becomes an empty URL.
// NOTE(review): the second const argument (`true`) presumably enables nested
// inline content inside the label — confirm against the markdown-it docs.
pub fn add(md: &mut MarkdownIt) {
    full_link::add_prefix::<'!', true>(md, |destination, link_title| {
        let image = Image {
            title: link_title,
            url: destination.unwrap_or_default(),
        };
        Node::new(image)
    });
}

View File

@ -1,7 +1,9 @@
use markdown_it::MarkdownIt; use crate::settings::SETTINGS;
use markdown_it::{plugins::cmark::inline::image::Image, MarkdownIt};
use once_cell::sync::Lazy; use once_cell::sync::Lazy;
use url::Url;
use urlencoding::encode;
pub mod image_rule;
mod link_rule; mod link_rule;
mod spoiler_rule; mod spoiler_rule;
@ -11,7 +13,6 @@ static MARKDOWN_PARSER: Lazy<MarkdownIt> = Lazy::new(|| {
markdown_it::plugins::extra::add(&mut parser); markdown_it::plugins::extra::add(&mut parser);
spoiler_rule::add(&mut parser); spoiler_rule::add(&mut parser);
link_rule::add(&mut parser); link_rule::add(&mut parser);
image_rule::add(&mut parser);
parser parser
}); });
@ -33,6 +34,47 @@ pub fn markdown_to_html(text: &str) -> String {
MARKDOWN_PARSER.parse(text).xrender() MARKDOWN_PARSER.parse(text).xrender()
} }
/// Rewrites all image links to remote domains in markdown, so they go through
/// `/api/v3/image_proxy`.
///
/// For every image node found in the parsed markdown:
/// * a URL whose domain equals `SETTINGS.hostname` is left untouched,
/// * any other valid URL is replaced with
///   `<protocol-and-hostname>/api/v3/image_proxy?url=<percent-encoded url>`,
/// * a destination that is not a valid absolute URL is replaced with empty text.
///
/// Images whose destination cannot be located verbatim inside the node's own
/// source span (reference-style images, destinations the parser normalised, ...)
/// are left unchanged rather than risking a rewrite of unrelated text.
///
/// The input is user-written markdown, so this function must never panic.
pub fn markdown_rewrite_image_links(mut src: String) -> String {
    let ast = MARKDOWN_PARSER.parse(&src);
    let mut links = vec![];

    // Walk the syntax tree to find the byte ranges of image destinations.
    ast.walk(|node, _depth| {
        if let Some(image) = node.cast::<Image>() {
            let Some(srcmap) = node.srcmap else { return };
            let (node_start, node_end) = srcmap.get_byte_offsets();
            let Some(span) = src.get(node_start..node_end) else {
                return;
            };
            if let Some(rel) = locate_image_url(span, &image.url) {
                let start = node_start + rel;
                links.push((start, start + image.url.len()));
            }
        }
    });

    // Replace from the end of the text towards the start so that earlier byte
    // offsets stay valid. Sorting is required because the tree walk reports an
    // outer image before an image nested in its label, although the nested
    // destination comes first in the text.
    links.sort_unstable_by(|a, b| b.0.cmp(&a.0));

    for (start, end) in links {
        let replacement = match Url::parse(&src[start..end]) {
            // Local image: keep as is.
            Ok(parsed) if parsed.domain() == Some(SETTINGS.hostname.as_str()) => continue,
            // If link points to remote domain, replace with proxied link.
            Ok(_) => format!(
                "{}/api/v3/image_proxy?url={}",
                SETTINGS.get_protocol_and_hostname(),
                encode(&src[start..end])
            ),
            // If it's not a valid url, replace with empty text.
            Err(_) => String::new(),
        };
        src.replace_range(start..end, &replacement);
    }

    src
}

/// Returns the byte offset (relative to `span`) at which `url` starts inside the
/// source text of a single image node, or `None` if it cannot be found safely.
///
/// Handles `![label](url)` and `![label](url "title")`. Empty URLs are skipped
/// because there is nothing to rewrite.
fn locate_image_url(span: &str, url: &str) -> Option<usize> {
    if url.is_empty() {
        return None;
    }

    // Common case: the destination sits directly before the closing paren.
    let at_tail = span
        .strip_suffix(')')
        .and_then(|rest| rest.strip_suffix(url))
        .map_or(false, |rest| rest.ends_with('('));
    if at_tail {
        return Some(span.len() - 1 - url.len());
    }

    // Destination followed by a title (or whitespace): search for `](url` and
    // make sure the match is the whole destination, not just a prefix of it.
    let needle = format!("]({url}");
    let idx = span.rfind(&needle)?;
    let after = span[idx + needle.len()..].chars().next()?;
    (after.is_whitespace() || after == ')').then_some(idx + 2)
}
#[cfg(test)] #[cfg(test)]
mod tests { mod tests {
#![allow(clippy::unwrap_used)] #![allow(clippy::unwrap_used)]
@ -82,7 +124,7 @@ mod tests {
( (
"images", "images",
"![My linked image](https://example.com/image.png \"image alt text\")", "![My linked image](https://example.com/image.png \"image alt text\")",
"<p><img src=\"https://lemmy-alpha/api/v3/image_proxy?url=https%3A%2F%2Fexample.com%2Fimage.png\" alt=\"My linked image\" title=\"image alt text\" /></p>\n" "<p><img src=\"https://example.com/image.png\" alt=\"My linked image\" title=\"image alt text\" /></p>\n"
), ),
// Local images without proxy // Local images without proxy
( (
@ -114,6 +156,58 @@ mod tests {
}); });
} }
/// Table-driven check of `markdown_rewrite_image_links`: each case is
/// `(description, markdown input, expected markdown output)`.
#[test]
fn test_markdown_proxy_images() {
    let cases = [
        (
            "remote image proxied",
            "![link](http://example.com/image.jpg)",
            "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)",
        ),
        (
            "local image unproxied",
            "![link](http://lemmy-alpha/image.jpg)",
            "![link](http://lemmy-alpha/image.jpg)",
        ),
        (
            "multiple image links",
            "![link](http://example.com/image1.jpg) ![link](http://example.com/image2.jpg)",
            "![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage1.jpg) ![link](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage2.jpg)",
        ),
        (
            "empty link handled",
            "![image]()",
            "![image]()",
        ),
        (
            "empty label handled",
            "![](http://example.com/image.jpg)",
            "![](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)",
        ),
        (
            "invalid image link removed",
            "![image](http-not-a-link)",
            "![image]()",
        ),
        (
            "label with nested markdown handled",
            "![a *b* c](http://example.com/image.jpg)",
            "![a *b* c](https://lemmy-alpha/api/v3/image_proxy?url=http%3A%2F%2Fexample.com%2Fimage.jpg)",
        ),
    ];

    for &(msg, input, expected) in cases.iter() {
        let result = markdown_rewrite_image_links(input.to_string());
        assert_eq!(
            result, expected,
            "Testing {}, with original input '{}'",
            msg, input
        );
    }
}
#[test] #[test]
fn test_sanitize_html() { fn test_sanitize_html() {
let sanitized = sanitize_html("<script>alert('xss');</script> hello &\"'"); let sanitized = sanitize_html("<script>alert('xss');</script> hello &\"'");