Extract the nodeinfo lookup from update_instance_software into a helper.

The per-instance body of the loop in `update_instance_software` moves, unchanged
in logic, into `build_update_instance_form(domain, client)`. The helper returns
`None` only when the well-known request itself fails; every other outcome yields
a form with `updated` set, plus software name/version when nodeinfo parses.

The ignored `test_nodeinfo` test is replaced by two `#[serial]` tests that call
the helper for voyager.lemmy.ml and mastodon.social. These are not `#[ignore]`d
and issue real HTTPS requests through the helper.

NOTE(review): this patch was recovered from a copy whose newlines, indentation
and `<...>` generic arguments had been stripped. Leading whitespace, blank
context lines, and the generics `::<Instance>`, `::<NodeInfoWellKnown>`,
`::<NodeInfo>` and `Option<InstanceForm>` are reconstructed -- confirm against
the target tree, or apply with `git apply --ignore-space-change`.

diff --git a/src/scheduled_tasks.rs b/src/scheduled_tasks.rs
index e0edbda03..87a2cdfb8 100644
--- a/src/scheduled_tasks.rs
+++ b/src/scheduled_tasks.rs
@@ -456,8 +456,6 @@ async fn update_banned_when_expired(pool: &mut DbPool<'_>) {
 /// https://github.com/jhass/nodeinfo/blob/main/PROTOCOL.md
 ///
 /// TODO: if instance has been dead for a long time, it should be checked less frequently
-/// TODO This function is a bit of a nightmare with its embedded matches, but the only other way
-/// would be to extract the fetches into functions which return the default_form on errors.
 async fn update_instance_software(
   pool: &mut DbPool<'_>,
   client: &ClientWithMiddleware,
@@ -470,68 +468,7 @@ async fn update_instance_software(
       let instances = instance::table.get_results::<Instance>(&mut conn).await?;
 
       for instance in instances {
-        // The `updated` column is used to check if instances are alive. If it is more than three
-        // days in the past, no outgoing activities will be sent to that instance. However
-        // not every Fediverse instance has a valid Nodeinfo endpoint (its not required for
-        // Activitypub). That's why we always need to mark instances as updated if they are
-        // alive.
-        let mut instance_form = InstanceForm::builder()
-          .domain(instance.domain.clone())
-          .updated(Some(naive_now()))
-          .build();
-
-        // First, fetch their /.well-known/nodeinfo, then extract the correct nodeinfo link from it
-        let well_known_url = format!("https://{}/.well-known/nodeinfo", instance.domain);
-
-        let form = match client.get(&well_known_url).send().await {
-          Ok(res) if res.status().is_client_error() => {
-            // Instance doesn't have well-known but sent a response, consider it alive
-            Some(instance_form)
-          }
-          Ok(res) => match res.json::<NodeInfoWellKnown>().await {
-            Ok(well_known) => {
-              // Find the first link where the rel contains the allowed rels above
-              match well_known.links.into_iter().find(|links| {
-                links
-                  .rel
-                  .as_str()
-                  .starts_with("http://nodeinfo.diaspora.software/ns/schema/2.")
-              }) {
-                Some(well_known_link) => {
-                  let node_info_url = well_known_link.href;
-
-                  // Fetch the node_info from the well known href
-                  match client.get(node_info_url).send().await {
-                    Ok(node_info_res) => match node_info_res.json::<NodeInfo>().await {
-                      Ok(node_info) => {
-                        // Instance sent valid nodeinfo, write it to db
-                        // Set the instance form fields.
-                        if let Some(software) = node_info.software.as_ref() {
-                          instance_form.software.clone_from(&software.name);
-                          instance_form.version.clone_from(&software.version);
-                        }
-                        Some(instance_form)
-                      }
-                      Err(_) => Some(instance_form),
-                    },
-                    Err(_) => Some(instance_form),
-                  }
-                }
-                // If none is found, use the default form above
-                None => Some(instance_form),
-              }
-            }
-            Err(_) => {
-              // No valid nodeinfo but valid HTTP response, consider instance alive
-              Some(instance_form)
-            }
-          },
-          Err(_) => {
-            // dead instance, do nothing
-            None
-          }
-        };
-        if let Some(form) = form {
+        if let Some(form) = build_update_instance_form(&instance.domain, client).await {
           Instance::update(pool, instance.id, form).await?;
         }
       }
@@ -544,28 +481,114 @@ async fn update_instance_software(
   Ok(())
 }
 
+/// This builds an instance update form, for a given domain.
+/// If the instance sends a response, but doesn't have a well-known or nodeinfo,
+/// Then return a default form with only the updated field.
+///
+/// TODO This function is a bit of a nightmare with its embedded matches, but the only other way
+/// would be to extract the fetches into functions which return the default_form on errors.
+async fn build_update_instance_form(
+  domain: &str,
+  client: &ClientWithMiddleware,
+) -> Option<InstanceForm> {
+  // The `updated` column is used to check if instances are alive. If it is more than three
+  // days in the past, no outgoing activities will be sent to that instance. However
+  // not every Fediverse instance has a valid Nodeinfo endpoint (its not required for
+  // Activitypub). That's why we always need to mark instances as updated if they are
+  // alive.
+  let mut instance_form = InstanceForm::builder()
+    .domain(domain.to_string())
+    .updated(Some(naive_now()))
+    .build();
+
+  // First, fetch their /.well-known/nodeinfo, then extract the correct nodeinfo link from it
+  let well_known_url = format!("https://{}/.well-known/nodeinfo", domain);
+
+  match client.get(&well_known_url).send().await {
+    Ok(res) if res.status().is_client_error() => {
+      // Instance doesn't have well-known but sent a response, consider it alive
+      Some(instance_form)
+    }
+    Ok(res) => match res.json::<NodeInfoWellKnown>().await {
+      Ok(well_known) => {
+        // Find the first link where the rel contains the allowed rels above
+        match well_known.links.into_iter().find(|links| {
+          links
+            .rel
+            .as_str()
+            .starts_with("http://nodeinfo.diaspora.software/ns/schema/2.")
+        }) {
+          Some(well_known_link) => {
+            let node_info_url = well_known_link.href;
+
+            // Fetch the node_info from the well known href
+            match client.get(node_info_url).send().await {
+              Ok(node_info_res) => match node_info_res.json::<NodeInfo>().await {
+                Ok(node_info) => {
+                  // Instance sent valid nodeinfo, write it to db
+                  // Set the instance form fields.
+                  if let Some(software) = node_info.software.as_ref() {
+                    instance_form.software.clone_from(&software.name);
+                    instance_form.version.clone_from(&software.version);
+                  }
+                  Some(instance_form)
+                }
+                Err(_) => Some(instance_form),
+              },
+              Err(_) => Some(instance_form),
+            }
+          }
+          // If none is found, use the default form above
+          None => Some(instance_form),
+        }
+      }
+      Err(_) => {
+        // No valid nodeinfo but valid HTTP response, consider instance alive
+        Some(instance_form)
+      }
+    },
+    Err(_) => {
+      // dead instance, do nothing
+      None
+    }
+  }
+}
 #[cfg(test)]
-#[allow(clippy::unwrap_used)]
 #[allow(clippy::indexing_slicing)]
 mod tests {
 
-  use lemmy_routes::nodeinfo::NodeInfo;
+  use crate::scheduled_tasks::build_update_instance_form;
+  use lemmy_api_common::request::client_builder;
+  use lemmy_utils::{error::LemmyResult, settings::structs::Settings, LemmyErrorType};
   use pretty_assertions::assert_eq;
-  use reqwest::Client;
+  use reqwest_middleware::ClientBuilder;
+  use serial_test::serial;
 
   #[tokio::test]
-  #[ignore]
-  async fn test_nodeinfo() {
-    let client = Client::builder().build().unwrap();
-    let lemmy_ml_nodeinfo = client
-      .get("https://lemmy.ml/nodeinfo/2.0.json")
-      .send()
+  #[serial]
+  async fn test_nodeinfo_voyager_lemmy_ml() -> LemmyResult<()> {
+    let client = ClientBuilder::new(client_builder(&Settings::default()).build()?).build();
+    let form = build_update_instance_form("voyager.lemmy.ml", &client)
       .await
-      .unwrap()
-      .json::<NodeInfo>()
-      .await
-      .unwrap();
+      .ok_or(LemmyErrorType::CouldntFindObject)?;
+    assert_eq!(
+      form.software.ok_or(LemmyErrorType::CouldntFindObject)?,
+      "lemmy"
+    );
+    Ok(())
+  }
 
-    assert_eq!(lemmy_ml_nodeinfo.software.unwrap().name.unwrap(), "lemmy");
+  #[tokio::test]
+  #[serial]
+  async fn test_nodeinfo_mastodon_social() -> LemmyResult<()> {
+    let client = ClientBuilder::new(client_builder(&Settings::default()).build()?).build();
+    let form = build_update_instance_form("mastodon.social", &client)
+      .await
+      .ok_or(LemmyErrorType::CouldntFindObject)?;
+    assert_eq!(
+      form.software.ok_or(LemmyErrorType::CouldntFindObject)?,
+      "mastodon"
+    );
+    Ok(())
   }
 }