2021-11-05 00:24:10 +00:00
|
|
|
use crate::{traits::ApubObject, APUB_JSON_CONTENT_TYPE};
|
|
|
|
use activitystreams::chrono::{Duration as ChronoDuration, NaiveDateTime, Utc};
|
2021-09-25 15:44:52 +00:00
|
|
|
use anyhow::anyhow;
|
2021-10-18 21:36:44 +00:00
|
|
|
use diesel::NotFound;
|
2021-10-27 16:03:07 +00:00
|
|
|
use lemmy_utils::{
|
|
|
|
request::{build_user_agent, retry},
|
|
|
|
settings::structs::Settings,
|
|
|
|
LemmyError,
|
|
|
|
};
|
|
|
|
use log::info;
|
|
|
|
use reqwest::{Client, StatusCode};
|
2021-09-25 15:44:52 +00:00
|
|
|
use serde::{Deserialize, Serialize};
|
|
|
|
use std::{
|
|
|
|
fmt::{Debug, Display, Formatter},
|
|
|
|
marker::PhantomData,
|
|
|
|
time::Duration,
|
|
|
|
};
|
|
|
|
use url::Url;
|
|
|
|
|
|
|
|
/// Upper bound on the number of HTTP requests that may be made while handling a single incoming
/// activity (or a single object fetch through the search).
///
/// TODO: this should be configurable.
static REQUEST_LIMIT: i32 = 25;
|
|
|
|
|
2021-10-27 16:03:07 +00:00
|
|
|
lazy_static! {
    /// HTTP client used by `ObjectId::dereference_from_http`. It is built on first use and sends
    /// the user agent derived from the instance settings. Panics on first use if the client
    /// cannot be built.
    static ref CLIENT: Client = Client::builder()
        .user_agent(build_user_agent(&Settings::get()))
        .build()
        .unwrap();
}
|
|
|
|
|
2021-11-06 13:25:34 +00:00
|
|
|
/// We store Url on the heap because it is quite large (88 bytes).
|
2021-09-25 15:44:52 +00:00
|
|
|
#[derive(Clone, PartialEq, Serialize, Deserialize, Debug)]
|
2021-10-21 17:25:35 +00:00
|
|
|
#[serde(transparent)]
|
2021-11-06 13:25:34 +00:00
|
|
|
pub struct ObjectId<Kind>(Box<Url>, #[serde(skip)] PhantomData<Kind>)
|
2021-09-25 15:44:52 +00:00
|
|
|
where
|
2021-10-27 16:03:07 +00:00
|
|
|
Kind: ApubObject + Send + 'static,
|
|
|
|
for<'de2> <Kind as ApubObject>::ApubType: serde::Deserialize<'de2>;
|
2021-09-25 15:44:52 +00:00
|
|
|
|
|
|
|
impl<Kind> ObjectId<Kind>
|
|
|
|
where
|
2021-10-27 16:03:07 +00:00
|
|
|
Kind: ApubObject + Send + 'static,
|
|
|
|
for<'de2> <Kind as ApubObject>::ApubType: serde::Deserialize<'de2>,
|
2021-09-25 15:44:52 +00:00
|
|
|
{
|
|
|
|
pub fn new<T>(url: T) -> Self
|
|
|
|
where
|
|
|
|
T: Into<Url>,
|
|
|
|
{
|
2021-11-06 13:25:34 +00:00
|
|
|
ObjectId(Box::new(url.into()), PhantomData::<Kind>)
|
2021-09-25 15:44:52 +00:00
|
|
|
}
|
|
|
|
|
|
|
|
pub fn inner(&self) -> &Url {
|
|
|
|
&self.0
|
|
|
|
}
|
|
|
|
|
|
|
|
/// Fetches an activitypub object, either from local database (if possible), or over http.
|
2021-10-06 20:20:05 +00:00
|
|
|
pub async fn dereference(
|
2021-09-25 15:44:52 +00:00
|
|
|
&self,
|
2021-10-27 16:03:07 +00:00
|
|
|
data: &<Kind as ApubObject>::DataType,
|
2021-09-25 15:44:52 +00:00
|
|
|
request_counter: &mut i32,
|
|
|
|
) -> Result<Kind, LemmyError> {
|
2021-10-27 16:03:07 +00:00
|
|
|
let db_object = self.dereference_from_db(data).await?;
|
2021-09-25 15:44:52 +00:00
|
|
|
|
|
|
|
// if its a local object, only fetch it from the database and not over http
|
|
|
|
if self.0.domain() == Some(&Settings::get().get_hostname_without_port()?) {
|
|
|
|
return match db_object {
|
|
|
|
None => Err(NotFound {}.into()),
|
|
|
|
Some(o) => Ok(o),
|
|
|
|
};
|
|
|
|
}
|
|
|
|
|
2021-10-27 16:03:07 +00:00
|
|
|
// object found in database
|
2021-09-25 15:44:52 +00:00
|
|
|
if let Some(object) = db_object {
|
2021-10-27 16:03:07 +00:00
|
|
|
// object is old and should be refetched
|
2021-09-25 15:44:52 +00:00
|
|
|
if let Some(last_refreshed_at) = object.last_refreshed_at() {
|
2021-10-27 16:03:07 +00:00
|
|
|
if should_refetch_object(last_refreshed_at) {
|
2021-09-25 15:44:52 +00:00
|
|
|
return self
|
2021-10-27 16:03:07 +00:00
|
|
|
.dereference_from_http(data, request_counter, Some(object))
|
2021-09-25 15:44:52 +00:00
|
|
|
.await;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
Ok(object)
|
2021-10-27 16:03:07 +00:00
|
|
|
}
|
|
|
|
// object not found, need to fetch over http
|
|
|
|
else {
|
2021-09-25 15:44:52 +00:00
|
|
|
self
|
2021-10-27 16:03:07 +00:00
|
|
|
.dereference_from_http(data, request_counter, None)
|
2021-09-25 15:44:52 +00:00
|
|
|
.await
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-10-06 20:20:05 +00:00
|
|
|
/// Fetch an object from the local db. Instead of falling back to http, this throws an error if
|
|
|
|
/// the object is not found in the database.
|
2021-10-27 16:03:07 +00:00
|
|
|
pub async fn dereference_local(
|
|
|
|
&self,
|
|
|
|
data: &<Kind as ApubObject>::DataType,
|
|
|
|
) -> Result<Kind, LemmyError> {
|
|
|
|
let object = self.dereference_from_db(data).await?;
|
2021-10-06 20:20:05 +00:00
|
|
|
object.ok_or_else(|| anyhow!("object not found in database {}", self).into())
|
|
|
|
}
|
|
|
|
|
2021-09-25 15:44:52 +00:00
|
|
|
/// returning none means the object was not found in local db
|
2021-10-27 16:03:07 +00:00
|
|
|
async fn dereference_from_db(
|
|
|
|
&self,
|
|
|
|
data: &<Kind as ApubObject>::DataType,
|
|
|
|
) -> Result<Option<Kind>, LemmyError> {
|
2021-10-06 20:20:05 +00:00
|
|
|
let id = self.0.clone();
|
2021-11-06 13:25:34 +00:00
|
|
|
ApubObject::read_from_apub_id(*id, data).await
|
2021-09-25 15:44:52 +00:00
|
|
|
}
|
|
|
|
|
2021-10-06 20:20:05 +00:00
|
|
|
async fn dereference_from_http(
|
2021-09-25 15:44:52 +00:00
|
|
|
&self,
|
2021-10-27 16:03:07 +00:00
|
|
|
data: &<Kind as ApubObject>::DataType,
|
2021-09-25 15:44:52 +00:00
|
|
|
request_counter: &mut i32,
|
|
|
|
db_object: Option<Kind>,
|
|
|
|
) -> Result<Kind, LemmyError> {
|
|
|
|
// dont fetch local objects this way
|
|
|
|
debug_assert!(self.0.domain() != Some(&Settings::get().hostname));
|
2021-10-27 16:03:07 +00:00
|
|
|
info!("Fetching remote object {}", self.to_string());
|
2021-09-25 15:44:52 +00:00
|
|
|
|
|
|
|
*request_counter += 1;
|
|
|
|
if *request_counter > REQUEST_LIMIT {
|
|
|
|
return Err(LemmyError::from(anyhow!("Request limit reached")));
|
|
|
|
}
|
|
|
|
|
|
|
|
let res = retry(|| {
|
2021-10-27 16:03:07 +00:00
|
|
|
CLIENT
|
2021-09-25 15:44:52 +00:00
|
|
|
.get(self.0.as_str())
|
|
|
|
.header("Accept", APUB_JSON_CONTENT_TYPE)
|
|
|
|
.timeout(Duration::from_secs(60))
|
|
|
|
.send()
|
|
|
|
})
|
|
|
|
.await?;
|
|
|
|
|
|
|
|
if res.status() == StatusCode::GONE {
|
|
|
|
if let Some(db_object) = db_object {
|
2021-10-27 16:03:07 +00:00
|
|
|
db_object.delete(data).await?;
|
2021-09-25 15:44:52 +00:00
|
|
|
}
|
|
|
|
return Err(anyhow!("Fetched remote object {} which was deleted", self).into());
|
|
|
|
}
|
|
|
|
|
|
|
|
let res2: Kind::ApubType = res.json().await?;
|
|
|
|
|
2021-11-06 12:37:55 +00:00
|
|
|
Ok(Kind::from_apub(res2, data, self.inner(), request_counter).await?)
|
2021-09-25 15:44:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-11-05 00:24:10 +00:00
|
|
|
/// Refetch interval used by `should_refetch_object` in release builds: one day, in seconds.
static ACTOR_REFETCH_INTERVAL_SECONDS: i64 = 24 * 60 * 60;
/// Refetch interval used by `should_refetch_object` in debug builds, in seconds.
static ACTOR_REFETCH_INTERVAL_SECONDS_DEBUG: i64 = 10;
|
|
|
|
|
|
|
|
/// Determines when a remote actor should be refetched from its instance. In release builds, this is
|
|
|
|
/// `ACTOR_REFETCH_INTERVAL_SECONDS` after the last refetch, in debug builds
|
|
|
|
/// `ACTOR_REFETCH_INTERVAL_SECONDS_DEBUG`.
|
|
|
|
///
|
|
|
|
/// TODO it won't pick up new avatars, summaries etc until a day after.
|
|
|
|
/// Actors need an "update" activity pushed to other servers to fix this.
|
|
|
|
fn should_refetch_object(last_refreshed: NaiveDateTime) -> bool {
|
|
|
|
let update_interval = if cfg!(debug_assertions) {
|
|
|
|
// avoid infinite loop when fetching community outbox
|
|
|
|
ChronoDuration::seconds(ACTOR_REFETCH_INTERVAL_SECONDS_DEBUG)
|
|
|
|
} else {
|
|
|
|
ChronoDuration::seconds(ACTOR_REFETCH_INTERVAL_SECONDS)
|
|
|
|
};
|
|
|
|
let refresh_limit = Utc::now().naive_utc() - update_interval;
|
|
|
|
last_refreshed.lt(&refresh_limit)
|
|
|
|
}
|
|
|
|
|
2021-09-25 15:44:52 +00:00
|
|
|
impl<Kind> Display for ObjectId<Kind>
|
|
|
|
where
|
2021-10-27 16:03:07 +00:00
|
|
|
Kind: ApubObject + Send + 'static,
|
|
|
|
for<'de2> <Kind as ApubObject>::ApubType: serde::Deserialize<'de2>,
|
2021-09-25 15:44:52 +00:00
|
|
|
{
|
2021-11-05 00:24:10 +00:00
|
|
|
#[allow(clippy::to_string_in_display)]
|
2021-09-25 15:44:52 +00:00
|
|
|
fn fmt(&self, f: &mut Formatter<'_>) -> std::fmt::Result {
|
2021-11-05 00:24:10 +00:00
|
|
|
// Use to_string here because Url.display is not useful for us
|
2021-09-25 15:44:52 +00:00
|
|
|
write!(f, "{}", self.0.to_string())
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
impl<Kind> From<ObjectId<Kind>> for Url
|
|
|
|
where
|
2021-10-27 16:03:07 +00:00
|
|
|
Kind: ApubObject + Send + 'static,
|
|
|
|
for<'de2> <Kind as ApubObject>::ApubType: serde::Deserialize<'de2>,
|
2021-09-25 15:44:52 +00:00
|
|
|
{
|
|
|
|
fn from(id: ObjectId<Kind>) -> Self {
|
2021-11-06 13:25:34 +00:00
|
|
|
*id.0
|
2021-09-25 15:44:52 +00:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2021-11-05 00:24:10 +00:00
|
|
|
#[cfg(test)]
mod tests {
    use super::*;
    use crate::object_id::should_refetch_object;

    /// A freshly refreshed object must not be refetched, a two day old one must be. Both
    /// expectations hold for the debug and the release refetch interval.
    #[test]
    fn test_should_refetch_object() {
        let now = Utc::now().naive_utc();

        let refreshed_just_now = now - ChronoDuration::seconds(1);
        assert!(
            !should_refetch_object(refreshed_just_now),
            "object refreshed one second ago must not be refetched"
        );

        let refreshed_long_ago = now - ChronoDuration::days(2);
        assert!(
            should_refetch_object(refreshed_long_ago),
            "object refreshed two days ago must be refetched"
        );
    }
}
|