2023-11-06 21:07:04 +00:00
use crate ::util ::{
get_activity_cached ,
get_actor_cached ,
get_latest_activity_id ,
2024-05-29 21:10:25 +00:00
FederationQueueStateWithDomain ,
2023-11-06 21:07:04 +00:00
LEMMY_TEST_FAST_FEDERATION ,
WORK_FINISHED_RECHECK_DELAY ,
2023-09-09 16:25:03 +00:00
} ;
2024-01-05 14:42:46 +00:00
use activitypub_federation ::{
activity_sending ::SendActivityTask ,
config ::Data ,
protocol ::context ::WithContext ,
} ;
2023-09-09 16:25:03 +00:00
use anyhow ::{ Context , Result } ;
2024-01-19 14:40:12 +00:00
use chrono ::{ DateTime , Days , TimeZone , Utc } ;
2023-11-06 21:07:04 +00:00
use lemmy_api_common ::{ context ::LemmyContext , federate_retry_sleep_duration } ;
2024-01-05 14:42:46 +00:00
use lemmy_apub ::{ activity_lists ::SharedInboxActivities , FEDERATION_CONTEXT } ;
2023-09-09 16:25:03 +00:00
use lemmy_db_schema ::{
2023-11-06 21:07:04 +00:00
newtypes ::{ ActivityId , CommunityId , InstanceId } ,
source ::{
activity ::SentActivity ,
federation_queue_state ::FederationQueueState ,
2024-01-19 14:40:12 +00:00
instance ::{ Instance , InstanceForm } ,
2023-11-06 21:07:04 +00:00
site ::Site ,
} ,
2024-05-27 13:34:58 +00:00
utils ::naive_now ,
2023-09-09 16:25:03 +00:00
} ;
use lemmy_db_views_actor ::structs ::CommunityFollowerView ;
use once_cell ::sync ::Lazy ;
use reqwest ::Url ;
use std ::{
collections ::{ HashMap , HashSet } ,
2024-01-19 14:40:12 +00:00
ops ::{ Add , Deref } ,
2023-09-09 16:25:03 +00:00
time ::Duration ,
} ;
use tokio ::{ sync ::mpsc ::UnboundedSender , time ::sleep } ;
use tokio_util ::sync ::CancellationToken ;
2024-05-21 18:47:06 +00:00
use tracing ::{ debug , info , trace , warn } ;
2023-09-13 11:20:09 +00:00
2024-05-23 12:46:26 +00:00
/// Check whether to save state to db every n sends if there's no failures (during failures state is
2024-05-23 18:05:35 +00:00
/// saved after every attempt). This determines the batch size for loop_batch. After a batch ends
/// and SAVE_STATE_EVERY_TIME has passed, the federation_queue_state is updated in the DB.
2023-09-09 16:25:03 +00:00
// Implementation note: `loop_batch` compares its per-batch counter against this value, so it is
// the upper bound on how many activity ids a single batch walks through.
static CHECK_SAVE_STATE_EVERY_IT: i64 = 100;
2024-05-23 12:46:26 +00:00
/// Save state to db after this time has passed since the last state (so if the server crashes or is
/// SIGKILLed, less than X seconds of activities are resent)
2023-09-09 16:25:03 +00:00
// Implementation note: converted to a `chrono` duration in `loop_until_stopped` and compared
// against the time elapsed since `last_state_insert`.
static SAVE_STATE_EVERY_TIME: Duration = Duration::from_secs(60);
2023-09-13 11:20:09 +00:00
/// interval with which new additions to community_followers are queried.
///
2024-05-23 12:46:26 +00:00
/// The first time some user on an instance follows a specific remote community (or, more precisely:
/// the first time a (followed_community_id, follower_inbox_url) tuple appears), this delay limits
/// the maximum time until the follow actually results in activities from that community id being
/// sent to that inbox url. This delay currently needs to not be too small because the DB load is
/// currently fairly high because of the current structure of storing inboxes for every person, not
/// having a separate list of shared_inboxes, and the architecture of having every instance queue be
/// fully separate. (see https://github.com/LemmyNet/lemmy/issues/3958)
2024-03-18 09:36:49 +00:00
static FOLLOW_ADDITIONS_RECHECK_DELAY : Lazy < chrono ::TimeDelta > = Lazy ::new ( | | {
2023-09-13 11:20:09 +00:00
if * LEMMY_TEST_FAST_FEDERATION {
2024-03-18 09:36:49 +00:00
chrono ::TimeDelta ::try_seconds ( 1 ) . expect ( " TimeDelta out of bounds " )
2023-09-13 11:20:09 +00:00
} else {
2024-03-18 09:36:49 +00:00
chrono ::TimeDelta ::try_minutes ( 2 ) . expect ( " TimeDelta out of bounds " )
2023-09-13 11:20:09 +00:00
}
} ) ;
2024-05-23 12:46:26 +00:00
/// The same as FOLLOW_ADDITIONS_RECHECK_DELAY, but triggering when the last person on an instance
/// unfollows a specific remote community. This is expected to happen pretty rarely and updating it
/// in a timely manner is not too important.
2024-03-18 09:36:49 +00:00
static FOLLOW_REMOVALS_RECHECK_DELAY: Lazy<chrono::TimeDelta> = Lazy::new(|| {
  // One hour between full refreshes of the followed-communities map.
  let one_hour = chrono::TimeDelta::try_hours(1);
  one_hour.expect(" TimeDelta out of bounds ")
});
2023-09-09 16:25:03 +00:00
pub ( crate ) struct InstanceWorker {
instance : Instance ,
// load site lazily because if an instance is first seen due to being on allowlist,
// the corresponding row in `site` may not exist yet since that is only added once
// `fetch_instance_actor_for_object` is called.
// (this should be unlikely to be relevant outside of the federation tests)
site_loaded : bool ,
site : Option < Site > ,
followed_communities : HashMap < CommunityId , HashSet < Url > > ,
stop : CancellationToken ,
context : Data < LemmyContext > ,
2024-05-29 21:10:25 +00:00
stats_sender : UnboundedSender < FederationQueueStateWithDomain > ,
2023-09-09 16:25:03 +00:00
last_full_communities_fetch : DateTime < Utc > ,
last_incremental_communities_fetch : DateTime < Utc > ,
state : FederationQueueState ,
last_state_insert : DateTime < Utc > ,
}
impl InstanceWorker {
pub ( crate ) async fn init_and_loop (
instance : Instance ,
context : Data < LemmyContext > ,
stop : CancellationToken ,
2024-05-29 21:10:25 +00:00
stats_sender : UnboundedSender < FederationQueueStateWithDomain > ,
2023-09-09 16:25:03 +00:00
) -> Result < ( ) , anyhow ::Error > {
2024-05-27 13:34:58 +00:00
let mut pool = context . pool ( ) ;
let state = FederationQueueState ::load ( & mut pool , instance . id ) . await ? ;
2023-09-09 16:25:03 +00:00
let mut worker = InstanceWorker {
instance ,
site_loaded : false ,
site : None ,
followed_communities : HashMap ::new ( ) ,
stop ,
context ,
stats_sender ,
last_full_communities_fetch : Utc . timestamp_nanos ( 0 ) ,
last_incremental_communities_fetch : Utc . timestamp_nanos ( 0 ) ,
state ,
last_state_insert : Utc . timestamp_nanos ( 0 ) ,
} ;
2024-05-27 13:34:58 +00:00
worker . loop_until_stopped ( ) . await
2023-09-09 16:25:03 +00:00
}
/// loop fetch new activities from db and send them to the inboxes of the given instances
2024-05-23 12:46:26 +00:00
/// this worker only returns if (a) there is an internal error or (b) the cancellation token is
/// cancelled (graceful exit)
2024-05-27 13:34:58 +00:00
pub ( crate ) async fn loop_until_stopped ( & mut self ) -> Result < ( ) , anyhow ::Error > {
2024-05-21 18:47:06 +00:00
debug! ( " Starting federation worker for {} " , self . instance . domain ) ;
2023-09-09 16:25:03 +00:00
let save_state_every = chrono ::Duration ::from_std ( SAVE_STATE_EVERY_TIME ) . expect ( " not negative " ) ;
2024-05-27 13:34:58 +00:00
self . update_communities ( ) . await ? ;
2023-09-09 16:25:03 +00:00
self . initial_fail_sleep ( ) . await ? ;
while ! self . stop . is_cancelled ( ) {
2024-05-27 13:34:58 +00:00
self . loop_batch ( ) . await ? ;
2023-09-09 16:25:03 +00:00
if self . stop . is_cancelled ( ) {
break ;
}
if ( Utc ::now ( ) - self . last_state_insert ) > save_state_every {
2024-05-27 13:34:58 +00:00
self . save_and_send_state ( ) . await ? ;
2023-09-09 16:25:03 +00:00
}
2024-05-27 13:34:58 +00:00
self . update_communities ( ) . await ? ;
2023-09-09 16:25:03 +00:00
}
// final update of state in db
2024-05-27 13:34:58 +00:00
self . save_and_send_state ( ) . await ? ;
2023-09-09 16:25:03 +00:00
Ok ( ( ) )
}
async fn initial_fail_sleep ( & mut self ) -> Result < ( ) > {
// before starting queue, sleep remaining duration if last request failed
if self . state . fail_count > 0 {
2023-11-06 21:07:04 +00:00
let last_retry = self
. state
. last_retry
. context ( " impossible: if fail count set last retry also set " ) ? ;
let elapsed = ( Utc ::now ( ) - last_retry ) . to_std ( ) ? ;
let required = federate_retry_sleep_duration ( self . state . fail_count ) ;
2023-09-09 16:25:03 +00:00
if elapsed > = required {
return Ok ( ( ) ) ;
}
let remaining = required - elapsed ;
tokio ::select! {
( ) = sleep ( remaining ) = > { } ,
( ) = self . stop . cancelled ( ) = > { }
}
}
Ok ( ( ) )
}
2023-09-13 11:20:09 +00:00
/// send out a batch of CHECK_SAVE_STATE_EVERY_IT activities
2024-05-27 13:34:58 +00:00
async fn loop_batch ( & mut self ) -> Result < ( ) > {
let latest_id = get_latest_activity_id ( & mut self . context . pool ( ) ) . await ? ;
2023-11-06 21:07:04 +00:00
let mut id = if let Some ( id ) = self . state . last_successful_id {
id
} else {
2024-05-23 12:46:26 +00:00
// this is the initial creation (instance first seen) of the federation queue for this
2024-05-23 18:05:35 +00:00
// instance
// skip all past activities:
2023-11-06 21:07:04 +00:00
self . state . last_successful_id = Some ( latest_id ) ;
2023-09-09 16:25:03 +00:00
// save here to ensure it's not read as 0 again later if no activities have happened
2024-05-27 13:34:58 +00:00
self . save_and_send_state ( ) . await ? ;
2023-11-06 21:07:04 +00:00
latest_id
} ;
2024-01-04 18:28:26 +00:00
if id > = latest_id {
2024-06-14 12:15:12 +00:00
if id > latest_id {
tracing ::error! (
" {}: last successful id {} is higher than latest id {} in database (did the db get cleared?) " ,
self . instance . domain ,
id . 0 ,
latest_id . 0
) ;
}
2023-09-09 16:25:03 +00:00
// no more work to be done, wait before rechecking
tokio ::select! {
2023-09-13 11:20:09 +00:00
( ) = sleep ( * WORK_FINISHED_RECHECK_DELAY ) = > { } ,
2023-09-09 16:25:03 +00:00
( ) = self . stop . cancelled ( ) = > { }
}
return Ok ( ( ) ) ;
}
let mut processed_activities = 0 ;
while id < latest_id
& & processed_activities < CHECK_SAVE_STATE_EVERY_IT
& & ! self . stop . is_cancelled ( )
{
2023-11-06 21:07:04 +00:00
id = ActivityId ( id . 0 + 1 ) ;
2023-09-09 16:25:03 +00:00
processed_activities + = 1 ;
2024-05-27 13:34:58 +00:00
let Some ( ele ) = get_activity_cached ( & mut self . context . pool ( ) , id )
2023-09-09 16:25:03 +00:00
. await
. context ( " failed reading activity from db " ) ?
else {
2024-05-21 18:47:06 +00:00
debug! ( " {}: {:?} does not exist " , self . instance . domain , id ) ;
2023-11-06 21:07:04 +00:00
self . state . last_successful_id = Some ( id ) ;
2023-09-09 16:25:03 +00:00
continue ;
} ;
2024-05-27 13:34:58 +00:00
if let Err ( e ) = self . send_retry_loop ( & ele . 0 , & ele . 1 ) . await {
2024-05-21 18:47:06 +00:00
warn! (
2023-09-09 16:25:03 +00:00
" sending {} errored internally, skipping activity: {:?} " ,
2024-05-21 18:47:06 +00:00
ele . 0. ap_id , e
2023-09-09 16:25:03 +00:00
) ;
}
if self . stop . is_cancelled ( ) {
return Ok ( ( ) ) ;
}
// send success!
2023-11-06 21:07:04 +00:00
self . state . last_successful_id = Some ( id ) ;
self . state . last_successful_published_time = Some ( ele . 0. published ) ;
2023-09-09 16:25:03 +00:00
self . state . fail_count = 0 ;
}
Ok ( ( ) )
}
// this function will return successfully when (a) send succeeded or (b) worker cancelled
// and will return an error if an internal error occurred (send errors cause an infinite loop)
async fn send_retry_loop (
& mut self ,
activity : & SentActivity ,
object : & SharedInboxActivities ,
) -> Result < ( ) > {
let inbox_urls = self
2024-05-27 13:34:58 +00:00
. get_inbox_urls ( activity )
2023-09-09 16:25:03 +00:00
. await
. context ( " failed figuring out inbox urls " ) ? ;
if inbox_urls . is_empty ( ) {
2024-05-21 18:47:06 +00:00
trace! ( " {}: {:?} no inboxes " , self . instance . domain , activity . id ) ;
2023-11-06 21:07:04 +00:00
self . state . last_successful_id = Some ( activity . id ) ;
self . state . last_successful_published_time = Some ( activity . published ) ;
2023-09-09 16:25:03 +00:00
return Ok ( ( ) ) ;
}
let Some ( actor_apub_id ) = & activity . actor_apub_id else {
return Ok ( ( ) ) ; // activity was inserted before persistent queue was activated
} ;
2024-05-27 13:34:58 +00:00
let actor = get_actor_cached ( & mut self . context . pool ( ) , activity . actor_type , actor_apub_id )
2023-09-09 16:25:03 +00:00
. await
. context ( " failed getting actor instance (was it marked deleted / removed?) " ) ? ;
2024-01-05 14:42:46 +00:00
let object = WithContext ::new ( object . clone ( ) , FEDERATION_CONTEXT . deref ( ) . clone ( ) ) ;
2023-09-09 16:25:03 +00:00
let inbox_urls = inbox_urls . into_iter ( ) . collect ( ) ;
2023-11-21 09:26:12 +00:00
let requests =
2024-01-05 14:42:46 +00:00
SendActivityTask ::prepare ( & object , actor . as_ref ( ) , inbox_urls , & self . context ) . await ? ;
2023-09-09 16:25:03 +00:00
for task in requests {
// usually only one due to shared inbox
2024-05-21 18:47:06 +00:00
trace! ( " sending out {} " , task ) ;
2023-09-09 16:25:03 +00:00
while let Err ( e ) = task . sign_and_send ( & self . context ) . await {
self . state . fail_count + = 1 ;
2023-11-06 21:07:04 +00:00
self . state . last_retry = Some ( Utc ::now ( ) ) ;
let retry_delay : Duration = federate_retry_sleep_duration ( self . state . fail_count ) ;
2024-05-21 18:47:06 +00:00
info! (
2023-11-06 21:07:04 +00:00
" {}: retrying {:?} attempt {} with delay {retry_delay:.2?}. ({e}) " ,
2024-05-21 18:47:06 +00:00
self . instance . domain , activity . id , self . state . fail_count
2023-09-09 16:25:03 +00:00
) ;
2024-05-27 13:34:58 +00:00
self . save_and_send_state ( ) . await ? ;
2023-09-09 16:25:03 +00:00
tokio ::select! {
( ) = sleep ( retry_delay ) = > { } ,
( ) = self . stop . cancelled ( ) = > {
// save state to db and exit
return Ok ( ( ) ) ;
}
}
}
2024-01-19 14:40:12 +00:00
// Activity send successful, mark instance as alive if it hasn't been updated in a while.
let updated = self . instance . updated . unwrap_or ( self . instance . published ) ;
if updated . add ( Days ::new ( 1 ) ) < Utc ::now ( ) {
self . instance . updated = Some ( Utc ::now ( ) ) ;
let form = InstanceForm ::builder ( )
. domain ( self . instance . domain . clone ( ) )
. updated ( Some ( naive_now ( ) ) )
. build ( ) ;
2024-05-27 13:34:58 +00:00
Instance ::update ( & mut self . context . pool ( ) , self . instance . id , form ) . await ? ;
2024-01-19 14:40:12 +00:00
}
2023-09-09 16:25:03 +00:00
}
Ok ( ( ) )
}
/// get inbox urls of sending the given activity to the given instance
/// most often this will return 0 values (if instance doesn't care about the activity)
/// or 1 value (the shared inbox)
/// > 1 values only happens for non-lemmy software
2024-05-27 13:34:58 +00:00
async fn get_inbox_urls ( & mut self , activity : & SentActivity ) -> Result < HashSet < Url > > {
2023-09-09 16:25:03 +00:00
let mut inbox_urls : HashSet < Url > = HashSet ::new ( ) ;
if activity . send_all_instances {
if ! self . site_loaded {
2024-05-27 13:34:58 +00:00
self . site = Site ::read_from_instance_id ( & mut self . context . pool ( ) , self . instance . id ) . await ? ;
2023-09-09 16:25:03 +00:00
self . site_loaded = true ;
}
if let Some ( site ) = & self . site {
2024-05-23 12:46:26 +00:00
// Nutomic: Most non-lemmy software wont have a site row. That means it cant handle these
// activities. So handling it like this is fine.
2023-09-09 16:25:03 +00:00
inbox_urls . insert ( site . inbox_url . inner ( ) . clone ( ) ) ;
}
}
if let Some ( t ) = & activity . send_community_followers_of {
if let Some ( urls ) = self . followed_communities . get ( t ) {
2024-01-23 23:47:28 +00:00
inbox_urls . extend ( urls . iter ( ) . cloned ( ) ) ;
2023-09-09 16:25:03 +00:00
}
}
inbox_urls . extend (
activity
. send_inboxes
. iter ( )
. filter_map ( std ::option ::Option ::as_ref )
2023-09-12 19:54:49 +00:00
. filter ( | & u | ( u . domain ( ) = = Some ( & self . instance . domain ) ) )
. map ( | u | u . inner ( ) . clone ( ) ) ,
2023-09-09 16:25:03 +00:00
) ;
Ok ( inbox_urls )
}
2024-05-27 13:34:58 +00:00
async fn update_communities ( & mut self ) -> Result < ( ) > {
2023-09-09 16:25:03 +00:00
if ( Utc ::now ( ) - self . last_full_communities_fetch ) > * FOLLOW_REMOVALS_RECHECK_DELAY {
// process removals every hour
( self . followed_communities , self . last_full_communities_fetch ) = self
2024-05-27 13:34:58 +00:00
. get_communities ( self . instance . id , Utc . timestamp_nanos ( 0 ) )
2023-09-09 16:25:03 +00:00
. await ? ;
self . last_incremental_communities_fetch = self . last_full_communities_fetch ;
}
if ( Utc ::now ( ) - self . last_incremental_communities_fetch ) > * FOLLOW_ADDITIONS_RECHECK_DELAY {
// process additions every minute
let ( news , time ) = self
2024-05-27 13:34:58 +00:00
. get_communities ( self . instance . id , self . last_incremental_communities_fetch )
2023-09-09 16:25:03 +00:00
. await ? ;
self . followed_communities . extend ( news ) ;
self . last_incremental_communities_fetch = time ;
}
Ok ( ( ) )
}
2024-05-23 12:46:26 +00:00
/// get a list of local communities with the remote inboxes on the given instance that cares about
/// them
2023-09-09 16:25:03 +00:00
async fn get_communities (
& mut self ,
instance_id : InstanceId ,
last_fetch : DateTime < Utc > ,
) -> Result < ( HashMap < CommunityId , HashSet < Url > > , DateTime < Utc > ) > {
2024-03-18 09:36:49 +00:00
let new_last_fetch =
2024-05-23 12:46:26 +00:00
Utc ::now ( ) - chrono ::TimeDelta ::try_seconds ( 10 ) . expect ( " TimeDelta out of bounds " ) ; // update to time before fetch to ensure overlap. subtract 10s to ensure overlap even if
// published date is not exact
2023-09-09 16:25:03 +00:00
Ok ( (
2024-05-27 13:34:58 +00:00
CommunityFollowerView ::get_instance_followed_community_inboxes (
& mut self . context . pool ( ) ,
instance_id ,
last_fetch ,
)
. await ?
. into_iter ( )
. fold ( HashMap ::new ( ) , | mut map , ( c , u ) | {
map . entry ( c ) . or_default ( ) . insert ( u . into ( ) ) ;
map
} ) ,
2023-09-09 16:25:03 +00:00
new_last_fetch ,
) )
}
2024-05-27 13:34:58 +00:00
async fn save_and_send_state ( & mut self ) -> Result < ( ) > {
2023-09-09 16:25:03 +00:00
self . last_state_insert = Utc ::now ( ) ;
2024-05-27 13:34:58 +00:00
FederationQueueState ::upsert ( & mut self . context . pool ( ) , & self . state ) . await ? ;
2024-05-29 21:10:25 +00:00
self . stats_sender . send ( FederationQueueStateWithDomain {
state : self . state . clone ( ) ,
domain : self . instance . domain . clone ( ) ,
} ) ? ;
2023-09-09 16:25:03 +00:00
Ok ( ( ) )
}
}