ref: InstanceInfo (#32)

pull/36/head
Divided by Zer0 2023-09-24 18:15:52 +02:00 committed by GitHub
parent 62c0f62aa0
commit 309feafd36
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
17 changed files with 433 additions and 211 deletions

View File

@ -30,14 +30,17 @@ class Models:
'domain': fields.String(description="The instance domain", example="lemmy.dbzer0.com"),
'software': fields.String(description="The fediverse software running in this instance", example="lemmy"),
'claimed': fields.Integer(description="How many admins from this instance has claimed it."),
'open_registrations': fields.Boolean(description="The instance uptime pct. 100% and thousand of users is unlikely"),
'email_verify': fields.Boolean(description="The amount of local posts in that instance"),
'open_registrations': fields.Boolean(description="The instance uptime pct. 100% and thousand of users is unlikely."),
'email_verify': fields.Boolean(description="The amount of local posts in that instance."),
'approval_required': fields.Boolean(description="Whether user registration requires admin approval."),
'has_captcha': fields.Boolean(description="Whether user registration requires passing a captcha."),
'approvals': fields.Integer(description="The amount of endorsements this instance has given out"),
'endorsements': fields.Integer(description="The amount of endorsements this instance has received"),
'guarantor': fields.String(description="The domain of the instance which guaranteed this instance.", example="fediseer.com"),
'censure_reasons': fields.List(fields.String(description="The reasons instances have given for censuring this instance")),
'sysadmins': fields.Integer(required=False, default=None, description="The count of system administrators in this instance as reported by its admins."),
'moderators': fields.Integer(required=False, default=None, description="The count of community moderators in this instance as reported by its admins."),
'state': fields.String(required=True, enum=[e.name for e in enums.InstanceState], description="The state of the instance as seen from the fediseer."),
})
self.response_model_instances_visibility = api.inherit('InstanceVisibilityDetails', self.response_model_instances, {
'visibility_endorsements': fields.String(required=True, enum=[e.name for e in enums.ListVisibility], description="If OPEN, this instance allows anyone to read this instance's endorsements. When set to ENDORSED, only endorsed instances can see their endorsements. If set to PRIVATE allow this instance's own admins can see their endorsements."),

View File

@ -1,5 +1,4 @@
from fediseer.apis.v1.base import *
from fediseer.fediverse import get_nodeinfo
class User(Resource):
get_parser = reqparse.RequestParser()

View File

@ -10,7 +10,7 @@ from fediseer import exceptions as e
from fediseer.utils import hash_api_key
from fediseer.messaging import activitypub_pm
from pythorhead import Lemmy
from fediseer.fediverse import get_admin_for_software, get_nodeinfo
from fediseer.fediverse import InstanceInfo
from fediseer.limiter import limiter
api = Namespace('v1', 'API Version 1' )
@ -59,60 +59,9 @@ class Suspicions(Resource):
return {"domains": [instance["domain"] for instance in sus_instances]},200
return {"instances": sus_instances},200
# Poll `domain`, then return the stored Instance for it (creating the row on
# first sight) together with the nodeinfo document and the discovered admin
# usernames, as the tuple (instance, nodeinfo, admin_usernames).
# NOTE(review): indentation was lost in this listing, so the exact nesting of
# the branches below cannot be confirmed from here.
def ensure_instance_registered(domain, allow_unreachable=False):
if domain.endswith("test.dbzer0.com"):
# Fake instances for testing chain of trust
requested_lemmy = Lemmy(f"https://{domain}")
requested_lemmy._requestor.nodeinfo = {"software":{"name":"lemmy"}}
open_registrations = False
email_verify = True
software = "lemmy"
admin_usernames = ["db0"]
# The nodeinfo document is fetched from lemmy.dbzer0.com, not from the fake domain
nodeinfo = get_nodeinfo("lemmy.dbzer0.com")
requested_lemmy = Lemmy(f"https://{domain}")
site = requested_lemmy.site.get()
else:
nodeinfo = get_nodeinfo(domain)
# A falsy nodeinfo means the document could not be obtained
if not nodeinfo:
if not allow_unreachable:
raise e.BadRequest(f"Error encountered while polling domain {domain}. Please check it's running correctly")
else:
software = "unknown"
# Domains containing a glob are recorded with the "wildcard" pseudo-software
if "*" in domain:
software = "wildcard"
# Stand-in nodeinfo so the lookups below still work for unreachable domains
nodeinfo = {
"openRegistrations": False,
"software": {
"name": software
}
}
software = nodeinfo["software"]["name"]
if software == "lemmy":
requested_lemmy = Lemmy(f"https://{domain}")
site = requested_lemmy.site.get()
if not site:
raise e.BadRequest(f"Error encountered while polling lemmy domain {domain}. Please check it's running correctly")
open_registrations = site["site_view"]["local_site"]["registration_mode"] == "open"
email_verify = site["site_view"]["local_site"]["require_email_verification"]
# NOTE(review): self-assignment, has no effect
software = software
admin_usernames = [a["person"]["name"] for a in site["admins"]]
else:
open_registrations = nodeinfo["openRegistrations"]
email_verify = False
try:
admin_usernames = get_admin_for_software(software, domain)
# NOTE(review): bare except hides every failure of the admin lookup; an empty list is used instead
except:
admin_usernames = []
instance = database.find_instance_by_domain(domain)
# A known domain is returned as stored; the freshly polled values are not written back
if instance:
return instance, nodeinfo, admin_usernames
new_instance = Instance(
domain=domain,
open_registrations=open_registrations,
email_verify=email_verify,
software=software,
)
new_instance.create()
return new_instance, nodeinfo, admin_usernames
# Debug
# from fediseer.flask import OVERSEER
# with OVERSEER.app_context():
# logger.debug(ensure_instance_registered("lemmings.world"))
# import sys
# sys.exit()

View File

@ -3,6 +3,7 @@ from fediseer.classes.instance import Censure
from fediseer.utils import sanitize_string
from fediseer.classes.reports import Report
from fediseer import enums
from fediseer.register import ensure_instance_registered
class CensuresGiven(Resource):
get_parser = reqparse.RequestParser()
@ -185,7 +186,7 @@ class Censures(Resource):
unbroken_chain, chainbreaker = database.has_unbroken_chain(instance.id)
if not unbroken_chain:
raise e.Forbidden(f"Guarantee chain for this instance has been broken. Chain ends at {chainbreaker.domain}!")
target_instance, nodeinfo, admin_usernames = ensure_instance_registered(domain, allow_unreachable=True)
target_instance, instance_info = ensure_instance_registered(domain, allow_unreachable=True)
if not target_instance:
raise e.NotFound(f"Something went wrong trying to register this instance.")
if not target_instance:

View File

@ -3,6 +3,7 @@ from fediseer.classes.instance import Endorsement,Censure
from fediseer.classes.reports import Report
from fediseer import enums
from fediseer.utils import sanitize_string
from fediseer.register import ensure_instance_registered
class Approvals(Resource):
get_parser = reqparse.RequestParser()
@ -164,7 +165,7 @@ class Endorsements(Resource):
unbroken_chain, chainbreaker = database.has_unbroken_chain(instance.id)
if not unbroken_chain:
raise e.Forbidden(f"Guarantee chain for this instance has been broken. Chain ends at {chainbreaker.domain}!")
target_instance, nodeinfo, admin_usernames = ensure_instance_registered(domain)
target_instance, instance_info = ensure_instance_registered(domain)
if not target_instance:
raise e.NotFound(f"Something went wrong trying to register this instance.")
if len(target_instance.guarantors) == 0:

View File

@ -2,6 +2,7 @@ from fediseer.apis.v1.base import *
from fediseer.classes.instance import Guarantee, RejectionRecord, Solicitation
from fediseer.classes.reports import Report
from fediseer import enums
from fediseer.register import ensure_instance_registered
class Guarantors(Resource):
get_parser = reqparse.RequestParser()
@ -91,7 +92,7 @@ class Guarantees(Resource):
unbroken_chain, chainbreaker = database.has_unbroken_chain(instance.id)
if not unbroken_chain:
raise e.Forbidden(f"Guarantee chain for this instance has been broken. Chain ends at {chainbreaker.domain}!")
target_instance, nodeinfo, admin_usernames = ensure_instance_registered(domain)
target_instance, instance_info = ensure_instance_registered(domain)
if not target_instance:
raise e.NotFound(f"Something went wrong trying to register this instance.")
if database.get_guarantee(target_instance.id,instance.id):

View File

@ -3,6 +3,7 @@ from fediseer.classes.instance import Hesitation
from fediseer.utils import sanitize_string
from fediseer.classes.reports import Report
from fediseer import enums
from fediseer.register import ensure_instance_registered
class HesitationsGiven(Resource):
get_parser = reqparse.RequestParser()
@ -171,7 +172,7 @@ class Hesitations(Resource):
unbroken_chain, chainbreaker = database.has_unbroken_chain(instance.id)
if not unbroken_chain:
raise e.Forbidden(f"Guarantee chain for this instance has been broken. Chain ends at {chainbreaker.domain}!")
target_instance, nodeinfo, admin_usernames = ensure_instance_registered(domain, allow_unreachable=True)
target_instance, instance_info = ensure_instance_registered(domain, allow_unreachable=True)
if not target_instance:
raise e.NotFound(f"Something went wrong trying to register this instance.")
if not target_instance:

View File

@ -4,6 +4,7 @@ from fediseer.classes.user import User, Claim
from fediseer import enums
from fediseer.classes.instance import Solicitation
from fediseer.classes.reports import Report
from fediseer.register import ensure_instance_registered
class Whitelist(Resource):
get_parser = reqparse.RequestParser()
@ -29,8 +30,6 @@ class Whitelist(Resource):
return {"domains": [instance["domain"] for instance in instance_details]},200
return {"instances": instance_details},200
class WhitelistDomain(Resource):
get_parser = reqparse.RequestParser()
get_parser.add_argument("Client-Agent", default="unknown:0:unknown", type=str, required=False, help="The client name and version.", location="headers")
@ -42,7 +41,13 @@ class WhitelistDomain(Resource):
'''Display info about a specific instance
'''
self.args = self.get_parser.parse_args()
instance, nodeinfo, admin_usernames = ensure_instance_registered(domain)
try:
instance, instance_info = ensure_instance_registered(domain)
except Exception as err:
# If the domain had been previously registered, we return its cached info
instance = database.find_instance_by_domain(domain)
if not instance:
raise err
if not instance:
raise e.NotFound(f"Something went wrong trying to register this instance.")
return instance.get_details(show_visibilities=True),200
@ -66,14 +71,14 @@ class WhitelistDomain(Resource):
self.args = self.put_parser.parse_args()
if '@' in self.args.admin:
raise e.BadRequest("Please send the username without any @ signs or domains")
instance, nodeinfo, admin_usernames = ensure_instance_registered(domain)
instance, instance_info = ensure_instance_registered(domain)
guarantor_instance = None
if self.args.guarantor:
guarantor_instance = database.find_instance_by_domain(self.args.guarantor)
if not guarantor_instance:
raise e.BadRequest(f"Requested guarantor domain {self.args.guarantor} is not registered with the Fediseer yet!")
if self.args.admin not in admin_usernames:
if len(admin_usernames) == 0:
if self.args.admin not in instance_info.admin_usernames:
if len(instance_info.admin_usernames) == 0:
raise e.Unauthorized(f"We could not discover any admins for this instance software. Please Ensure your software exposes this info. If it's exposed in a novel manner, consider sending us a PR to be able to retrieve this infomation.")
else:
raise e.Forbidden(f"Only admins of that {instance.software} are allowed to claim it.")

View File

@ -1,7 +1,3 @@
import uuid
import os
import dateutil.relativedelta
from datetime import datetime
from sqlalchemy import Enum, UniqueConstraint
from sqlalchemy.dialects.postgresql import UUID
@ -9,6 +5,7 @@ from sqlalchemy.dialects.postgresql import UUID
from loguru import logger
from fediseer.flask import db, SQLITE_MODE
from fediseer import enums
from fediseer.consts import POLLS_PER_DAY
uuid_column_type = lambda: UUID(as_uuid=True) if not SQLITE_MODE else db.String(36)
@ -95,11 +92,14 @@ class Instance(db.Model):
oprhan_since = db.Column(db.DateTime, nullable=True)
open_registrations = db.Column(db.Boolean, unique=False, nullable=False, index=True)
email_verify = db.Column(db.Boolean, unique=False, nullable=False, index=True)
email_verify = db.Column(db.Boolean, unique=False, nullable=True, index=True)
approval_required = db.Column(db.Boolean, unique=False, nullable=True, index=True)
has_captcha = db.Column(db.Boolean, unique=False, nullable=True, index=True)
software = db.Column(db.String(50), unique=False, nullable=False, index=True)
sysadmins = db.Column(db.Integer, unique=False, nullable=True)
moderators = db.Column(db.Integer, unique=False, nullable=True)
pm_proxy = db.Column(Enum(enums.PMProxy), default=enums.PMProxy.NONE, nullable=False)
poll_failures = db.Column(db.Integer, default=0, nullable=True)
visibility_endorsements = db.Column(Enum(enums.ListVisibility), default=enums.ListVisibility.OPEN, nullable=False)
visibility_censures = db.Column(Enum(enums.ListVisibility), default=enums.ListVisibility.OPEN, nullable=False)
visibility_hesitations = db.Column(Enum(enums.ListVisibility), default=enums.ListVisibility.OPEN, nullable=False)
@ -123,18 +123,25 @@ class Instance(db.Model):
db.session.commit()
def get_details(self,show_visibilities=False):
email_verification = None
# We only know this info for lemmy currently
if self.software == "lemmy":
email_verification = self.email_verify
ret_dict = {
"id": self.id,
"domain": self.domain,
"software": self.software,
"claimed": len(self.admins),
"open_registrations": self.open_registrations,
"email_verify": self.email_verify,
"email_verify": email_verification,
"approval_required": self.approval_required,
"has_captcha": self.has_captcha,
"endorsements": len(self.endorsements),
"approvals": len(self.approvals),
"guarantor": self.get_guarantor_domain(),
"sysadmins": self.sysadmins,
"moderators": self.moderators,
"state": self.get_state().name,
}
if show_visibilities:
ret_dict["visibility_endorsements"] = self.visibility_endorsements.name
@ -181,3 +188,13 @@ class Instance(db.Model):
def is_hesitating(self,instance):
return instance in self.hesitations_given
def get_state(self):
    """Classify the instance's availability from its failed-poll counter.

    Returns:
        enums.InstanceState: ``UP`` with no recorded failures,
        ``UNREACHABLE`` for up to ``POLLS_PER_DAY`` failures, ``OFFLINE``
        for up to ``30 * POLLS_PER_DAY`` failures, and the decommissioned
        state beyond that.
    """
    # poll_failures is a nullable column, so a missing value counts as
    # "no failures" instead of blowing up on a None/int comparison.
    failures = self.poll_failures or 0
    if failures == 0:
        return enums.InstanceState.UP
    if failures <= POLLS_PER_DAY:
        return enums.InstanceState.UNREACHABLE
    if failures <= 30 * POLLS_PER_DAY:
        return enums.InstanceState.OFFLINE
    # The member name is spelled this way in the enum definition.
    return enums.InstanceState.DECIMMISSIONED

View File

@ -10,3 +10,4 @@ SUPPORTED_SOFTWARE = {
"misskey",
"mitra",
}
POLLS_PER_DAY=2

View File

@ -12,8 +12,9 @@ from fediseer.classes.instance import Instance, Endorsement, Guarantee, Rejectio
from fediseer.classes.user import Claim, User
from fediseer.classes.reports import Report
from fediseer import enums
from fediseer.consts import POLLS_PER_DAY
def get_all_instances(min_endorsements = 0, min_guarantors = 1):
def get_all_instances(min_endorsements = 0, min_guarantors = 1, include_decommissioned = True):
query = db.session.query(
Instance
).outerjoin(

View File

@ -21,3 +21,9 @@ class ListVisibility(enum.Enum):
OPEN = 0
ENDORSED = 1
PRIVATE = 2
class InstanceState(enum.Enum):
    """Availability states an instance can be reported in."""

    UP = 0
    UNREACHABLE = 1
    OFFLINE = 2
    # Spelling kept as-is: callers refer to this member by this exact name.
    DECIMMISSIONED = 3

View File

@ -2,56 +2,76 @@ import requests
from loguru import logger
from pythorhead import Lemmy
from fediseer.consts import FEDISEER_VERSION
import fediseer.exceptions as e
def get_lemmy_admins(domain,software):
requested_lemmy = Lemmy(f"https://{domain}")
try:
site = requested_lemmy.site.get()
except Exception as err:
logger.error(f"Error retrieving {software} site info for {domain}: {err}")
raise err
if not site:
logger.error(f"Error retrieving {software} site info for {domain}")
raise Exception(f"Error retrieving {software} site info for {domain}")
return [a["person"]["name"] for a in site["admins"]]
class InstanceInfo():
def get_mastodon_admins(domain,software):
site = None
try:
site = requests.get(f"https://{domain}/api/v2/instance")
site_json = site.json()
if "contact" not in site_json or "account" not in site_json["contact"] or "username" not in site_json["contact"]["account"]:
raise Exception(f"No admin contact is specified for {domain}.")
return [site_json["contact"]["account"]["username"]]
except Exception as err:
if site is not None:
logger.error(f"Error retrieving {software} site info for {domain}: {err}.")
domain = None
node_info = None
instance_info = None
admin_usernames = set()
software = None
open_registrations = None
approval_required = None
email_verify = None
has_captcha = None
_allow_unreachable = False
_req_timeout = 5
def __init__(self, domain, allow_unreachable=False, req_timeout=5):
    """Probe ``domain`` and collect its software, registration and admin info.

    Args:
        domain: The instance domain to probe.
        allow_unreachable: When true, a failure while parsing the instance
            info is tolerated instead of raised.
        req_timeout: Timeout in seconds handed to the nodeinfo request.

    Raises:
        Exception: If the instance info cannot be parsed and
            ``allow_unreachable`` is false.
    """
    self.domain = domain
    self._allow_unreachable = allow_unreachable
    self._req_timeout = req_timeout
    # Bind a fresh set per object: the class-level default is one shared
    # set, so in-place updates would leak admins between instances.
    self.admin_usernames = set()
    if domain.endswith("test.dbzer0.com"):
        # Fake instances for testing chain of trust
        self.open_registrations = False
        self.approval_required = False
        self.email_verify = True
        self.has_captcha = True
        self.software = "lemmy"
        self.admin_usernames = {"db0"}
        self.node_info = InstanceInfo.get_nodeinfo("lemmy.dbzer0.com")
        self.instance_info = {}
        return
    self.node_info = InstanceInfo.get_nodeinfo(domain,req_timeout=self._req_timeout)
    try:
        self.parse_instance_info()
    except Exception as err:
        if not self._allow_unreachable:
            # The software name is only used to make the error message readable
            sw = self.software if self.software is not None else 'unknown'
            logger.error(f"Error retrieving {sw} site info for {self.domain}: {err}")
            raise Exception(f"Error retrieving {sw} site info for {self.domain}: {err}") from err
    try:
        self.retrieve_admins()
    except Exception as err:
        # Admin discovery is best-effort: keep whatever was collected,
        # but leave a trace instead of swallowing the failure silently.
        logger.debug(f"Could not retrieve admins for {self.domain}: {err}")
def get_lemmy_admins(self):
    """Store the usernames listed under ``admins`` in the fetched Lemmy site info."""
    self.admin_usernames = {
        admin_entry["person"]["name"]
        for admin_entry in self.instance_info["admins"]
    }
def get_mastodon_admins(self):
if "contact_account" in self.instance_info: # New API
if "username" not in self.instance_info["contact_account"]:
raise Exception(f"No admin contact is specified for {self.domain}.")
self.admin_usernames = {self.instance_info["contact_account"]["username"]}
elif "contact" in self.instance_info: # Old API
if "account" not in self.instance_info["contact"]:
raise Exception(f"No admin contact is specified for {self.domain}.")
self.admin_usernames = {self.instance_info["contact"]["account"]["username"]}
else:
logger.error(f"Error retrieving {software} site info for {domain}: {err}")
raise Exception(f"Error retrieving {software} site info for {domain}: {err}")
raise Exception(f"Could not determine admin contacts for {self.domain}.")
def get_firefish_admins(domain,software):
site = None
try:
site = requests.get(f"https://{domain}/api/v1/instance")
site_json = site.json()
if "contact_account" not in site_json or "username" not in site_json["contact_account"]:
raise Exception(f"No admin contact is specified for {domain}.")
return [site_json["contact_account"]["username"]]
except Exception as err:
if site is not None:
logger.error(f"Error retrieving {software} site info for {domain}: {err}.")
else:
logger.error(f"Error retrieving {software} site info for {domain}: {err}")
raise Exception(f"Error retrieving {software} site info for {domain}: {err}")
def get_misskey_admins(domain,software):
site = None
site_json = None
offset = 0
admins_found = []
try:
while site_json is None or len(site_json) != 0 and offset < 500:
def get_misskey_admins(self):
site_users = None
users_json = None
offset = 0
admins_found = set()
while users_json is None or len(users_json) != 0 and offset < 500:
payload = {
"limit": 10,
"offset": offset,
@ -60,97 +80,193 @@ def get_misskey_admins(domain,software):
"origin": "local",
"hostname": None
}
site = requests.post(f"https://{domain}/api/users", json=payload)
site_json = site.json()
for user_entry in site_json:
site_users = requests.post(f"https://{self.domain}/api/users", json=payload)
users_json = site_users.json()
for user_entry in users_json:
if user_entry.get("isAdmin") is True:
admins_found.append(user_entry["username"])
admins_found.add(user_entry["username"])
for role in user_entry.get("roles",[]):
if role.get("isAdministrator") is True:
admins_found.append(user_entry["username"])
admins_found.add(user_entry["username"])
offset += 10
if len(admins_found) == 0:
raise Exception(f"No admin contact is specified for {domain}.")
return admins_found
except Exception as err:
if site is not None:
logger.error(f"Error retrieving {software} site info for {domain}: {err}.")
else:
logger.error(f"Error retrieving {software} site info for {domain}: {err}")
raise Exception(f"Error retrieving {software} site info for {domain}: {err}")
raise Exception(f"No admin contact is specified for {self.domain}.")
self.admin_usernames = admins_found
def get_pleroma_admins(domain,software):
site = None
try:
site = requests.get(f"https://{domain}/api/v1/instance")
site_json = site.json()
if "email" not in site_json or site_json["email"] is None or site_json["email"] == '':
logger.error(f"No admin contact is specified for {domain}.")
raise Exception(f"No admin contact is specified for {domain}.")
admin_username = site_json["email"].split('@',1)[0]
return [admin_username]
except Exception as err:
if site is not None:
logger.error(f"Error retrieving {software} site info for {domain}: {err}.")
else:
logger.error(f"Error retrieving {software} site info for {domain}: {err}")
raise Exception(f"Error retrieving {software} site info for {domain}: {err}")
def get_pleroma_admins(self):
    """Store the staff accounts advertised in the nodeinfo metadata as admins.

    Each ``staffAccounts`` entry is reduced to its last ``/``-separated
    segment, which is used as the username.

    Raises:
        Exception: If the metadata lists no staff accounts.
    """
    staff_accounts = self.node_info["metadata"].get("staffAccounts")
    if not staff_accounts:
        raise Exception(f"No admin contact is specified for {self.domain}.")
    # Assign a new set instead of add()-ing in place: the set reachable
    # through self.admin_usernames may be shared with other objects, and
    # mutating it would leak admins from one instance into another.
    self.admin_usernames = {staff.split('/')[-1] for staff in staff_accounts}
def discover_admins(domain,software):
site = None
try:
site = requests.get(f"https://{domain}/api/v1/instance")
site_json = site.json()
# Misskey/Firefish style
if "contact_account" in site_json:
return [site_json["contact_account"]["username"]]
# Mastodon style
if "contact" in site_json:
return [site_json["contact"]["account"]["username"]]
# Pleroma/Akkoma style
if "email" in site_json:
admin_username = site_json["email"].split('@',1)[0]
return [admin_username]
raise Exception(f"Site software '{software} does not match any of the known APIs")
except Exception as err:
logger.error(f"Error retrieving {software} site info for {domain}: {err}")
raise Exception(f"Error retrieving {software} site info for {domain}: {err}")
def discover_admins(self):
    """Try each known admin lookup in turn until one succeeds.

    The lookups are attempted in the order mastodon, lemmy, pleroma,
    misskey; the first one that does not raise wins.

    Raises:
        Exception: If every lookup fails.
    """
    lookups = (
        self.get_mastodon_admins,
        self.get_lemmy_admins,
        self.get_pleroma_admins,
        self.get_misskey_admins,
    )
    for lookup in lookups:
        try:
            lookup()
        except Exception:
            # A failing lookup only means this API style does not match.
            # `Exception` rather than a bare except, so KeyboardInterrupt
            # and SystemExit still propagate.
            continue
        return
    logger.warning(f"Site software '{self.software} does not match any of the known APIs")
    raise Exception(f"Site software '{self.software} does not match any of the known APIs")
def get_unknown_admins(domain,software):
return []
def get_unknown_admins(self):
    """Return an empty list: no admin lookup is attempted here."""
    no_admins = []
    return no_admins
def get_admin_for_software(software: str, domain: str):
software_map = {
"lemmy": get_lemmy_admins,
"mastodon": get_mastodon_admins,
"friendica": get_mastodon_admins,
"pleroma": get_pleroma_admins,
"akkoma": get_pleroma_admins,
"misskey": get_misskey_admins,
"firefish": get_firefish_admins,
"iceshrimp": get_firefish_admins,
"mitra": get_firefish_admins,
"unknown": get_unknown_admins,
"wildcard": get_unknown_admins,
}
if software not in software_map:
return discover_admins(domain,software)
return software_map[software](domain,software)
def get_nodeinfo(domain):
try:
headers = {
"Sec-Fetch-Dest": "document",
"Sec-Fetch-Mode": "navigate",
"Sec-Fetch-Site": "none",
"Sec-Fetch-User": "?1",
"Sec-GPC": "1",
"User-Agent": f"Fediseer/{FEDISEER_VERSION}",
def retrieve_admins(self):
software_map = {
"lemmy": self.get_lemmy_admins,
"mastodon": self.get_mastodon_admins,
"friendica": self.get_mastodon_admins,
"pleroma": self.get_pleroma_admins,
"akkoma": self.get_pleroma_admins,
"misskey": self.get_misskey_admins,
"firefish": self.get_mastodon_admins,
"iceshrimp": self.get_mastodon_admins,
"mitra": self.get_mastodon_admins,
"unknown": self.get_unknown_admins,
"wildcard": self.get_unknown_admins,
}
wellknown = requests.get(f"https://{domain}/.well-known/nodeinfo", headers=headers, timeout=3).json()
headers["Sec-Fetch-Site"] = "cross-site"
nodeinfo = requests.get(wellknown['links'][-1]['href'], headers=headers, timeout=3).json()
return nodeinfo
except Exception as err:
return None
if self.software not in software_map:
self.discover_admins()
else:
software_map[self.software]()
def get_lemmy_info(self):
    """Fetch the Lemmy site info and derive the registration settings from it.

    Sets ``instance_info``, ``open_registrations``, ``email_verify``,
    ``approval_required`` and ``has_captcha``.

    Raises:
        Exception: If the site info request returns nothing.
    """
    self.instance_info = Lemmy(f"https://{self.domain}").site.get()
    if not self.instance_info:
        raise Exception(f"Error encountered while polling lemmy domain. Please check it's running correctly")
    local_site = self.instance_info["site_view"]["local_site"]
    # The mode used to be compared against "open" and "RequireApplication",
    # which cannot both match a single casing convention. Normalising the
    # case lets both checks work whichever casing the instance reports.
    registration_mode = str(local_site["registration_mode"]).lower()
    self.open_registrations = registration_mode == "open"
    self.email_verify = local_site["require_email_verification"]
    self.approval_required = registration_mode == "requireapplication"
    self.has_captcha = local_site["captcha_enabled"]
def get_mastodon_info(self):
    """Read registration settings from ``/api/v1/instance`` plus nodeinfo.

    Email verification and captcha state are set to ``None`` here.

    Raises:
        Exception: If no nodeinfo document is available.
    """
    response = requests.get(f"https://{self.domain}/api/v1/instance",timeout=self._req_timeout)
    payload = response.json()
    self.instance_info = payload
    self.approval_required = payload["approval_required"]
    nodeinfo = self.node_info
    if nodeinfo is None:
        raise Exception("Error retrieving nodeinfo")
    self.open_registrations = nodeinfo["openRegistrations"]
    self.email_verify = self.has_captcha = None
def get_pleroma_info(self):
    """Read registration settings from ``/api/v1/instance`` plus nodeinfo.

    Email verification and captcha state are set to ``None`` here.

    Raises:
        Exception: If no nodeinfo document is available.
    """
    instance_url = f"https://{self.domain}/api/v1/instance"
    self.instance_info = requests.get(instance_url,timeout=self._req_timeout).json()
    self.approval_required = self.instance_info["approval_required"]
    if self.node_info is not None:
        self.open_registrations = self.node_info["openRegistrations"]
        self.email_verify = None
        self.has_captcha = None
        return
    raise Exception("Error retrieving nodeinfo")
def get_firefish_info(self):
    """Read registration settings from ``/api/v1/instance`` and nodeinfo metadata.

    Email verification and captcha state come from the ``metadata``
    section of the nodeinfo document.

    Raises:
        Exception: If no nodeinfo document is available.
    """
    response = requests.get(f"https://{self.domain}/api/v1/instance",timeout=self._req_timeout)
    self.instance_info = response.json()
    self.approval_required = self.instance_info["approval_required"]
    if self.node_info is None:
        raise Exception("Error retrieving nodeinfo")
    self.open_registrations = self.node_info["openRegistrations"]
    metadata = self.node_info["metadata"]
    self.email_verify = metadata["emailRequiredForSignup"]
    # Either captcha provider being switched on counts as "has a captcha".
    self.has_captcha = metadata["enableHcaptcha"] is True or metadata["enableRecaptcha"] is True
def get_unknown_info(self):
    """Fill in the registration flag for unknown or wildcard software.

    Uses the nodeinfo ``openRegistrations`` value when a nodeinfo document
    is available, and falls back to ``False`` otherwise.
    """
    if self.node_info is not None:
        self.open_registrations = self.node_info.get("openRegistrations", False)
    else:
        # Without nodeinfo the flag would stay None, but it is stored in a
        # NOT NULL column, so registrations are reported as closed instead.
        self.open_registrations = False
def discover_info(self):
    """Best-effort probe for software without a dedicated info parser.

    Reads ``/api/v1/instance`` without following redirects, then fills in
    whatever registration details the nodeinfo document exposes.

    Raises:
        Exception: If the endpoint does not answer with HTTP 200, or no
            nodeinfo document is available.
    """
    site = requests.get(f"https://{self.domain}/api/v1/instance",timeout=self._req_timeout,allow_redirects=False)
    if site.status_code != 200:
        raise Exception(f"Unexpected status code retrieved when discovering nodeinfo: {site.status_code}")
    self.instance_info = site.json()
    self.approval_required = self.instance_info.get("approval_required")
    if self.node_info is None:
        raise Exception("Error retrieving nodeinfo")
    self.open_registrations = self.node_info.get("openRegistrations")
    # Only firefish and lemmy report the next two
    if "metadata" in self.node_info:
        metadata = self.node_info["metadata"]
        self.email_verify = metadata.get("emailRequiredForSignup")
        self.has_captcha = None
        # Both captcha switches live under "metadata". Reading
        # enableRecaptcha from the nodeinfo root could never find it.
        if metadata.get("enableHcaptcha") is True or metadata.get("enableRecaptcha") is True:
            self.has_captcha = True
def parse_instance_info(self):
# Work out which software the instance runs, then dispatch to the matching
# get_*_info() probe. Software missing from the map below (for example
# "misskey", which has no entry) is handled by discover_info().
# NOTE(review): indentation was lost in this listing; the wildcard check
# presumably sits inside the allow_unreachable branch — confirm against the repository.
if not self.node_info:
if self._allow_unreachable:
self.software = "unknown"
# Domains containing a glob are tagged with the "wildcard" pseudo-software
if "*" in self.domain:
self.software = "wildcard"
else:
self.software = self.node_info["software"]["name"]
# Maps a software name to the bound method that knows how to read its info
software_map = {
"lemmy": self.get_lemmy_info,
"mastodon": self.get_mastodon_info,
"friendica": self.get_mastodon_info,
"pleroma": self.get_pleroma_info,
"akkoma": self.get_pleroma_info,
"firefish": self.get_firefish_info,
"iceshrimp": self.get_firefish_info,
"mitra": self.get_firefish_info,
"unknown": self.get_unknown_info,
"wildcard": self.get_unknown_info,
}
if self.software not in software_map:
self.discover_info()
else:
software_map[self.software]()
@staticmethod
def get_nodeinfo(domain, req_timeout=3):
    """Fetch the nodeinfo document advertised by ``domain``.

    The ``/.well-known/nodeinfo`` index is read first, then the ``href``
    of its last link entry is fetched.

    Args:
        domain: The instance domain to query.
        req_timeout: Timeout in seconds applied to each of the two requests.

    Returns:
        The decoded nodeinfo JSON, or ``None`` if anything goes wrong.
    """
    try:
        request_headers = {
            "Sec-Fetch-Dest": "document",
            "Sec-Fetch-Mode": "navigate",
            "Sec-Fetch-Site": "none",
            "Sec-Fetch-User": "?1",
            "Sec-GPC": "1",
            "User-Agent": f"Fediseer/{FEDISEER_VERSION}",
        }
        index_response = requests.get(
            f"https://{domain}/.well-known/nodeinfo",
            headers=request_headers,
            timeout=req_timeout,
        )
        index = index_response.json()
        # The second request targets whatever host the index points at
        request_headers["Sec-Fetch-Site"] = "cross-site"
        document_url = index['links'][-1]['href']
        document_response = requests.get(document_url, headers=request_headers, timeout=req_timeout)
        return document_response.json()
    except Exception:
        return None
@staticmethod
def is_reachable(domain, req_timeout=5):
    """Check that the front page of ``domain`` answers with an accepted status.

    This separates "the nodeinfo cannot be read" from "the domain itself
    cannot be reached". Redirects are not followed.

    Raises:
        Exception: If the status code is not 200, 401 or 403.
    """
    logger.debug(domain)
    frontpage = requests.get(f"https://{domain}", timeout=req_timeout, allow_redirects=False)
    status = frontpage.status_code
    logger.debug(status)
    if status in (200, 401, 403):
        return
    raise Exception(f"Status code unexpected for instance frontpage: {status}")
# Debug
# ii = InstanceInfo("lemmy.dbzer0.com")
# logger.debug([
# ii.software,
# ii.open_registrations,
# ii.approval_required,
# ii.email_verify,
# ii.has_captcha,
# ii.admin_usernames])
# import sys
# sys.exit()

View File

@ -15,7 +15,7 @@ from mastodon import Mastodon
from loguru import logger
from fediseer.database import functions as database
from fediseer.consts import SUPPORTED_SOFTWARE, FEDISEER_VERSION
from fediseer.fediverse import get_admin_for_software
from fediseer.fediverse import InstanceInfo
from fediseer import enums
class ActivityPubPM:
@ -187,13 +187,13 @@ class ActivityPubPM:
admins = database.find_admins_by_instance(instance)
if not admins:
try:
admins = get_admin_for_software(software, domain)
admins = InstanceInfo(domain).admin_usernames
except Exception as err:
if software not in SUPPORTED_SOFTWARE:
logger.warning(f"Failed to figure out admins from {software}: {domain}")
raise e.BadRequest(f"Failed to retrieve admin list: {err}")
else:
admins = [a.username for a in admins]
admins = set([a.username for a in admins])
proxy = instance.pm_proxy
if not admins:
raise e.BadRequest(f"Could not determine admins for {domain}")

View File

@ -0,0 +1,55 @@
from fediseer.fediverse import InstanceInfo
from fediseer.database import functions as database
from fediseer.classes.instance import Instance
from fediseer.flask import db
import fediseer.exceptions as e
from datetime import datetime
from loguru import logger
def ensure_instance_registered(domain, allow_unreachable=False, record_unreachable = False, allowed_timeout=5):
    """Probe ``domain`` and make sure a matching Instance row exists and is current.

    Args:
        domain: The instance domain to probe and register.
        allow_unreachable: Passed to ``InstanceInfo``, which then tolerates
            failures while parsing the instance info.
        record_unreachable: When true, a failed probe of an already known,
            non-wildcard instance increments its ``poll_failures`` counter,
            provided its nodeinfo really cannot be fetched.
        allowed_timeout: Request timeout in seconds used for the probes.

    Returns:
        tuple: ``(instance, instance_info)`` where ``instance`` is the
        stored (possibly just created) row and ``instance_info`` is the
        fresh probe result.

    Raises:
        BadRequest: If the probe raised.
    """
    instance = database.find_instance_by_domain(domain)
    try:
        instance_info = InstanceInfo(domain,allow_unreachable=allow_unreachable, req_timeout=allowed_timeout)
    except Exception as err:
        if record_unreachable and instance and instance.software != "wildcard":
            # We only consider an instance unreachable if we can't reach its nodeinfo
            # This means that a misconfigured instance will also be considered as 'down'
            nodeinfo = InstanceInfo.get_nodeinfo(domain,req_timeout=allowed_timeout)
            if nodeinfo is None:
                logger.warning(f"Recorded {domain} as unreachable.")
                instance.updated = datetime.utcnow()
                # The counter is stored in a nullable column; treat NULL as zero
                instance.poll_failures = (instance.poll_failures or 0) + 1
                db.session.commit()
        # Everything below needs instance_info, which is unbound here.
        # Raise a clean error in every case rather than letting the
        # allow_unreachable path crash with an UnboundLocalError.
        raise e.BadRequest(f"Error encountered while polling domain {domain}. Please check it's running correctly") from err
    if instance:
        if (
            instance.software != instance_info.software or
            instance.open_registrations != instance_info.open_registrations or
            instance.approval_required != instance_info.approval_required or
            instance.email_verify != instance_info.email_verify or
            instance.has_captcha != instance_info.has_captcha or
            # A NULL counter is normalised to 0 by the update below
            instance.poll_failures is None or
            instance.poll_failures > 0
        ):
            logger.debug(f"Updated instance info for {domain}")
            instance.software = instance_info.software
            instance.open_registrations = instance_info.open_registrations
            instance.approval_required = instance_info.approval_required
            instance.email_verify = instance_info.email_verify
            instance.has_captcha = instance_info.has_captcha
            instance.updated = datetime.utcnow()
            # A successful probe clears any recorded failures
            instance.poll_failures = 0
            db.session.commit()
        return instance, instance_info
    new_instance = Instance(
        domain=domain,
        open_registrations=instance_info.open_registrations,
        email_verify=instance_info.email_verify,
        approval_required=instance_info.approval_required,
        has_captcha=instance_info.has_captcha,
        software=instance_info.software,
    )
    new_instance.create()
    return new_instance, instance_info

View File

@ -0,0 +1,4 @@
-- Adds the registration-policy flags and the failed-poll counter to the
-- instances table. No NOT NULL constraint is declared on the new columns.
ALTER TABLE instances ADD COLUMN approval_required BOOLEAN;
ALTER TABLE instances ADD COLUMN has_captcha BOOLEAN;
-- The counter starts at 0
ALTER TABLE instances ADD COLUMN poll_failures INTEGER default 0;
-- email_verify may now be NULL
ALTER TABLE instances ALTER COLUMN email_verify DROP NOT NULL;

62
updater.py 100644
View File

@ -0,0 +1,62 @@
from dotenv import load_dotenv
import os
import logging
load_dotenv()
from loguru import logger
from fediseer.flask import OVERSEER, db
import fediseer.database.functions as database
from fediseer.register import ensure_instance_registered
from concurrent.futures import ThreadPoolExecutor
def refresh_info(domain):
    """Re-poll ``domain`` inside an application context and update its stored info.

    Failures never propagate to the caller: they are logged at debug level
    so one failing domain cannot abort a whole refresh run.
    """
    logger.info(f"Refreshing domain '{domain}'")
    with OVERSEER.app_context():
        try:
            ensure_instance_registered(
                domain,
                # We don't want to set allow_unreachable = True here
                # As InstanceInfo() won't raise an exception when failing
                # Which will cause the poll_failures to not increment
                allow_unreachable=False,
                record_unreachable=True,
                allowed_timeout=20
            )
        except Exception as err:
            # Best-effort by design, but leave a trace of what went wrong
            logger.debug(f"Refreshing domain '{domain}' failed: {err}")
if __name__ == "__main__":
    # Refreshes every registered instance on a thread pool.
    # Environment:
    #   FEDISEER_IGNORE_POLL_FAILS  skip instances with at least this many
    #                               failed polls (unset or 0 disables skipping)
    #   FEDISEER_UPDATE_THREADS     worker thread count (default 25)

    # Only setting this for the WSGI logs
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s', level=logging.WARNING)
    logger.init("Updater", status="Starting")
    # Parse the threshold once. A value of 0 must mean "no limit": every
    # counter satisfies ">= 0", so the unset default would skip them all.
    ignore_poll_fails = int(os.getenv('FEDISEER_IGNORE_POLL_FAILS', 0))
    if ignore_poll_fails > 0:
        logger.info(f"Skipping instances with poll fails >= {ignore_poll_fails}")
    else:
        logger.info("FEDISEER_IGNORE_POLL_FAILS is not set. No instance is skipped due to poll fails.")
    futures = []
    with ThreadPoolExecutor(max_workers=int(os.getenv('FEDISEER_UPDATE_THREADS', 25))) as executor:
        with OVERSEER.app_context():
            for instance in database.get_all_instances(0,0):
                if instance.software == 'wildcard':
                    continue
                # poll_failures may be NULL in the database; count that as 0
                if ignore_poll_fails > 0 and (instance.poll_failures or 0) >= ignore_poll_fails:
                    logger.debug(f"Skipped {instance.domain} due to too many poll fails.")
                    continue
                futures.append(executor.submit(refresh_info, instance.domain))
                # Drain in batches so the pending-future list stays bounded
                if len(futures) >= 500:
                    for future in futures:
                        future.result()
                    futures = []
            for future in futures:
                future.result()
    logger.init("Updater", status="Ended")
[]