diff --git a/fediseer/fediverse.py b/fediseer/fediverse.py
index 18aa53f..4eea274 100644
--- a/fediseer/fediverse.py
+++ b/fediseer/fediverse.py
@@ -1,4 +1,5 @@
 import requests
+import socket
 from loguru import logger
 from pythorhead import Lemmy
 from fediseer.consts import FEDISEER_VERSION
@@ -17,6 +18,8 @@ class InstanceInfo():
     has_captcha = None
     _allow_unreachable = False
     _req_timeout = 5
+    _nodeinfo_err: Exception = None
+    _siteinfo_err: Exception = None
 
     def __init__(self, domain, allow_unreachable=False, req_timeout=5):
         self.domain = domain
@@ -34,12 +37,16 @@ class InstanceInfo():
             self.instance_info = {}
             return
 
-        self.node_info = InstanceInfo.get_nodeinfo(domain,req_timeout=self._req_timeout)
+        try:
+            self.node_info = InstanceInfo.get_nodeinfo(domain,req_timeout=self._req_timeout)
+        except Exception as err:
+            self._nodeinfo_err = err
 
     def get_instance_info(self):
         try:
             self.parse_instance_info()
         except Exception as err:
+            self._siteinfo_err = err
             # This is just to report for the error message
             if self.software is not None:
                 sw = self.software
@@ -238,21 +245,18 @@ class InstanceInfo():
 
     @staticmethod
     def get_nodeinfo(domain, req_timeout=3):
-        try:
-            headers = {
-                "Sec-Fetch-Dest": "document",
-                "Sec-Fetch-Mode": "navigate",
-                "Sec-Fetch-Site": "none",
-                "Sec-Fetch-User": "?1",
-                "Sec-GPC": "1",
-                "User-Agent": f"Fediseer/{FEDISEER_VERSION}",
-            }
-            wellknown = requests.get(f"https://{domain}/.well-known/nodeinfo", headers=headers, timeout=req_timeout).json()
-            headers["Sec-Fetch-Site"] = "cross-site"
-            nodeinfo = requests.get(wellknown['links'][-1]['href'], headers=headers, timeout=req_timeout).json()
-            return nodeinfo
-        except Exception as err:
-            return None
+        headers = {
+            "Sec-Fetch-Dest": "document",
+            "Sec-Fetch-Mode": "navigate",
+            "Sec-Fetch-Site": "none",
+            "Sec-Fetch-User": "?1",
+            "Sec-GPC": "1",
+            "User-Agent": f"Fediseer/{FEDISEER_VERSION}",
+        }
+        wellknown = requests.get(f"https://{domain}/.well-known/nodeinfo", headers=headers, timeout=req_timeout).json()
+        headers["Sec-Fetch-Site"] = "cross-site"
+        nodeinfo = requests.get(wellknown['links'][-1]['href'], headers=headers, timeout=req_timeout).json()
+        return nodeinfo
 
     @staticmethod
     def is_reachable(domain, req_timeout=5):
@@ -264,15 +268,34 @@ class InstanceInfo():
         if req.status_code not in [200,401,403]:
             raise Exception(f"Status code unexpected for instance frontpage: {req.status_code}")
 
-# Debug
-# ii = InstanceInfo("makai.chaotic.ninja")
-# ii.get_instance_info()
-# logger.debug([
-#     ii.software,
-#     ii.open_registrations,
-#     ii.approval_required,
-#     ii.email_verify,
-#     ii.has_captcha,
-#     ii.admin_usernames])
+    def domain_exists(self):
+        """Return True if self.domain currently resolves to an address, else False.
+
+        Uses getaddrinfo() so that IPv6-only hosts count as existing;
+        gethostbyname() only resolves IPv4 addresses.
+        """
+        try:
+            socket.getaddrinfo(self.domain, None)
+            return True
+        except (OSError, ValueError):
+            # OSError covers resolver failures (socket.gaierror / socket.herror);
+            # ValueError covers names that cannot be IDNA-encoded (UnicodeError).
+            return False
+
+
+# # Debug
+# ii = InstanceInfo("outpoa.st")
+# if ii.domain_exists():
+#     ii.get_instance_info()
+#     logger.info([
+#         ii.software,
+#         ii.open_registrations,
+#         ii.approval_required,
+#         ii.email_verify,
+#         ii.has_captcha,
+#         ii.admin_usernames,
+#     ])
+# else:
+#     logger.error("Domain does not exist")
 # import sys
 # sys.exit()
\ No newline at end of file
diff --git a/fediseer/register.py b/fediseer/register.py
index 5a61d3d..8b98682 100644
--- a/fediseer/register.py
+++ b/fediseer/register.py
@@ -18,7 +18,10 @@ def ensure_instance_registered(domain, allow_unreachable=False, record_unreachab
         if instance_info.node_info is None:
             logger.warning(f"Recorded {domain} as unreachable.")
             instance.updated = datetime.utcnow()
-            instance.poll_failures += 1
+            if instance_info.domain_exists():
+                instance.poll_failures += 1
+            else:
+                instance.poll_failures += 60
             db.session.commit()
             if not allow_unreachable:
                 raise e.BadRequest(f"Error encountered while polling domain {domain}. Please check it's running correctly")
@@ -43,6 +46,10 @@ def ensure_instance_registered(domain, allow_unreachable=False, record_unreachab
             instance.poll_failures = 0
             db.session.commit()
         return instance, instance_info
+    poll_failures = 0
+    if not instance_info.domain_exists():
+        # If the domain is gone, we assume straight decommission
+        poll_failures = 100
     new_instance = Instance(
         domain=domain,
         open_registrations=instance_info.open_registrations,
@@ -50,6 +57,7 @@ def ensure_instance_registered(domain, allow_unreachable=False, record_unreachab
         approval_required=instance_info.approval_required,
         has_captcha=instance_info.has_captcha,
         software=instance_info.software,
+        poll_failures=poll_failures,
     )
     new_instance.create()
     return new_instance, instance_info
\ No newline at end of file