From 0be44910cfb177aaec2fc3786f10f3fcfdd37423 Mon Sep 17 00:00:00 2001 From: db0 Date: Wed, 21 Jun 2023 01:57:30 +0200 Subject: [PATCH] activity_suspicion --- overseer/apis/models/v1.py | 3 ++- overseer/apis/v1/base.py | 4 ++-- overseer/observer.py | 14 ++++++++------ 3 files changed, 12 insertions(+), 9 deletions(-) diff --git a/overseer/apis/models/v1.py b/overseer/apis/models/v1.py index 6c9e643..8aa5520 100644 --- a/overseer/apis/models/v1.py +++ b/overseer/apis/models/v1.py @@ -9,10 +9,11 @@ class Models: 'domain': fields.String(description="The instance domain"), 'uptime_alltime': fields.Float(description="The instance uptime pct. 100% and thousand of users is unlikely"), 'local_posts': fields.Integer(description="The amount of local posts in that instance"), + 'comment_counts': fields.Integer(description="The amount of comments in that instance"), 'total_users': fields.Integer(description="The total amount of users registered in that instance"), 'active_users_monthly': fields.Integer(description="The amount of active users monthly."), 'signup': fields.Boolean(default=False,description="True when subscriptions are open, else False"), - 'user_post_ratio': fields.Float(description="Users to Post Ratio"), + 'activity_suspicion': fields.Float(description="Local Comments+Posts per User. Higher is worse"), }) self.response_model_model_SusInstances_get = api.model('SuspiciousInstancesDomainList', { 'instances': fields.List(fields.Nested(self.response_model_suspicious_instances)), diff --git a/overseer/apis/v1/base.py b/overseer/apis/v1/base.py index b00c2f4..84a3354 100644 --- a/overseer/apis/v1/base.py +++ b/overseer/apis/v1/base.py @@ -19,7 +19,7 @@ def get_request_path(): class SusInstances(Resource): get_parser = reqparse.RequestParser() get_parser.add_argument("Client-Agent", default="unknown:0:unknown", type=str, required=False, help="The client name and version.", location="headers") - get_parser.add_argument("user_to_post_ratio", required=False, default=20, type=int, help="The amount of local users / amount of local posts to consider suspicious", location="args") + get_parser.add_argument("activity_suspicion", required=False, default=20, type=int, help="How many users per local post+comment to consider suspicious", location="args") get_parser.add_argument("csv", required=False, type=bool, help="Set to true to return just the domains as a csv. Mutually exclusive with domains", location="args") get_parser.add_argument("domains", required=False, type=bool, help="Set to true to return just the domains as a list. Mutually exclusive with csv", location="args") @@ -31,7 +31,7 @@ class SusInstances(Resource): '''A List with the details of all suspicious instances ''' self.args = self.get_parser.parse_args() - sus_instances = retrieve_suspicious_instances(self.args.user_to_post_ratio) + sus_instances = retrieve_suspicious_instances(self.args.activity_suspicion) if self.args.csv: return {"csv": ",".join([instance["domain"] for instance in sus_instances])},200 if self.args.domains: diff --git a/overseer/observer.py b/overseer/observer.py index dcc9540..164bf7a 100644 --- a/overseer/observer.py +++ b/overseer/observer.py @@ -2,7 +2,7 @@ import requests from loguru import logger -def retrieve_suspicious_instances(users_to_posts_ratio = 20): +def retrieve_suspicious_instances(activity_suspicion = 20): # GraphQL query query = ''' { @@ -19,6 +19,7 @@ def retrieve_suspicious_instances(users_to_posts_ratio = 20): active_users_halfyear signup local_posts + comment_counts } } ''' @@ -58,12 +59,12 @@ def retrieve_suspicious_instances(users_to_posts_ratio = 20): bad_nodes = [] for node in data["data"]["nodes"]: is_bad = False - local_posts = node["local_posts"] + local_activity = node["local_posts"] + node["comment_counts"] if node["total_users"] < 300: continue - if local_posts == 0: - local_posts= 1 - if node["total_users"] / local_posts > users_to_posts_ratio: + if local_activity == 0: + local_activity= 1 + if node["total_users"] / local_activity > activity_suspicion: is_bad = True # print(node) if is_bad: @@ -71,10 +72,11 @@ def retrieve_suspicious_instances(users_to_posts_ratio = 20): "domain": node["domain"], "uptime_alltime": node["uptime_alltime"], "local_posts": node["local_posts"], + "comment_counts": node["comment_counts"], "total_users": node["total_users"], "active_users_monthly": node["active_users_monthly"], "signup": node["signup"], - "user_post_ratio": node["total_users"] / local_posts, + "activity_suspicion": node["total_users"] / local_activity, } bad_nodes.append(bad_node) return bad_nodes