activity_suspicion

pull/7/head
db0 2023-06-21 01:57:30 +02:00
parent c4357aa855
commit 0be44910cf
3 changed files with 12 additions and 9 deletions

View File

@@ -9,10 +9,11 @@ class Models:
 'domain': fields.String(description="The instance domain"),
 'uptime_alltime': fields.Float(description="The instance uptime pct. 100% and thousand of users is unlikely"),
 'local_posts': fields.Integer(description="The amount of local posts in that instance"),
+'comment_counts': fields.Integer(description="The amount of comments in that instance"),
 'total_users': fields.Integer(description="The total amount of users registered in that instance"),
 'active_users_monthly': fields.Integer(description="The amount of active users monthly."),
 'signup': fields.Boolean(default=False,description="True when subscriptions are open, else False"),
-'user_post_ratio': fields.Float(description="Users to Post Ratio"),
+'activity_suspicion': fields.Float(description="Local Comments+Posts per User. Higher is worse"),
 })
 self.response_model_model_SusInstances_get = api.model('SuspiciousInstancesDomainList', {
 'instances': fields.List(fields.Nested(self.response_model_suspicious_instances)),

View File

@@ -19,7 +19,7 @@ def get_request_path():
 class SusInstances(Resource):
 get_parser = reqparse.RequestParser()
 get_parser.add_argument("Client-Agent", default="unknown:0:unknown", type=str, required=False, help="The client name and version.", location="headers")
-get_parser.add_argument("user_to_post_ratio", required=False, default=20, type=int, help="The amount of local users / amount of local posts to consider suspicious", location="args")
+get_parser.add_argument("activity_suspicion", required=False, default=20, type=int, help="How many users per local post+comment to consider suspicious", location="args")
 get_parser.add_argument("csv", required=False, type=bool, help="Set to true to return just the domains as a csv. Mutually exclusive with domains", location="args")
 get_parser.add_argument("domains", required=False, type=bool, help="Set to true to return just the domains as a list. Mutually exclusive with csv", location="args")
@@ -31,7 +31,7 @@ class SusInstances(Resource):
 '''A List with the details of all suspicious instances
 '''
 self.args = self.get_parser.parse_args()
-sus_instances = retrieve_suspicious_instances(self.args.user_to_post_ratio)
+sus_instances = retrieve_suspicious_instances(self.args.activity_suspicion)
 if self.args.csv:
 return {"csv": ",".join([instance["domain"] for instance in sus_instances])},200
 if self.args.domains:

View File

@@ -2,7 +2,7 @@ import requests
 from loguru import logger
-def retrieve_suspicious_instances(users_to_posts_ratio = 20):
+def retrieve_suspicious_instances(activity_suspicion = 20):
 # GraphQL query
 query = '''
 {
@@ -19,6 +19,7 @@ def retrieve_suspicious_instances(users_to_posts_ratio = 20):
 active_users_halfyear
 signup
 local_posts
+comment_counts
 }
 }
 '''
@@ -58,12 +59,12 @@ def retrieve_suspicious_instances(users_to_posts_ratio = 20):
 bad_nodes = []
 for node in data["data"]["nodes"]:
 is_bad = False
-local_posts = node["local_posts"]
+local_activity = node["local_posts"] + node["comment_counts"]
 if node["total_users"] < 300:
 continue
-if local_posts == 0:
-local_posts= 1
-if node["total_users"] / local_posts > users_to_posts_ratio:
+if local_activity == 0:
+local_activity= 1
+if node["total_users"] / local_activity > activity_suspicion:
 is_bad = True
 # print(node)
 if is_bad:
@@ -71,10 +72,11 @@ def retrieve_suspicious_instances(users_to_posts_ratio = 20):
 "domain": node["domain"],
 "uptime_alltime": node["uptime_alltime"],
 "local_posts": node["local_posts"],
+"comment_counts": node["comment_counts"],
 "total_users": node["total_users"],
 "active_users_monthly": node["active_users_monthly"],
 "signup": node["signup"],
-"user_post_ratio": node["total_users"] / local_posts,
+"activity_suspicion": node["total_users"] / local_activity,
 }
 bad_nodes.append(bad_node)
 return bad_nodes