feat: converted to REST API

pull/7/head
db0 2023-06-20 19:47:56 +02:00
parent 848ad748df
commit 73d6715118
19 changed files with 586 additions and 81 deletions

139
.gitignore vendored 100644
View File

@ -0,0 +1,139 @@
# Byte-compiled / optimized / DLL files
__pycache__/
*.py[cod]
*$py.class
# C extensions
*.so
# Distribution / packaging
.Python
build/
develop-eggs/
dist/
downloads/
eggs/
.eggs/
lib/
lib64/
parts/
sdist/
var/
wheels/
pip-wheel-metadata/
share/python-wheels/
*.egg-info/
.installed.cfg
*.egg
MANIFEST
# PyInstaller
# Usually these files are written by a python script from a template
# before PyInstaller builds the exe, so as to inject date/other infos into it.
*.manifest
*.spec
# Installer logs
pip-log.txt
pip-delete-this-directory.txt
# Unit test / coverage reports
htmlcov/
.tox/
.nox/
.coverage
.coverage.*
.cache
nosetests.xml
coverage.xml
*.cover
*.py,cover
.hypothesis/
.pytest_cache/
# Translations
*.mo
*.pot
# Django stuff:
*.log
local_settings.py
db.sqlite3
db.sqlite3-journal
# Flask stuff:
instance/
.webassets-cache
# Scrapy stuff:
.scrapy
# Sphinx documentation
docs/_build/
# PyBuilder
target/
# Jupyter Notebook
.ipynb_checkpoints
# IPython
profile_default/
ipython_config.py
# pyenv
.python-version
# pipenv
# According to pypa/pipenv#598, it is recommended to include Pipfile.lock in version control.
# However, in case of collaboration, if having platform-specific dependencies or dependencies
# having no cross-platform support, pipenv may install dependencies that don't work, or not
# install all needed dependencies.
#Pipfile.lock
# PEP 582; used by e.g. github.com/David-OConnor/pyflow
__pypackages__/
# Celery stuff
celerybeat-schedule
celerybeat.pid
# SageMath parsed files
*.sage.py
# Environments
.env
.venv
env/
venv/
ENV/
env.bak/
venv.bak/
# Spyder project settings
.spyderproject
.spyproject
# Rope project settings
.ropeproject
# mkdocs documentation
/site
# mypy
.mypy_cache/
.dmypy.json
dmypy.json
# Pyre type checker
.pyre/
db/*
test_commands.txt
SQL_statements.txt
horde.log
horde*.bz2
horde.db
/.idea
/boto3oeo.py

View File

@ -0,0 +1,22 @@
import os
import socket
from uuid import uuid4
from overseer.logger import logger
from overseer.flask import OVERSEER
from overseer.routes import *
from overseer.apis import apiv1
from overseer.argparser import args
from overseer.consts import OVERSEER_VERSION
OVERSEER.register_blueprint(apiv1)


@OVERSEER.after_request
def after_request(response):
    """Attach CORS and node-identification headers to every response."""
    extra_headers = {
        "Access-Control-Allow-Origin": "*",
        "Access-Control-Allow-Methods": "POST, GET, OPTIONS, PUT, DELETE, PATCH",
        "Access-Control-Allow-Headers": (
            "Accept, Content-Type, Content-Length, Accept-Encoding, "
            "X-CSRF-Token, apikey, Client-Agent, X-Fields"
        ),
        # Identifies which node/version answered, for multi-node deployments.
        "Horde-Node": f"{socket.gethostname()}:{args.port}:{OVERSEER_VERSION}",
    }
    for header_name, header_value in extra_headers.items():
        response.headers[header_name] = header_value
    return response

View File

@ -0,0 +1 @@
from .apiv1 import blueprint as apiv1

View File

@ -0,0 +1,18 @@
from flask import Blueprint
from flask_restx import Api
from importlib import import_module
from overseer.apis.v1 import api as v1

# All versioned REST APIs hang off the /api prefix of this blueprint.
blueprint = Blueprint('apiv1', __name__, url_prefix='/api')

api = Api(
    blueprint,
    version='1.0',
    # Fix: these were f-strings with no placeholders; plain literals suffice.
    title='Lemmy Overseer',
    description='The API documentation for the Lemmy Overseer',
    contact_email="mail@dbzer0.com",
    # "v1" is both the default namespace and the one shown first in the docs.
    default="v1",
    default_label="Latest Version",
    ordered=True,
)

api.add_namespace(v1)

View File

View File

@ -0,0 +1,13 @@
from flask_restx import fields
class Models:
    """Container for the flask-restx response models used by the v1 API."""

    def __init__(self,api):
        # Serialization schema for one suspicious-instance entry, as returned
        # (as a list) by GET /api/v1/instances.
        self.response_model_suspicious_instances = api.model('SuspiciousInstances', {
            'domain': fields.String(description="The instance domain"),
            'uptime_alltime': fields.Float(description="The instance uptime pct. 100% and thousand of users is unlikely"),
            'local_posts': fields.Integer(description="The amount of local posts in that instance"),
            'total_users': fields.Integer(description="The total amount of users registered in that instance"),
            'active_users_monthly': fields.Integer(description="The amount of active users monthly."),
            'signup': fields.Boolean(default=False,description="True when subscriptions are open, else False"),
            'user_post_ratio': fields.Float(description="Users to Post Ratio"),
        })

View File

@ -0,0 +1,4 @@
import overseer.apis.v1.base as base
from overseer.apis.v1.base import api

# Route table for API v1: the suspicious-instances listing.
api.add_resource(base.SusInstances, "/instances")

View File

@ -0,0 +1,32 @@
from flask import request
from flask_restx import Namespace, Resource, reqparse
from overseer.flask import cache
from overseer.observer import retrieve_suspicious_instances
from loguru import logger
# Namespace mounted by overseer.apis.apiv1 under the /api prefix.
api = Namespace('v1', 'API Version 1' )
from overseer.apis.models.v1 import Models
# Instantiate the marshalling models against this namespace so the resources
# below can reference them in their decorators.
models = Models(api)
# Key function for the flask limiter: one rate bucket per IP/method/path.
def get_request_path():
    """Return a limiter key of the form ``<remote_addr>@<method>@<path>``."""
    remote_addr = request.remote_addr
    method = request.method
    path = request.path
    return f"{remote_addr}@{method}@{path}"
class SusInstances(Resource):
    """GET endpoint listing instances flagged as suspicious."""

    # Header/query parser for GET /api/v1/instances.
    get_parser = reqparse.RequestParser()
    get_parser.add_argument("Client-Agent", default="unknown:0:unknown", type=str, required=False, help="The client name and version.", location="headers")
    get_parser.add_argument("user_to_post_ratio", required=False, default=20, type=int, help="The amount of local users / amount of local posts to consider suspicious", location="args")

    # Decorator order matters: the cache wraps the marshalled result, and
    # logger.catch re-raises so flask-restx still produces an error response.
    @api.expect(get_parser)
    @logger.catch(reraise=True)
    @cache.cached(timeout=10, query_string=True)
    @api.marshal_with(models.response_model_suspicious_instances, code=200, description='Suspicious Instances', as_list=True, skip_none=True)
    def get(self):
        '''A List with the details of all suspicious instances
        '''
        # NOTE(review): parse_args() is stored on self, but a local variable
        # would do -- the value is only read on the next line. Confirm nothing
        # else relies on self.args.
        self.args = self.get_parser.parse_args()
        return retrieve_suspicious_instances(self.args.user_to_post_ratio),200

View File

@ -0,0 +1,10 @@
import argparse

# Command-line interface for the Overseer service. Parsed once at import time
# so every module can simply do `from overseer.argparser import args`.
arg_parser = argparse.ArgumentParser()
arg_parser.add_argument(
    '-i', '--insecure', action="store_true",
    help="If set, will use http instead of https (useful for testing)")
arg_parser.add_argument(
    '-v', '--verbosity', action='count', default=0,
    help="The default logging level is ERROR or higher. This value increases the amount of logging seen in your screen")
arg_parser.add_argument(
    '-q', '--quiet', action='count', default=0,
    help="The default logging level is ERROR or higher. This value decreases the amount of logging seen in your screen")
arg_parser.add_argument(
    '-p', '--port', action='store', default=10001, required=False, type=int,
    help="Provide a different port to start with")
arg_parser.add_argument('--test', action="store_true", help="Test")
arg_parser.add_argument(
    '--color', default=False, action="store_true",
    help="Enabled colorized logs")
args = arg_parser.parse_args()

View File

@ -0,0 +1 @@
OVERSEER_VERSION = "0.0.1"

40
overseer/flask.py 100644
View File

@ -0,0 +1,40 @@
import os
from flask import Flask
from flask_caching import Cache
from werkzeug.middleware.proxy_fix import ProxyFix
from flask_sqlalchemy import SQLAlchemy
from loguru import logger
# Populated further down; kept at module scope so other modules can import it.
cache = None

OVERSEER = Flask(__name__)
# Trust one level of reverse proxy when resolving the client IP
# (X-Forwarded-For), so rate limiting keys off the real address.
OVERSEER.wsgi_app = ProxyFix(OVERSEER.wsgi_app, x_for=1)

# USE_SQLITE=1 selects a local SQLite file; anything else targets Postgres
# using credentials from the environment.
SQLITE_MODE = os.getenv("USE_SQLITE", "0") == "1"
if SQLITE_MODE:
    logger.warning("Using SQLite for database")
    OVERSEER.config["SQLALCHEMY_DATABASE_URI"] = "sqlite:///horde.db"
else:
    OVERSEER.config["SQLALCHEMY_DATABASE_URI"] = f"postgresql://postgres:{os.getenv('POSTGRES_PASS')}@{os.getenv('POSTGRES_URL')}"
    OVERSEER.config['SQLALCHEMY_ENGINE_OPTIONS'] = {
        "pool_size": 50,
        # -1 means unlimited overflow connections beyond the pool size.
        "max_overflow": -1,
    }
OVERSEER.config['SQLALCHEMY_TRACK_MODIFICATIONS'] = False
db = SQLAlchemy(OVERSEER)
# NOTE(review): SQLAlchemy(OVERSEER) already binds the app, so this second
# init_app call looks redundant -- confirm it is harmless on the pinned
# flask_sqlalchemy version.
db.init_app(OVERSEER)
if not SQLITE_MODE:
    with OVERSEER.app_context():
        logger.debug("pool size = {}".format(db.engine.pool.size()))
logger.init_ok("Horde Database", status="Started")
# Allow local workstation run
# NOTE(review): cache was set to None just above, so this branch always runs;
# presumably a remnant of an alternative (e.g. redis) backend -- confirm.
if cache is None:
    cache_config = {
        "CACHE_TYPE": "SimpleCache",
        "CACHE_DEFAULT_TIMEOUT": 300
    }
    cache = Cache(config=cache_config)
    cache.init_app(OVERSEER)
    logger.init_warn("Flask Cache", status="SimpleCache")

View File

@ -0,0 +1,18 @@
from flask_limiter import Limiter
from flask_limiter.util import get_remote_address
from overseer.flask import OVERSEER
from loguru import logger
# Populated below; module-level so other modules can import `limiter`.
limiter = None
# Very basic DOS prevention
logger.init("Limiter Cache", status="Connecting")
# Allow local workstation run
# NOTE(review): `limiter` was just set to None above, so this check is always
# true -- presumably a remnant of a storage-backed variant; confirm.
if limiter is None:
    # In-memory limiter: 90 requests/minute per remote address, with the
    # standard X-RateLimit-* headers enabled on responses.
    limiter = Limiter(
        OVERSEER,
        key_func=get_remote_address,
        default_limits=["90 per minute"],
        headers_enabled=True
    )
    logger.init_warn("Limiter Cache", status="Memory Only")

111
overseer/logger.py 100644
View File

@ -0,0 +1,111 @@
import sys
from functools import partialmethod
from loguru import logger
from overseer.argparser import args
# Level names routed to the dedicated stdout handlers.
STDOUT_LEVELS = ["GENERATION", "PROMPT"]
INIT_LEVELS = ["INIT", "INIT_OK", "INIT_WARN", "INIT_ERR"]
MESSAGE_LEVELS = ["MESSAGE"]

# Combined threshold defaults to ERROR (40) and above.
verbosity = 40
quiet = 0


def set_logger_verbosity(count):
    """Lower the verbosity threshold by 10 for each -v flag given.

    count=0 keeps the default (ERROR); larger counts reveal lower levels.
    """
    global verbosity
    verbosity = 40 - count * 10


def quiesce_logger(count):
    """Raise the effective threshold by 10 for each -q flag given."""
    global quiet
    quiet = count * 10


def _passes_threshold(record):
    """True when the record's level number clears verbosity + quiet."""
    return record["level"].no >= verbosity + quiet


def is_stdout_log(record):
    """Handler filter: generation/prompt records bound for stdout."""
    return record["level"].name in STDOUT_LEVELS and _passes_threshold(record)


def is_init_log(record):
    """Handler filter: INIT* records bound for stdout."""
    return record["level"].name in INIT_LEVELS and _passes_threshold(record)


def is_msg_log(record):
    """Handler filter: MESSAGE records bound for stdout."""
    return record["level"].name in MESSAGE_LEVELS and _passes_threshold(record)


def is_stderr_log(record):
    """Handler filter: everything not claimed by the stdout handlers."""
    if record["level"].name in STDOUT_LEVELS + INIT_LEVELS + MESSAGE_LEVELS:
        return False
    return _passes_threshold(record)
def test_logger():
    """Emit one sample message at every custom level, then exit the process.

    Only used for manually eyeballing the handler/format configuration;
    note the unconditional sys.exit() at the end.
    """
    logger.generation("This is a generation message\nIt is typically multiline\nThee Lines".encode("unicode_escape").decode("utf-8"))
    logger.prompt("This is a prompt message")
    logger.debug("Debug Message")
    logger.info("Info Message")
    logger.warning("Info Warning")
    logger.error("Error Message")
    logger.critical("Critical Message")
    logger.init("This is an init message", status="Starting")
    logger.init_ok("This is an init message", status="OK")
    logger.init_warn("This is an init message", status="Warning")
    logger.init_err("This is an init message", status="Error")
    logger.message("This is user message")
    sys.exit()
# Format strings for the four sinks: stderr, generation/prompt, INIT, MESSAGE.
logfmt = "<level>{level: <10}</level> | <green>{time:YYYY-MM-DD HH:mm:ss}</green> | <green>{name}</green>:<green>{function}</green>:<green>{line}</green> - <level>{message}</level>"
genfmt = "<level>{level: <10}</level> @ <green>{time:YYYY-MM-DD HH:mm:ss}</green> | <level>{message}</level>"
initfmt = "<magenta>INIT </magenta> | <level>{extra[status]: <14}</level> | <magenta>{message}</magenta>"
msgfmt = "<level>{level: <10}</level> | <level>{message}</level>"

# Custom log levels; the numeric `no` decides which verbosity tier shows them.
logger.level("GENERATION", no=24, color="<cyan>")
logger.level("PROMPT", no=23, color="<yellow>")
logger.level("INIT", no=31, color="<white>")
logger.level("INIT_OK", no=31, color="<green>")
logger.level("INIT_WARN", no=31, color="<yellow>")
logger.level("INIT_ERR", no=31, color="<red>")
logger.level("AUDIT", no=19, color="<blue>")
# Messages contain important information without which this application might not be able to be used
# As such, they have the highest priority
logger.level("MESSAGE", no=61, color="<green>")

# Expose the custom levels as logger.<name>(...) convenience methods.
logger.__class__.generation = partialmethod(logger.__class__.log, "GENERATION")
logger.__class__.prompt = partialmethod(logger.__class__.log, "PROMPT")
logger.__class__.init = partialmethod(logger.__class__.log, "INIT")
logger.__class__.init_ok = partialmethod(logger.__class__.log, "INIT_OK")
logger.__class__.init_warn = partialmethod(logger.__class__.log, "INIT_WARN")
logger.__class__.init_err = partialmethod(logger.__class__.log, "INIT_ERR")
logger.__class__.message = partialmethod(logger.__class__.log, "MESSAGE")
logger.__class__.audit = partialmethod(logger.__class__.log, "AUDIT")

# One handler per sink; each filter function above routes its own levels.
config = {
    "handlers": [
        {"sink": sys.stderr, "format": logfmt, "colorize":args.color, "filter": is_stderr_log},
        {"sink": sys.stdout, "format": genfmt, "level": "PROMPT", "colorize":args.color, "filter": is_stdout_log},
        {"sink": sys.stdout, "format": initfmt, "level": "INIT", "colorize":args.color, "filter": is_init_log},
        {"sink": sys.stdout, "format": msgfmt, "level": "MESSAGE", "colorize":args.color, "filter": is_msg_log}
    ],
}
logger.configure(**config)
# Persist AUDIT (19) and above to disk: rotate daily, keep a week, bzip2 them.
logger.add("horde.log", retention="7 days", rotation="1d", compression="bz2", level=19)
# NOTE(review): disable("__main__") followed by enable("")/enable(None)
# re-enables everything, so the net effect of these four lines appears to be
# just the "disabled" warning -- confirm intent.
logger.disable("__main__")
logger.warning("disabled")
logger.enable("")
logger.enable(None)
set_logger_verbosity(args.verbosity)
quiesce_logger(args.quiet)

View File

@ -0,0 +1,84 @@
import requests
from loguru import logger
def retrieve_suspicious_instances(users_to_posts_ratio=20):
    """Query fediverse.observer for lemmy nodes and flag suspicious ones.

    A node is flagged when it has at least 300 registered users and its
    users-to-local-posts ratio exceeds ``users_to_posts_ratio``.

    Returns a list of dicts describing the flagged nodes, or None when the
    observer request fails.
    """
    # GraphQL query for all lemmy nodes known to the observer.
    query = '''
    {
        nodes(softwarename: "lemmy") {
            domain
            name
            metatitle
            metadescription
            metaimage
            date_created
            uptime_alltime
            total_users
            active_users_monthly
            active_users_halfyear
            signup
            local_posts
        }
    }
    '''
    url = 'https://api.fediverse.observer/'
    headers = {
        'User-Agent': 'Lemmy Overseer / mail@dbzer0.com',
        'Accept': '*/*',
        'Accept-Language': 'en-US,en;q=0.5',
        'Accept-Encoding': 'gzip, deflate, br',
        'Referer': 'https://api.fediverse.observer/',
        'Content-Type': 'application/json',
        'Origin': 'https://api.fediverse.observer',
        'DNT': '1',
        'Connection': 'keep-alive',
        'Sec-Fetch-Dest': 'empty',
        'Sec-Fetch-Mode': 'cors',
        'Sec-Fetch-Site': 'same-origin',
        'TE': 'trailers'
    }
    response = requests.post(url, headers=headers, json={'query': query})
    if not response.ok:
        logger.error(f'Observer failed with status code {response.status_code}: {response.text}')
        return None
    bad_nodes = []
    for node in response.json()["data"]["nodes"]:
        total_users = node["total_users"]
        # Small instances are never flagged.
        if total_users < 300:
            continue
        # Treat zero local posts as one to avoid division by zero.
        local_posts = node["local_posts"] or 1
        ratio = total_users / local_posts
        if ratio <= users_to_posts_ratio:
            continue
        bad_nodes.append({
            "domain": node["domain"],
            "uptime_alltime": node["uptime_alltime"],
            "local_posts": node["local_posts"],
            "total_users": node["total_users"],
            "active_users_monthly": node["active_users_monthly"],
            "signup": node["signup"],
            "user_post_ratio": ratio,
        })
    return bad_nodes

32
overseer/routes.py 100644
View File

@ -0,0 +1,32 @@
from flask import render_template, redirect, url_for, request
from markdown import markdown
from loguru import logger
from overseer.flask import OVERSEER
@logger.catch(reraise=True)
@OVERSEER.route('/')
# @cache.cached(timeout=300)
def index():
    """Render overseer/templates/index.md as the HTML landing page."""
    # Fix: path was an f-string with no placeholders; plain literal suffices.
    with open('overseer/templates/index.md') as index_file:
        index = index_file.read()
    # .format() with no args still collapses doubled braces, so it is kept.
    findex = index.format()
    style = """<style>
        body {
            max-width: 120ex;
            margin: 0 auto;
            color: #333333;
            line-height: 1.4;
            font-family: sans-serif;
            padding: 1em;
        }
    </style>
    """
    head = f"""<head>
    <title>Horde Overseer</title>
    <meta name="viewport" content="width=device-width, initial-scale=1" />
    {style}
    </head>
    """
    return head + markdown(findex)

View File

@ -0,0 +1,13 @@
# Lemmy Overseer
This is a service to help Lemmy instances detect and avoid suspicious instances
## REST API
[Full Documentation](/api)
## Credits
These are the people who made this software possible.
* [Db0](https://dbzer0.com) - Development and Maintenance

View File

@ -1,82 +1,6 @@
import requests
import json
from overseer.observer import retrieve_suspicious_instances
# GraphQL query
query = '''
{
nodes(softwarename: "lemmy") {
domain
name
metatitle
metadescription
metaimage
date_created
uptime_alltime
total_users
active_users_monthly
active_users_halfyear
signup
local_posts
}
}
'''
# GraphQL endpoint URL
url = 'https://api.fediverse.observer/'
# Request headers
headers = {
'User-Agent': 'Lemmy Overseer / mail@dbzer0.com',
'Accept': '*/*',
'Accept-Language': 'en-US,en;q=0.5',
'Accept-Encoding': 'gzip, deflate, br',
'Referer': 'https://api.fediverse.observer/',
'Content-Type': 'application/json',
'Origin': 'https://api.fediverse.observer',
'DNT': '1',
'Connection': 'keep-alive',
'Sec-Fetch-Dest': 'empty',
'Sec-Fetch-Mode': 'cors',
'Sec-Fetch-Site': 'same-origin',
'TE': 'trailers'
}
# Create the request payload
payload = {
'query': query
}
# Send the POST request to the GraphQL endpoint
response = requests.post(url, headers=headers, json=payload)
# Check if the request was successful (HTTP 200 status code)
if response.ok:
# Extract the JSON response
data = response.json()
bad_nodes = []
for node in data["data"]["nodes"]:
is_bad = False
local_posts = node["local_posts"]
if node["total_users"] < 300:
continue
if local_posts == 0:
local_posts= 1
if node["total_users"] / local_posts > 20:
is_bad = True
# print(node)
if is_bad:
bad_node = {
"domain": node["domain"],
"uptime_alltime": node["uptime_alltime"],
"local_posts": node["local_posts"],
"total_users": node["total_users"],
"active_users_monthly": node["active_users_monthly"],
"signup": node["signup"],
"local_posts": node["local_posts"],
"user_post_ratio": node["total_users"] / local_posts,
}
bad_nodes.append(bad_node)
print(json.dumps([bn["domain"] for bn in bad_nodes], indent=4))
else:
# Print the error message if the request failed
print(f'Request failed with status code {response.status_code}: {response.text}')
# Entry point: print the domains of suspicious lemmy instances as JSON.
sus = retrieve_suspicious_instances(20)
if sus:
    sus_domains = [bn["domain"] for bn in sus]
    print(json.dumps(sus_domains, indent=4))

View File

@ -1 +1,18 @@
requests
werkzeug~=2.2.2
Flask~=2.2.2
flask-restx
flask_limiter~=2.8.1
Flask-Caching
waitress~=2.1.2
requests >= 2.27
Markdown~=3.4.1
flask-dance[sqla]
blinker
python-dotenv
loguru
python-dateutil~=2.8.2
redis~=4.3.5
flask_sqlalchemy==3.0.2
SQLAlchemy~=1.4.44
psycopg2-binary
regex

26
server.py 100644
View File

@ -0,0 +1,26 @@
from dotenv import load_dotenv
import os
import logging
load_dotenv()
from overseer.argparser import args
from overseer.flask import OVERSEER
from loguru import logger
if __name__ == "__main__":
    # Only setting this for the WSGI logs
    logging.basicConfig(format='%(asctime)s - %(levelname)s - %(module)s:%(lineno)d - %(message)s', level=logging.WARNING)
    from waitress import serve

    logger.init("WSGI Server", status="Starting")
    url_scheme = 'https'
    allowed_host = "overseer.dbzer0.net"
    # Fix: args.insecure was checked in two consecutive ifs; folded into one.
    if args.insecure:
        os.environ['OAUTHLIB_INSECURE_TRANSPORT'] = '1'  # Disable this on prod
        url_scheme = 'http'
        allowed_host = "0.0.0.0"
        logger.init_warn("WSGI Mode", status="Insecure")
    # NOTE(review): allowed_host is computed but never passed to serve() --
    # confirm whether a host restriction was intended here.
    serve(OVERSEER, port=args.port, url_scheme=url_scheme, threads=45, connection_limit=1024, asyncore_use_poll=True)
    logger.init("WSGI Server", status="Stopped")