rDrama/files/helpers/filters.py

40 lines
881 B
Python
Raw Normal View History

2021-07-21 01:12:26 +00:00
from bs4 import BeautifulSoup
from flask import *
from urllib.parse import urlparse
2021-08-04 15:35:10 +00:00
from files.classes import BannedDomain
2021-09-17 08:34:36 +00:00
from sqlalchemy.orm import lazyload
2021-07-21 01:12:26 +00:00
def _domain_suffixes(href):
    """Return every dot-separated suffix of the host named in *href*.

    For ``https://a.example.com:8080/x`` the result is
    ``{"a.example.com", "example.com", "com"}``. An empty set is returned
    when *href* has no host (relative links, ``mailto:`` and similar) or
    cannot be parsed.
    """
    try:
        # .hostname (unlike .netloc) drops any "user:pass@" prefix and
        # ":port" suffix and lower-cases the result, so decorated links
        # such as "https://EVIL.com:443/" are checked as "evil.com".
        host = urlparse(href).hostname
    except ValueError:
        # urlparse rejects malformed authorities (e.g. an unbalanced IPv6
        # bracket); there is no usable host to check in that case.
        return set()

    if not host:
        return set()

    # A fully-qualified name may carry a trailing root dot ("evil.com.").
    host = host.rstrip(".")
    if not host:
        return set()

    labels = host.split(".")
    return {".".join(labels[start:]) for start in range(len(labels))}


def filter_comment_html(html_text):
    """Find banned domains linked from a piece of comment HTML.

    Every ``<a href=...>`` in *html_text* is inspected; the link's host and
    each of its parent domains are looked up in ``BannedDomain``.

    NOTE(review): hosts are normalised to lower case without port or
    userinfo before the lookup; this assumes ``BannedDomain.domain`` values
    are stored in that same form -- confirm against the code that inserts
    them.

    :param html_text: HTML markup of the comment.
    :returns: list of matching ``BannedDomain`` rows; empty when no link
        points at a banned domain.
    """
    soup = BeautifulSoup(html_text, features="html.parser")

    domain_list = set()
    for link in soup.find_all("a"):
        href = link.get("href", None)
        if not href:
            continue
        domain_list |= _domain_suffixes(href)

    # Nothing to look up: skip the database round trip entirely.
    if not domain_list:
        return []

    # search db for domain rules that prohibit commenting
    return (
        g.db.query(BannedDomain)
        .options(lazyload('*'))
        .filter(BannedDomain.domain.in_(list(domain_list)))
        .all()
    )