rDrama/files/helpers/filters.py

35 lines
787 B
Python
Raw Normal View History

2021-10-15 14:08:27 +00:00
from bs4 import BeautifulSoup
from flask import *
from urllib.parse import urlparse
from files.classes import BannedDomain
def filter_comment_html(html_text):
2022-02-24 08:28:13 +00:00
soup = BeautifulSoup(html_text, 'lxml')
2021-10-15 14:08:27 +00:00
links = soup.find_all("a")
domain_list = set()
for link in links:
href = link.get("href")
if not href: continue
2022-02-03 06:39:02 +00:00
url = urlparse(href)
domain = url.netloc
path = url.path
domain_list.add(domain+path)
2021-10-15 14:08:27 +00:00
parts = domain.split(".")
for i in range(len(parts)):
new_domain = parts[i]
for j in range(i + 1, len(parts)):
new_domain += "." + parts[j]
domain_list.add(new_domain)
2022-02-24 08:28:13 +00:00
bans = [x for x in g.db.query(BannedDomain).filter(BannedDomain.domain.in_(list(domain_list))).all()]
2021-10-15 14:08:27 +00:00
if bans: return bans
else: return []