forked from MarseyWorld/MarseyWorld
Initial working version of word censor
parent
cefb2cfad0
commit
7e1e9ccc5b
|
@ -4,4 +4,7 @@ video.mp4
|
|||
cache/
|
||||
__pycache__/
|
||||
disablesignups
|
||||
*rules.html
|
||||
*rules.html
|
||||
.idea/
|
||||
**/.pytest_cache/
|
||||
venv/
|
||||
|
|
|
@ -2,66 +2,66 @@ version: '2.3'
|
|||
|
||||
services:
|
||||
files:
|
||||
build:
|
||||
context: .
|
||||
volumes:
|
||||
- "./:/service"
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://postgres@postgres:5432/postgres
|
||||
- MASTER_KEY=${MASTER_KEY:-KTVciAUQFpFh2WdJ/oiHJlxl6FvzRZp8kYzAAv3l2OA=}
|
||||
- REDIS_URL=redis://redis
|
||||
- DOMAIN=localhost
|
||||
- SITE_NAME=Drama
|
||||
- GIPHY_KEY=3435tdfsdudebussylmaoxxt43
|
||||
- FORCE_HTTPS=0
|
||||
- DISCORD_SERVER_ID=3435tdfsdudebussylmaoxxt43
|
||||
- DISCORD_CLIENT_ID=3435tdfsdudebussylmaoxxt43
|
||||
- DISCORD_CLIENT_SECRET=3435tdfsdudebussylmaoxxt43
|
||||
- DISCORD_BOT_TOKEN=3435tdfsdudebussylmaoxxt43
|
||||
#- HCAPTCHA_SITEKEY=3435tdfsdudebussylmaoxxt43
|
||||
- HCAPTCHA_SECRET=3435tdfsdudebussylmaoxxt43
|
||||
- YOUTUBE_KEY=3435tdfsdudebussylmaoxxt43
|
||||
- PUSHER_KEY=3435tdfsdudebussylmaoxxt43
|
||||
- CATBOX_KEY=3435tdfsdudebussylmaoxxt43
|
||||
- SPAM_SIMILARITY_THRESHOLD=0.5
|
||||
- SPAM_SIMILAR_COUNT_THRESHOLD=5
|
||||
- SPAM_URL_SIMILARITY_THRESHOLD=0.1
|
||||
- COMMENT_SPAM_SIMILAR_THRESHOLD=0.5
|
||||
- COMMENT_SPAM_COUNT_THRESHOLD=5
|
||||
- READ_ONLY=0
|
||||
- BOT_DISABLE=0
|
||||
- COINS_NAME=Dramacoins
|
||||
- DEFAULT_TIME_FILTER=all
|
||||
- DEFAULT_THEME=dark
|
||||
- DEFAULT_COLOR=ff66ac #YOU HAVE TO PICK ONE OF THOSE COLORS OR SHIT WILL BREAK: ff66ac, 805ad5, 62ca56, 38a169, 80ffff, 2a96f3, eb4963, ff0000, f39731, 30409f, 3e98a7, e4432d, 7b9ae4, ec72de, 7f8fa6, f8db58
|
||||
- SLOGAN=Dude bussy lmao
|
||||
- GUMROAD_TOKEN=3435tdfsdudebussylmaoxxt43
|
||||
- GUMROAD_LINK=https://marsey1.gumroad.com/l/tfcvri
|
||||
- CARD_VIEW=1
|
||||
- DISABLE_DOWNVOTES=0
|
||||
- DUES=0
|
||||
- MAIL_USERNAME=blahblahblah@gmail.com
|
||||
- MAIL_PASSWORD=3435tdfsdudebussylmaoxxt43
|
||||
links:
|
||||
- "redis"
|
||||
- "postgres"
|
||||
ports:
|
||||
- "80:80"
|
||||
depends_on:
|
||||
- redis
|
||||
- postgres
|
||||
build:
|
||||
context: .
|
||||
volumes:
|
||||
- "./:/service"
|
||||
environment:
|
||||
- DATABASE_URL=postgresql://postgres@postgres:5432/postgres
|
||||
- MASTER_KEY=${MASTER_KEY:-KTVciAUQFpFh2WdJ/oiHJlxl6FvzRZp8kYzAAv3l2OA=}
|
||||
- REDIS_URL=redis://redis
|
||||
- DOMAIN=localhost
|
||||
- SITE_NAME=Drama
|
||||
- GIPHY_KEY=3435tdfsdudebussylmaoxxt43
|
||||
- FORCE_HTTPS=0
|
||||
- DISCORD_SERVER_ID=3435tdfsdudebussylmaoxxt43
|
||||
- DISCORD_CLIENT_ID=3435tdfsdudebussylmaoxxt43
|
||||
- DISCORD_CLIENT_SECRET=3435tdfsdudebussylmaoxxt43
|
||||
- DISCORD_BOT_TOKEN=3435tdfsdudebussylmaoxxt43
|
||||
#- HCAPTCHA_SITEKEY=3435tdfsdudebussylmaoxxt43
|
||||
- HCAPTCHA_SECRET=3435tdfsdudebussylmaoxxt43
|
||||
- YOUTUBE_KEY=3435tdfsdudebussylmaoxxt43
|
||||
- PUSHER_KEY=3435tdfsdudebussylmaoxxt43
|
||||
- CATBOX_KEY=3435tdfsdudebussylmaoxxt43
|
||||
- SPAM_SIMILARITY_THRESHOLD=0.5
|
||||
- SPAM_SIMILAR_COUNT_THRESHOLD=5
|
||||
- SPAM_URL_SIMILARITY_THRESHOLD=0.1
|
||||
- COMMENT_SPAM_SIMILAR_THRESHOLD=0.5
|
||||
- COMMENT_SPAM_COUNT_THRESHOLD=5
|
||||
- READ_ONLY=0
|
||||
- BOT_DISABLE=0
|
||||
- COINS_NAME=Dramacoins
|
||||
- DEFAULT_TIME_FILTER=all
|
||||
- DEFAULT_THEME=dark
|
||||
- DEFAULT_COLOR=ff66ac #YOU HAVE TO PICK ONE OF THOSE COLORS OR SHIT WILL BREAK: ff66ac, 805ad5, 62ca56, 38a169, 80ffff, 2a96f3, eb4963, ff0000, f39731, 30409f, 3e98a7, e4432d, 7b9ae4, ec72de, 7f8fa6, f8db58
|
||||
- SLOGAN=Dude bussy lmao
|
||||
- GUMROAD_TOKEN=3435tdfsdudebussylmaoxxt43
|
||||
- GUMROAD_LINK=https://marsey1.gumroad.com/l/tfcvri
|
||||
- CARD_VIEW=1
|
||||
- DISABLE_DOWNVOTES=0
|
||||
- DUES=0
|
||||
- MAIL_USERNAME=blahblahblah@gmail.com
|
||||
- MAIL_PASSWORD=3435tdfsdudebussylmaoxxt43
|
||||
links:
|
||||
- "redis"
|
||||
- "postgres"
|
||||
ports:
|
||||
- "80:80"
|
||||
depends_on:
|
||||
- redis
|
||||
- postgres
|
||||
|
||||
redis:
|
||||
image: redis
|
||||
ports:
|
||||
- "6379:6379"
|
||||
image: redis
|
||||
ports:
|
||||
- "6379:6379"
|
||||
|
||||
postgres:
|
||||
image: postgres:12.3
|
||||
volumes:
|
||||
- "./schema.sql:/docker-entrypoint-initdb.d/00-schema.sql"
|
||||
- "./seed-db.sql:/docker-entrypoint-initdb.d/01-schema.sql"
|
||||
environment:
|
||||
- POSTGRES_HOST_AUTH_METHOD=trust
|
||||
#ports:
|
||||
#- "5432:5432"
|
||||
image: postgres:12.3
|
||||
volumes:
|
||||
- "./schema.sql:/docker-entrypoint-initdb.d/00-schema.sql"
|
||||
- "./seed-db.sql:/docker-entrypoint-initdb.d/01-schema.sql"
|
||||
environment:
|
||||
- POSTGRES_HOST_AUTH_METHOD=trust
|
||||
#ports:
|
||||
#- "5432:5432"
|
|
@ -1,6 +1,6 @@
|
|||
from os import environ
|
||||
|
||||
site = environ.get("DOMAIN").strip()
|
||||
site = environ.get("DOMAIN", '').strip()
|
||||
|
||||
SLURS = {
|
||||
" faggot":" cute twink",
|
||||
|
|
|
@ -0,0 +1,47 @@
|
|||
from collections import ChainMap
|
||||
import re
|
||||
from re import Match
|
||||
|
||||
from files.helpers.const import SLURS
|
||||
|
||||
|
||||
def create_replace_map():
    """Build the lookup table from every cased slur form to its replacement.

    For each ``slur -> replacer`` pair in ``SLURS`` the table receives the
    entry as written plus its title-case, capitalized and upper-case forms,
    each mapped to the replacement transformed the same way.

    Returns:
        A single flat ``dict``. If two slurs produce the same key, the slur
        that appears earlier in ``SLURS`` keeps its value.
    """
    merged = {}

    # Walk the slurs last-to-first so that earlier entries overwrite later
    # ones on a key collision (earlier slur wins).
    for slur, replacer in reversed(list(SLURS.items())):
        merged.update({
            slur: replacer,
            slur.title(): replacer.title(),
            slur.capitalize(): replacer.capitalize(),
            slur.upper(): replacer.upper(),
        })

    return merged
|
||||
|
||||
|
||||
# Built once when the module is imported; sub_matcher looks up matched text here.
REPLACE_MAP = create_replace_map()
|
||||
|
||||
|
||||
def create_variations_slur_regex(slur: str):
    """Return the regex patterns used to find ``slur`` in its cased forms.

    The forms are, in order: the slur as given, upper-case, capitalized and,
    only when the slur contains a space, title-case. Each pattern matches the
    form either preceded by whitespace or ``>``, or followed by whitespace
    or ``<``.

    Args:
        slur: The word or phrase to build patterns for. It is interpolated
            into the pattern as-is (not regex-escaped).

    Returns:
        A list of pattern strings, one per cased form.
    """
    forms = (slur, slur.upper(), slur.capitalize())

    # Title-case only differs from capitalize() for multi-word phrases.
    if " " in slur:
        forms += (slur.title(),)

    patterns = []
    for var in forms:
        patterns.append(rf"(\s|>)({var})|({var})(\s|<)")
    return patterns
|
||||
|
||||
|
||||
def sub_matcher(match: Match):
    """Produce the replacement text for one slur match.

    The pattern has two alternatives: groups 1-2 (leading delimiter, then the
    slur) and groups 3-4 (the slur, then trailing delimiter). Whichever
    delimiter was captured is kept around the replacement.

    Args:
        match: A match object with at least four groups.

    Returns:
        The captured leading delimiter (if any), the ``REPLACE_MAP`` entry
        for the matched slur, and the captured trailing delimiter (if any).

    Raises:
        KeyError: If the matched text has no entry in ``REPLACE_MAP``.
    """
    leading, slur_after, slur_before, trailing = match.group(1, 2, 3, 4)

    # Only one alternative participates in a match; take the slur from it.
    found = slur_before if slur_after is None else slur_after

    return (leading or '') + REPLACE_MAP[found] + (trailing or '')
|
||||
|
||||
|
||||
def censor_slurs(v, body):
    """Replace every configured slur found in ``body``.

    Each slur in ``SLURS`` is expanded into its cased regex variations and
    substituted one after the other, using ``sub_matcher`` to pick the
    replacement text.

    Args:
        v: Not used by this function; kept so existing callers keep working.
        body: The text to censor.

    Returns:
        ``body`` with all substitutions that succeeded applied. A variation
        that fails is skipped and ``body`` is left as it was for that step.
    """
    for slur in SLURS:
        for variation in create_variations_slur_regex(slur):
            try:
                body = re.sub(variation, sub_matcher, body)
            except (re.error, KeyError, TypeError):
                # Best effort: an invalid pattern, a match missing from the
                # replace map, or a non-text body must not abort censoring.
                # Unlike a bare ``except``, this lets KeyboardInterrupt and
                # SystemExit propagate.
                continue

    return body
|
|
@ -1,3 +1,4 @@
|
|||
assertpy
|
||||
beautifulsoup4
|
||||
bleach
|
||||
Flask
|
||||
|
@ -20,6 +21,7 @@ requests
|
|||
SQLAlchemy
|
||||
psycopg2-binary
|
||||
pusher_push_notifications
|
||||
pytest
|
||||
youtube-dl
|
||||
yattag
|
||||
webptools
|
|
@ -0,0 +1,76 @@
|
|||
import re
|
||||
from unittest.mock import patch
|
||||
|
||||
from assertpy import assert_that
|
||||
|
||||
from files.helpers import word_censor
|
||||
from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher
|
||||
|
||||
|
||||
def test_create_variations_slur_regex_single_word():
    """A one-word slur yields exactly three cased patterns."""
    patterns = create_variations_slur_regex("retard")

    wanted = (
        r"(\s|>)(retard)|(retard)(\s|<)",
        r"(\s|>)(Retard)|(Retard)(\s|<)",
        r"(\s|>)(RETARD)|(RETARD)(\s|<)",
    )
    assert_that(patterns).is_length(3).contains_only(*wanted)
|
||||
|
||||
|
||||
def test_create_variations_slur_regex_multiple_word():
    """A multi-word slur additionally yields the title-case pattern."""
    patterns = create_variations_slur_regex("kill yourself")

    wanted = (
        r"(\s|>)(kill yourself)|(kill yourself)(\s|<)",
        r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
        r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)",
        r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)",
    )
    assert_that(patterns).is_length(4).contains_only(*wanted)
|
||||
|
||||
|
||||
@patch("files.helpers.word_censor.SLURS", {
    "tranny": "πππ",
    "kill yourself": "keep yourself safe",
    "faggot": "cute twink",
})
def test_create_replace_map():
    """The map built from the patched SLURS holds every cased form of each entry."""
    replace_map = create_replace_map()

    assert_that(replace_map).is_equal_to({
        "tranny": "πππ",
        "Tranny": "πππ",
        "TRANNY": "πππ",
        "kill yourself": "keep yourself safe",
        "Kill yourself": "Keep yourself safe",
        "KILL YOURSELF": "KEEP YOURSELF SAFE",
        "Kill Yourself": "Keep Yourself Safe",
        "faggot": "cute twink",
        "Faggot": "Cute twink",
        "FAGGOT": "CUTE TWINK",
    })
|
||||
|
||||
|
||||
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur'})
def test_sub_matcher():
    """sub_matcher keeps whichever delimiter the pattern captured."""
    pattern = r"(\s|>)(retard)|(retard)(\s|<)"

    # (input text, expected replacement for the first match)
    cases = (
        ("<p>retard</p>", ">r-slur"),      # matched via the leading delimiter
        ("<p>noretard</p>", "r-slur<"),    # matched via the trailing delimiter
    )
    for text, wanted in cases:
        found = re.search(pattern, text)
        assert_that(sub_matcher(found)).is_equal_to(wanted)
|
||||
|
||||
|
||||
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'})
def test_censor_slurs():
    """censor_slurs replaces delimited slurs and leaves everything else alone."""
    # (input body, expected output)
    cases = (
        ("<p>retard</p>", "<p>r-slur</p>"),
        ("<p>preretard</p>", "<p>prer-slur</p>"),
        ("that is Retarded like", "that is R-slured like"),
        ("that is SUPERRETARD like", "that is SUPERR-SLUR like"),
        ("<p>Manlets get out!</p>", "<p>Little kings get out!</p>"),
        ('... "retard" ...', '... "retard" ...'),
        ('... ReTaRd ...', '... ReTaRd ...'),
        ('... aretarded ...', '... aretarded ...'),
        ("LLM is a manlet hehe", "LLM is a little king hehe"),
        ("LLM is :marseycapitalistmanlet: hehe", "LLM is :marseycapitalistmanlet: hehe"),
    )

    # REPLACE_MAP is computed at import time, so rebuild it from the patched
    # SLURS. patch.object restores the original map when the block exits, so
    # the rebuilt map does not leak into other tests.
    with patch.object(word_censor, "REPLACE_MAP", create_replace_map()):
        for body, wanted in cases:
            assert_that(censor_slurs(None, body)).is_equal_to(wanted)
|
Loading…
Reference in New Issue