Changes from regex censor v1

master
Yo Mama 2021-10-18 22:46:57 +02:00
parent aab1def0dd
commit af8da42c73
7 changed files with 506 additions and 478 deletions

View File

@@ -2,65 +2,65 @@ version: '2.3'
services:
files:
build:
context: .
volumes:
- "./:/service"
environment:
- DATABASE_URL=postgresql://postgres@127.0.0.1:5432/postgres
- MASTER_KEY=${MASTER_KEY:-KTVciAUQFpFh2WdJ/oiHJlxl6FvzRZp8kYzAAv3l2OA=}
- DOMAIN=localhost
- SITE_NAME=Drama
- GIPHY_KEY=3435tdfsdudebussylmaoxxt43
- FORCE_HTTPS=0
- DISCORD_SERVER_ID=3435tdfsdudebussylmaoxxt43
- DISCORD_CLIENT_ID=3435tdfsdudebussylmaoxxt43
- DISCORD_CLIENT_SECRET=3435tdfsdudebussylmaoxxt43
- DISCORD_BOT_TOKEN=3435tdfsdudebussylmaoxxt43
#- HCAPTCHA_SITEKEY=3435tdfsdudebussylmaoxxt43
- HCAPTCHA_SECRET=3435tdfsdudebussylmaoxxt43
- YOUTUBE_KEY=3435tdfsdudebussylmaoxxt43
- PUSHER_KEY=3435tdfsdudebussylmaoxxt43
- CATBOX_KEY=3435tdfsdudebussylmaoxxt43
- SPAM_SIMILARITY_THRESHOLD=0.5
- SPAM_SIMILAR_COUNT_THRESHOLD=5
- SPAM_URL_SIMILARITY_THRESHOLD=0.1
- COMMENT_SPAM_SIMILAR_THRESHOLD=0.5
- COMMENT_SPAM_COUNT_THRESHOLD=5
- READ_ONLY=0
- BOT_DISABLE=0
- COINS_NAME=Dramacoins
- DEFAULT_TIME_FILTER=all
- DEFAULT_THEME=midnight
- DEFAULT_COLOR=ff66ac #YOU HAVE TO PICK ONE OF THOSE COLORS OR SHIT WILL BREAK: ff66ac, 805ad5, 62ca56, 38a169, 80ffff, 2a96f3, eb4963, ff0000, f39731, 30409f, 3e98a7, e4432d, 7b9ae4, ec72de, 7f8fa6, f8db58
- SLOGAN=Dude bussy lmao
- GUMROAD_TOKEN=3435tdfsdudebussylmaoxxt43
- GUMROAD_LINK=https://marsey1.gumroad.com/l/tfcvri
- CARD_VIEW=1
- DISABLE_DOWNVOTES=0
- DUES=0
- MAIL_USERNAME=blahblahblah@gmail.com
- MAIL_PASSWORD=3435tdfsdudebussylmaoxxt43
links:
- "redis"
- "postgres"
ports:
- "80:80"
depends_on:
- redis
- postgres
redis:
image: redis
ports:
- "6379:6379"
postgres:
image: postgres:12.3
volumes:
- "./schema.sql:/docker-entrypoint-initdb.d/00-schema.sql"
- "./seed-db.sql:/docker-entrypoint-initdb.d/01-schema.sql"
environment:
- POSTGRES_HOST_AUTH_METHOD=trust
#ports:
#- "5432:5432"

View File

@@ -1,16 +1,18 @@
from os import environ
import re
import time
from urllib.parse import urlencode, urlparse, parse_qs
from flask import *
from sqlalchemy import *
from sqlalchemy.orm import relationship, deferred, lazyload
from files.classes.votes import CommentVote
from files.helpers.lazy import lazy
from files.helpers.const import SLURS
from files.__main__ import Base
from .flags import CommentFlag
from os import environ
import time
from files.classes.votes import CommentVote
from files.helpers.const import AUTOPOLLER_ACCOUNT
from files.helpers.lazy import lazy
from .flags import CommentFlag
from ..helpers.word_censor import censor_slurs
site = environ.get("DOMAIN").strip()
@@ -298,8 +300,7 @@ class Comment(Base):
if not body: return ""
if not v or v.slurreplacer:
for s, r in SLURS.items(): body = body.replace(s, r)
body = censor_slurs(body, v)
if v and not v.oldreddit: body = body.replace("old.reddit.com", "reddit.com")
@@ -325,8 +326,7 @@ class Comment(Base):
if not body: return ""
if not v or v.slurreplacer:
for s, r in SLURS.items(): body = body.replace(s, r)
body = censor_slurs(body, v)
if v and not v.oldreddit: body = body.replace("old.reddit.com", "reddit.com")

View File

@@ -1,21 +1,24 @@
from flask import render_template, g
from os import environ
import random
import re
import time
from urllib.parse import urlparse
from flask import render_template
from sqlalchemy import *
from sqlalchemy.orm import relationship, deferred
import re, random
from urllib.parse import urlparse
from files.helpers.lazy import lazy
from files.helpers.const import SLURS, AUTOPOLLER_ACCOUNT
from files.__main__ import Base
from files.helpers.const import SLURS, AUTOPOLLER_ACCOUNT
from files.helpers.lazy import lazy
from .flags import Flag
from os import environ
import time
from ..helpers.word_censor import censor_slurs
site = environ.get("DOMAIN").strip()
site_name = environ.get("SITE_NAME").strip()
class Submission(Base):
__tablename__ = "submissions"
id = Column(BigInteger, primary_key=True)
@@ -340,9 +343,7 @@ class Submission(Base):
if self.club and not (v and v.paid_dues): return "COUNTRY CLUB ONLY"
body = self.body_html
if not v or v.slurreplacer:
for s,r in SLURS.items():
body = body.replace(s, r)
body = censor_slurs(body, v)
if v and not v.oldreddit: body = body.replace("old.reddit.com", "reddit.com")
if v and v.nitter: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")
@@ -352,9 +353,7 @@ class Submission(Base):
if self.club and not (v and v.paid_dues): return "COUNTRY CLUB ONLY"
body = self.body
if not v or v.slurreplacer:
for s,r in SLURS.items():
body = body.replace(s, r)
body = censor_slurs(body, v)
if v and not v.oldreddit: body = body.replace("old.reddit.com", "reddit.com")
if v and v.nitter: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net")

View File

@@ -1,389 +1,132 @@
from os import environ
site = environ.get("DOMAIN").strip()
site = environ.get("DOMAIN", '').strip()
#####################
# Formatting rules: #
#####################
#
# on the slur side, a slur will also match with a prefix or a suffix attached (but not both, and never in the middle of a word), so for example
# "retard" will match:
# - "retard"
# - "retarded"
# - "superretard"
# But not "superretarded"
#
# If all letters of the slur are lowercase, it will match the lowercase form, the form with the first letter of the first word (or of every word) capitalized, and the all-uppercase form
# "dancing israelis" will match (with prefixes and suffixes omitted for brevity):
# - "dancing israelis"
# - "Dancing israelis"
# - "Dancing Israelis"
# - "DANCING ISRAELIS"
#
# If some letters are uppercase, the same variations apply, with the additional option of the original casing; already-existing uppercase letters are respected
# "NoNewNormal" will match (with prefixes and suffixes omitted for brevity):
# - "NoNewNormal"
# - "nonewnormal"
# - "Nonewnormal"
# - "NONEWNORMAL"
#
# If the slur has a space before and after it, the match is limited to the exact word, with no prefixes or suffixes
# (the previous rules about capitalization still apply)
# " neg " will match only:
# - "neg"
# - "Neg"
# - "NEG"
#
# Now on the replacement side: if the replacement is written in lowercase, it will take on the same capitalization as the matched slur
# "kill yourself" -> "keep yourself safe"
# "Kill yourself" -> "Keep yourself safe"
# "Kill Yourself" -> "Keep Yourself Safe"
# "KILL YOURSELF" -> "KEEP YOURSELF SAFE"
#
# If the replacement itself contains some capitalization, then that capitalization is always maintained
# for the pair: <"pajeet": "sexy Indian dude"> it will replace:
# "pajeet" -> "sexy Indian dude"
# "Pajeet" -> "Sexy Indian dude"
# "PAJEET" -> "SEXY INDIAN DUDE"
#
# There is one super special case: if the replacer starts with "http" then its capitalization is never changed
#
#
# TL;DR: Just read the above once, or don't, and try to guess!
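#
# Putting the rules together for the pair <"retard": "r-slur"> (a rough sketch of the intended behaviour,
# mirroring the censor tests added in this commit):
# "retard"      -> "r-slur"
# "preretard"   -> "prer-slur"      (prefix kept)
# "Retarded"    -> "R-slured"       (suffix kept, leading capital preserved)
# "SUPERRETARD" -> "SUPERR-SLUR"    (all caps preserved)
# "xretardx"    -> "xretardx"       (middle of a word, left alone)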
SLURS = {
" faggot":" cute twink",
" Faggot":" Cute twink",
" FAGGOT":" CUTE TWINK",
" fag":" cute twink",
" Fag":" Cute twink",
" FAG":" CUTE TWINK",
" pedophile":" libertarian",
" Pedophile":" Libertarian",
" PEDOPHILE":" LIBERTARIAN",
" pedo":" libertarian",
" Pedo":" Libertarian",
" PEDO":" LIBERTARIAN",
" kill yourself":" keep yourself safe",
" KILL YOURSELF":" KEEP YOURSELF SAFE",
" nigger":" πŸ€",
" Nigger":" πŸ€",
" NIGGER":" πŸ€",
" rapist":" male feminist",
" Rapist":" Male feminist",
" RAPIST":" MALE FEMINIST",
" steve akins":" penny verity oaken",
" Steve Akins":" Penny Verity Oaken",
" STEVE AKINS":" PENNY VERITY OAKEN",
" trannie":" πŸš‚πŸšƒπŸšƒ",
" Trannie":" πŸš‚πŸšƒπŸšƒ",
" TRANNIE":" πŸš‚πŸšƒπŸšƒ",
" tranny":" πŸš‚πŸšƒπŸšƒ",
" Tranny":" πŸš‚πŸšƒπŸšƒ",
" TRANNY":" πŸš‚πŸšƒπŸšƒ",
" troon":" πŸš‚πŸšƒπŸšƒ",
" Troon":" πŸš‚πŸšƒπŸšƒ",
" TROON":" πŸš‚πŸšƒπŸšƒ",
" NoNewNormal": " HorseDewormerAddicts",
" nonewnormal": " horsedewormeraddicts",
" Kike": " https://sciencedirect.com/science/article/abs/pii/S016028960600033X",
" kike": " https://sciencedirect.com/science/article/abs/pii/S016028960600033X",
" retard":" r-slur",
" Retard":" R-slur",
" RETARD":" R-SLUR",
" janny":" j-slur",
" Janny":" J-slur",
" JANNY":" J-SLUR",
" jannie":" j-slur",
" Jannie":" J-slur",
" JANNIE":" J-SLUR",
" janny":" j-slur",
" Janny":" J-slur",
" JANNY":" J-SLUR",
" jannie":" j-slur",
" Jannie":" J-slur",
" JANNIE":" J-SLUR",
" latinos":" latinx",
" latino":" latinx",
" latinas":" latinx",
" latina":" latinx",
" hispanics":" latinx",
" hispanic":" latinx",
" Latinos":" Latinx",
" Latino":" Latinx",
" Latinas":" Latinx",
" Latina":" Latinx",
" Hispanics":" Latinx",
" Hispanic":" Latinx",
" LATINOS":" LATINX",
" LATINO":" LATINX",
" LATINAS":" LATINX",
" LATINA":" LATINX",
" HISPANICS":" LATINX",
" HISPANIC":" LATINX",
" uss liberty incident":" tragic accident aboard the USS Liberty",
" USS Liberty Incident":" tragic accident aboard the USS Liberty",
" USS Liberty incident":" tragic accident aboard the USS Liberty",
" USS Liberty Incident":" tragic accident aboard the USS Liberty",
" uss Liberty incident":" tragic accident aboard the USS Liberty",
" uss liberty Incident":" tragic accident aboard the USS Liberty",
" USS LIBERTY INCIDENT":" TRAGIC ACCIDENT ABOARD THE USS LIBERTY",
" lavon affair":" Lavon Misunderstanding",
" Lavon affair":" Lavon Misunderstanding",
" Lavon Affair":" Lavon Misunderstanding",
" lavon Affair":" Lavon Misunderstanding",
" shylock":" Israeli friend",
" Shylock":" Israeli friend",
" SHYLOCK":" ISRAELI FRIEND",
" yid":" Israeli friend",
" Yid":" Israeli friend",
" YID":" ISRAELI FRIEND",
" heeb":" Israeli friend",
" Heeb":" Israeli friend",
" HEEB":" ISRAELI FRIEND",
" sheeny":" Israeli friend",
" Sheeny":" Israeli friend",
" SHEENY":" ISRAELI FRIEND",
" sheenies":" Israeli friends",
" Sheenies":" Israeli friends",
" SHEENIES":" ISRAELI FRIENDS",
" hymie":" Israeli friend",
" Hymie":" Israeli friend",
" HYMIES":" ISRAELI FRIENDS",
" allah":" Allah (SWT)",
" Allah":" Allah (SWT)",
" ALLAH":" ALLAH (SWT)",
" Mohammad":" Mohammad (PBUH)",
" Muhammad":" Mohammad (PBUH)",
" Mohammed":" Mohammad (PBUH)",
" Muhammed":" Mohammad (PBUH)",
" mohammad":" Mohammad (PBUH)",
" mohammed":" Mohammad (PBUH)",
" muhammad":" Mohammad (PBUH)",
" muhammed":" Mohammad (PBUH)",
" I HATE MARSEY":" I LOVE MARSEY",
" i hate marsey":" i love marsey",
" I hate Marsey":" I love Marsey",
" I hate marsey":" I love Marsey",
" libertarian":" pedophile",
" Libertarian":" Pedophile",
" LIBERTARIAN":" PEDOPHILE",
" Billie Eilish":" Billie Eilish (fat cow)",
" billie eilish":" bilie eilish (fat cow)",
" BILLIE EILISH":" BILIE EILISH (FAT COW)",
" dancing Israelis":" I love Israel",
" dancing israelis":" i love israel",
" DANCING ISRAELIS":" I LOVE ISRAEL",
" Dancing Israelis":" I love Israel",
" sodomite":" total dreamboat",
" Sodomite":" Total dreamboat",
" pajeet":" sexy Indian dude",
" Pajeet":" Sexy Indian dude",
" PAJEET":" SEXY INDIAN DUDE",
" female":" birthing person",
" Female":" Womb-haver",
" FEMALE":" birthing person",
" landlord":" landchad",
" Landlord":" Landchad",
" LANDLORD":" LANDCHAD",
" tenant":" renthog",
" Tenant":" Renthog",
" TENANT":" RENTHOG",
" renter":" rentoid",
" Renter":" Rentoid",
" RENTER":" RENTOID",
" autistic":" neurodivergent",
" Autistic":" Neurodivergent",
" AUTISTIC":" NEURODIVERGENT",
" anime":" p-dophilic japanese cartoons",
" Anime":" P-dophilic Japanese cartoons",
" ANIME":" P-DOPHILIC JAPANESE CARTOONS",
" holohoax":" I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol",
" Holohoax":" I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol",
" HOLOHOAX":" I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol",
" groomercord":" discord (actually a pretty cool service)",
" Groomercord":" Discord (actually a pretty cool service)",
" GROOMERCORD":" DISCORD (ACTUALLY A PRETTY COOL SERVICE)",
" pedocord":" discord (actually a pretty cool service)",
" Pedocord":" Discord (actually a pretty cool service)",
" PEDOCORD":" DISCORD (ACTUALLY A PRETTY COOL SERVICE)",
" i hate carp":" i love carp",
" I hate carp":" I love carp",
" I HATE CARP":" I LOVE CARP",
" I hate Carp":" I love Carp",
" manlet":" little king",
" Manlet":" Little king",
" MANLET":" LITTLE KING",
" gamer":" g*mer",
" Gamer":" G*mer",
" GAMER":" G*MER",
" journalist":" journ*list",
" Journalist":" Journ*list",
" JOURNALIST":" JOURN*LIST",
" journalism":" journ*lism",
" Journalism":" Journ*lism",
" JOURNALISM":" JOURN*LISM",
" buttcheeks":" bulva",
" Buttcheeks":" Bulva",
" BUTTCHEEKS":" BULVA",
" asscheeks":" bulva",
" Asscheeks":" bulva",
" ASSCHEEKS":" BULVA",
" wuhan flu":" SARS-CoV-2 syndemic",
" Wuhan flu":" SARS-CoV-2 syndemic",
" Wuhan Flu":" SARS-CoV-2 syndemic",
" china flu":" SARS-CoV-2 syndemic",
" China flu":" SARS-CoV-2 syndemic",
" China Flu":" SARS-CoV-2 syndemic",
" china virus":" SARS-CoV-2 syndemic",
" China virus":" SARS-CoV-2 syndemic",
" China Virus":" SARS-CoV-2 syndemic",
" kung flu":" SARS-CoV-2 syndemic",
" Kung flu":" SARS-CoV-2 syndemic",
" Kung Flu":" SARS-CoV-2 syndemic",
"faggot": "cute twink",
"fag": "cute twink",
"pedophile": "libertarian",
"pedo": "libertarian",
"kill yourself": "keep yourself safe",
"nigger": "πŸ€",
"rapist": "male feminist",
"steve akins": "penny verity oaken",
"trannie": "πŸš‚πŸšƒπŸšƒ",
"tranny": "πŸš‚πŸšƒπŸšƒ",
"troon": "πŸš‚πŸšƒπŸšƒ",
"NoNewNormal": "HorseDewormerAddicts",
"kike": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X",
"retard": "r-slur",
"janny": "j-slur",
"jannie": "j-slur",
"janny": "j-slur",
"latinos": "latinx",
"latino": "latinx",
"latinas": "latinx",
"latina": "latinx",
"hispanics": "latinx",
"hispanic": "latinx",
"USS liberty incident": "tragic accident aboard the USS Liberty",
"lavon affair": "Lavon Misunderstanding",
"shylock": "Israeli friend",
"yid": "Israeli friend",
"heeb": "Israeli friend",
"sheeny": "Israeli friend",
"sheenies": "Israeli friends",
"hymie": "Israeli friend",
"allah": "Allah (SWT)",
"mohammad": "Mohammad (PBUH)",
"mohammed": "Mohammad (PBUH)",
"muhammad": "Mohammad (PBUH)",
"muhammed": "Mohammad (PBUH)",
"i hate marsey": "i love marsey",
"libertarian": "pedophile",
"billie bilish": "Billie Eilish (fat cow)",
"dancing Israelis": "i love Israel",
"sodomite": "total dreamboat",
"pajeet": "sexy Indian dude",
"female": "birthing person",
"landlord": "landchad",
"tenant": "renthog",
"renter": "rentoid",
"autistic": "neurodivergent",
"anime": "p-dophilic japanese cartoons",
"holohoax": "i tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol",
"groomercord": "discord (actually a pretty cool service)",
"pedocord": "discord (actually a pretty cool service)",
"i hate Carp": "i love Carp",
"manlet": "little king",
"gamer": "g*mer",
"journalist": "journ*list",
"journalism": "journ*lism",
"buttcheeks": "bulva",
"asscheeks": "bulva",
"wuhan flu": "SARS-CoV-2 syndemic",
"china flu": "SARS-CoV-2 syndemic",
"china virus": "SARS-CoV-2 syndemic",
"kung flu": "SARS-CoV-2 syndemic",
"faggot ":"cute twink ",
"Faggot ":"Cute twink ",
"FAGGOT ":"CUTE TWINK ",
"fag ":"cute twink ",
"Fag ":"Cute twink ",
"FAG ":"CUTE TWINK ",
"pedophile ":"libertarian ",
"Pedophile ":"Libertarian ",
"PEDOPHILE ":"LIBERTARIAN ",
"kill yourself ":"keep yourself safe ",
"KILL YOURSELF ":"KEEP YOURSELF SAFE ",
"nigger ":"πŸ€ ",
"Nigger ":"πŸ€ ",
"NIGGER ":"πŸ€ ",
"steve akins ":"penny verity oaken ",
"Steve Akins ":"Penny Verity Oaken ",
"STEVE AKINS ":"PENNY VERITY OAKEN ",
"trannie ":"πŸš‚πŸšƒπŸšƒ ",
"Trannie ":"πŸš‚πŸšƒπŸšƒ ",
"TRANNIE ":"πŸš‚πŸšƒπŸšƒ ",
"tranny ":"πŸš‚πŸšƒπŸšƒ ",
"Tranny ":"πŸš‚πŸšƒπŸšƒ ",
"TRANNY ":"πŸš‚πŸšƒπŸšƒ ",
"troon ":"πŸš‚πŸšƒπŸšƒ ",
"Troon ":"πŸš‚πŸšƒπŸšƒ ",
"TROON ":"πŸš‚πŸšƒπŸšƒ ",
"NoNewNormal ": "HorseDewormerAddicts ",
"nonewnormal ": "horsedewormeraddicts ",
"Kike ": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X ",
"kike ": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X ",
"retard ":"r-slur ",
"Retard ":"R-slur ",
"RETARD ":"R-SLUR ",
"janny ":"j-slur ",
"Janny ":"J-slur ",
"JANNY ":"J-SLUR ",
"jannie ":"j-slur ",
"Jannie ":"J-slur ",
"JANNIE ":"J-SLUR ",
"latinos ":"latinx ",
"latino ":"latinx ",
"latinas ":"latinx ",
"latina ":"latinx ",
"hispanics ":"latinx ",
"hispanic ":"latinx ",
"Latinos ":"Latinx ",
"Latino ":"Latinx ",
"Latinas ":"Latinx ",
"Latina ":"Latinx ",
"Hispanics ":"Latinx ",
"Hispanic ":"Latinx ",
"LATINOS ":"LATINX ",
"LATINO ":"LATINX ",
"LATINAS ":"LATINX ",
"LATINA ":"LATINX ",
"HISPANICS ":"LATINX ",
"HISPANIC ":"LATINX ",
"uss liberty incident ":"tragic accident aboard the USS Liberty ",
"USS Liberty Incident ":"tragic accident aboard the USS Liberty ",
"USS Liberty incident ":"tragic accident aboard the USS Liberty ",
"USS Liberty Incident ":"tragic accident aboard the USS Liberty ",
"uss Liberty incident ":"tragic accident aboard the USS Liberty ",
"uss liberty Incident ":"tragic accident aboard the USS Liberty ",
"USS LIBERTY INCIDENT ":"TRAGIC ACCIDENT ABOARD THE USS LIBERTY ",
"lavon affair ":"Lavon Misunderstanding ",
"Lavon affair ":"Lavon Misunderstanding ",
"Lavon Affair ":"Lavon Misunderstanding ",
"lavon Affair ":"Lavon Misunderstanding ",
"shylock ":"Israeli friend ",
"Shylock ":"Israeli friend ",
"SHYLOCK ":"ISRAELI FRIEND ",
"yid ":"Israeli friend ",
"Yid ":"Israeli friend ",
"YID ":"ISRAELI FRIEND ",
"heeb ":"Israeli friend ",
"Heeb ":"Israeli friend ",
"HEEB ":"ISRAELI FRIEND ",
"sheeny ":"Israeli friend ",
"Sheeny ":"Israeli friend ",
"SHEENY ":"ISRAELI FRIEND ",
"sheenies ":"Israeli friends ",
"Sheenies ":"Israeli friends ",
"SHEENIES ":"ISRAELI FRIENDS ",
"hymie ":"Israeli friend ",
"Hymie ":"Israeli friend ",
"HYMIES ":"ISRAELI FRIENDS ",
"Mohammad ":"Mohammad (PBUH) ",
"Muhammad ":"Mohammad (PBUH) ",
"Mohammed ":"Mohammad (PBUH) ",
"Muhammed ":"Mohammad (PBUH) ",
"mohammad ":"Mohammad (PBUH) ",
"mohammed ":"Mohammad (PBUH) ",
"muhammad ":"Mohammad (PBUH) ",
"muhammed ":"Mohammad (PBUH) ",
"I HATE MARSEY ":"I LOVE MARSEY ",
"i hate marsey ":"i love marsey ",
"I hate Marsey ":"I love Marsey ",
"I hate marsey ":"I love Marsey ",
"libertarian ":"pedophile ",
"Libertarian ":"Pedophile ",
"LIBERTARIAN ":"PEDOPHILE ",
"Billie Eilish ":"Billie Eilish (fat cow) ",
"billie eilish ":"bilie eilish (fat cow) ",
"BILLIE EILISH ":"BILIE EILISH (FAT COW) ",
"dancing Israelis ":"I love Israel ",
"dancing israelis ":"i love israel ",
"DANCING ISRAELIS ":"I LOVE ISRAEL ",
"Dancing Israelis ":"I love Israel ",
"sodomite ":"total dreamboat ",
"Sodomite ":"Total dreamboat ",
"pajeet ":"sexy Indian dude ",
"Pajeet ":"Sexy Indian dude ",
"PAJEET ":"SEXY INDIAN DUDE ",
"female ":"birthing person ",
"Female ":"Womb-haver ",
"FEMALE ":"birthing person ",
"landlord ":"landchad ",
"Landlord ":"Landchad ",
"LANDLORD ":"LANDCHAD ",
"tenant ":"renthog ",
"Tenant ":"Renthog ",
"TENANT ":"RENTHOG ",
"renter ":"rentoid ",
"Renter ":"Rentoid ",
"RENTER ":"RENTOID ",
"autistic ":"neurodivergent ",
"Autistic ":"Neurodivergent ",
"AUTISTIC ":"NEURODIVERGENT ",
"anime ":"p-dophilic japanese cartoons ",
"Anime ":"P-dophilic Japanese cartoons ",
"ANIME ":"P-DOPHILIC JAPANESE CARTOONS ",
"holohoax ":"I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol ",
"Holohoax ":"I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol ",
"HOLOHOAX ":"I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol ",
"groomercord ":"discord (actually a pretty cool service) ",
"Groomercord ":"Discord (actually a pretty cool service) ",
"GROOMERCORD ":"DISCORD (ACTUALLY A PRETTY COOL SERVICE) ",
"pedocord ":"discord (actually a pretty cool service) ",
"Pedocord ":"Discord (actually a pretty cool service) ",
"PEDOCORD ":"DISCORD (ACTUALLY A PRETTY COOL SERVICE) ",
"i hate carp ":"i love carp ",
"I hate carp ":"I love carp ",
"I HATE CARP ":"I LOVE CARP ",
"I hate Carp ":"I love Carp ",
"manlet ":"little king ",
"Manlet ":"Little king ",
"MANLET ":"LITTLE KING ",
"gamer ":"g*mer ",
"Gamer ":"G*mer ",
"GAMER ":"G*MER ",
"journalist ":"journ*list ",
"Journalist ":"Journ*list ",
"JOURNALIST ":"JOURN*LIST ",
"journalism ":"journ*lism ",
"Journalism ":"Journ*lism ",
"JOURNALISM ":"JOURN*LISM ",
"buttcheeks ":"bulva ",
"Buttcheeks ":"Bulva ",
"BUTTCHEEKS ":"BULVA ",
"asscheeks ":"bulva ",
"Asscheeks ":"bulva ",
"ASSCHEEKS ":"BULVA ",
"wuhan flu ":"SARS-CoV-2 syndemic ",
"Wuhan flu ":"SARS-CoV-2 syndemic ",
"Wuhan Flu ":"SARS-CoV-2 syndemic ",
"china flu ":"SARS-CoV-2 syndemic ",
"China flu ":"SARS-CoV-2 syndemic ",
"China Flu ":"SARS-CoV-2 syndemic ",
"china virus ":"SARS-CoV-2 syndemic ",
"China virus ":"SARS-CoV-2 syndemic ",
"China Virus ":"SARS-CoV-2 syndemic ",
"kung flu ":"SARS-CoV-2 syndemic ",
"Kung flu ":"SARS-CoV-2 syndemic ",
"Kung Flu ":"SARS-CoV-2 syndemic ",
" nig ":" πŸ€ ",
" Nig ":" πŸ€ ",
" NIG ":" πŸ€ ",
" nigs ":" πŸ€s ",
" Nigs ":" πŸ€s ",
" NIGS ":" πŸ€s ",
# if the word has spaces in the beginning and the end it will only censor this word without prefixes or suffixes
" nig ": "πŸ€",
" nigs ": "πŸ€s",
}
LONGPOST_REPLIES = ['Wow, you must be a JP fan.', 'This is one of the worst posts I have EVER seen. Delete it.', "No, don't reply like this, please do another wall of unhinged rant please.", '# 😴😴😴', "Ma'am we've been over this before. You need to stop.", "I've known more coherent downies.", "Your pulitzer's in the mail", "That's great and all, but I asked for my burger without cheese.", 'That degree finally paying off', "That's nice sweaty. Why don't you have a seat in the time out corner with Pizzashill until you calm down, then you can have your Capri Sun.", "All them words won't bring your pa back.", "You had a chance to not be completely worthless, but it looks like you threw it away. At least you're consistent.", 'Some people are able to display their intelligence by going on at length on a subject and never actually saying anything. This ability is most common in trades such as politics, public relations, and law. You have impressed me by being able to best them all, while still coming off as an absolute idiot.', "You can type 10,000 characters and you decided that these were the one's that you wanted.", 'Have you owned the libs yet?', "I don't know what you said, because I've seen another human naked.", 'Impressive. Normally people with such severe developmental disabilities struggle to write much more than a sentence or two. He really has exceded our expectations for the writing portion. Sadly the coherency of his writing, along with his abilities in the social skills and reading portions, are far behind his peers with similar disabilities.', "This is a really long way of saying you don't fuck.", "Sorry ma'am, looks like his delusions have gotten worse. We'll have to admit him,", '![](https://i.kym-cdn.com/photos/images/newsfeed/001/038/094/0a1.jpg)', 'If only you could put that energy into your relationships', 'Posts like this is why I do Heroine.', 'still unemployed then?', 'K', 'look im gunna have 2 ask u 2 keep ur giant dumps in the toilet not in my replys 😷😷😷', "Mommy is soooo proud of you, sweaty. Let's put this sperg out up on the fridge with all your other failures.", "Good job bobby, here's a star", "That was a mistake. You're about to find out the hard way why.", 'You sat down and wrote all this shit. You could have done so many other things with your life. What happened to your life that made you decide writing novels of bullshit on rdrama.net was the best option?', "I don't have enough spoons to read this shit", "All those words won't bring daddy back.", 'OUT!']
AGENDAPOSTER_MSG = """Hi @{username},\n\nYour comment has been automatically removed because you forgot
to include `trans lives matter`.\n\nDon't worry, we're here to help! We
won't let you post or comment anything that doesn't express your love and acceptance towards
the trans community. Feel free to resubmit your comment with `trans lives matter`
included. \n\n*This is an automated message; if you need help,
you can message us [here](/contact).*"""
VAXX_MSG = """Hi @{username}, it appears that you may be trying to spread dangerous misinformation regarding ineffective COVID-19 treatments based on pseudoscientific hearsay. Your post has been removed because it contained the word ivermectin. We ask that you understand that horse dewormer neither treats, nor prevents, COVID-19. For more information, please read up on what the FDA has to say on the matter:
@@ -398,27 +141,27 @@ Thank you."""
BASED_MSG = "@{username}'s Based Count has increased by 1. Their Based Count is now {basedcount}.\n\nPills: {pills}"
if site == "pcmemes.net":
BASEDBOT_ACCOUNT = 800
NOTIFICATIONS_ACCOUNT = 1046
AUTOJANNY_ACCOUNT = 1050
SNAPPY_ACCOUNT = 261
LONGPOSTBOT_ACCOUNT = 1832
ZOZBOT_ACCOUNT = 1833
AUTOPOLLER_ACCOUNT = 3369
elif site == 'rdrama.net':
NOTIFICATIONS_ACCOUNT = 1046
AUTOJANNY_ACCOUNT = 2360
SNAPPY_ACCOUNT = 261
LONGPOSTBOT_ACCOUNT = 1832
ZOZBOT_ACCOUNT = 1833
AUTOPOLLER_ACCOUNT = 3369
else:
NOTIFICATIONS_ACCOUNT = 1
AUTOJANNY_ACCOUNT = 2
SNAPPY_ACCOUNT = 3
LONGPOSTBOT_ACCOUNT = 4
ZOZBOT_ACCOUNT = 5
AUTOPOLLER_ACCOUNT = 6
PUSHER_INSTANCE_ID = '02ddcc80-b8db-42be-9022-44c546b4dce6'
PUSHER_KEY = environ.get("PUSHER_KEY", "").strip()

View File

@@ -0,0 +1,87 @@
from collections import ChainMap
import re
from re import Match
from typing import List, Dict
from files.helpers.const import SLURS
def first_upper(phrase: str) -> str:
"""Converts the first character of the phrase to uppercase, not messing with the others"""
return phrase[0].upper() + phrase[1:]
def first_all_upper(phrase: str) -> str:
"""Converts the first character of each word to uppercase, not messing with the others"""
if " " not in phrase:
return first_upper(phrase)
return " ".join([first_upper(word) for word in phrase.split(" ")])
def get_permutations_slur(slur: str, replacer: str = "_") -> Dict[str, str]:
"""
Given a slur and a replacer, generates all the casing permutations of the slur and maps each one to the
correspondingly cased replacement
"""
stripped = slur.strip()
is_link = replacer.startswith("http") # special case for the :marseymerchant:
# the order in which entries are added to the dict is important, so that the 'Correctest' version is written last
result = {
stripped.upper(): replacer.upper() if not is_link else replacer,
first_all_upper(stripped): first_all_upper(replacer) if not is_link else replacer,
stripped.lower(): replacer,
stripped: replacer,
first_upper(stripped): first_upper(replacer) if not is_link else replacer,
}
return result
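# For example, as exercised by the tests added in this commit:
#   get_permutations_slur("USS liberty incident", "tragic accident aboard the USS Liberty") ->
#     {"USS LIBERTY INCIDENT": "TRAGIC ACCIDENT ABOARD THE USS LIBERTY",
#      "USS Liberty Incident": "Tragic Accident Aboard The USS Liberty",
#      "uss liberty incident": "tragic accident aboard the USS Liberty",
#      "USS liberty incident": "Tragic accident aboard the USS Liberty"}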
def create_replace_map() -> Dict[str, str]:
"""Creates the map that will be used to get the mathing replaced for the given slur"""
dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()]
# flattens the list of dicts into a single dict
return dict(ChainMap(*dicts))
REPLACE_MAP = create_replace_map()
def create_variations_slur_regex(slur: str) -> List[str]:
"""For a given match generates the corresponding replacer"""
permutations = get_permutations_slur(slur)
if slur.startswith(" ") and slur.endswith(" "):
return [rf"(\s|>)({perm})(\s|<)" for perm in permutations.keys()]
else:
return [rf"(\s|>)({perm})|({perm})(\s|<)" for perm in permutations.keys()]
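# For example (per the tests added in this commit):
#   create_variations_slur_regex(" retard ") -> [r"(\s|>)(retard)(\s|<)", r"(\s|>)(Retard)(\s|<)", r"(\s|>)(RETARD)(\s|<)"]
#   create_variations_slur_regex("retard")   -> the looser forms r"(\s|>)(retard)|(retard)(\s|<)" etc.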
def sub_matcher(match: Match) -> str:
# special case when it should match exact word
if len(match.groups()) == 3:
found = match.group(2)
replacer = REPLACE_MAP[found]
return match.group(1) + replacer + match.group(3)
else: # normal case with prefix or suffix
found = match.group(2) if (match.group(2) is not None) else match.group(3)
replacer = REPLACE_MAP[found]
return (match.group(1) or '') + replacer + (match.group(4) or '')
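# For example, with REPLACE_MAP containing {"retard": "r-slur"} (as in the tests added in this commit),
# matching r"(\s|>)(retard)|(retard)(\s|<)" against "<p>retard</p>" takes the first alternative
# (leading ">" captured) and returns ">r-slur", while "<p>noretard</p>" takes the second alternative
# (trailing "<" captured) and returns "r-slur<".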
def censor_slurs(body: str, logged_user) -> str:
if logged_user and not logged_user.slurreplacer:
return body
for (slur, replace) in SLURS.items():
for variation in create_variations_slur_regex(slur):
try:
body = re.sub(variation, sub_matcher, body)
except Exception as e:
print(e)
return body

View File

@@ -1,3 +1,4 @@
assertpy
beautifulsoup4
bleach
Flask
@@ -20,6 +21,7 @@ requests
SQLAlchemy
psycopg2-binary
pusher_push_notifications
pytest
youtube-dl
yattag
webptools

View File

@@ -0,0 +1,197 @@
import re
from unittest.mock import patch
from assertpy import assert_that
from files.helpers import word_censor
from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher, \
get_permutations_slur, first_upper, first_all_upper
def test_first_upper():
assert_that(first_upper("USS liberty")).is_equal_to("USS liberty")
assert_that(first_upper("uss liberty")).is_equal_to("Uss liberty")
assert_that(first_upper("uss Liberty")).is_equal_to("Uss Liberty")
def test_first_all_upper():
assert_that(first_all_upper("USS liberty")).is_equal_to("USS Liberty")
assert_that(first_all_upper("uss liberty")).is_equal_to("Uss Liberty")
assert_that(first_all_upper("uss Liberty")).is_equal_to("Uss Liberty")
def test_get_permutations_slur():
expected = {
"USS liberty incident": "Tragic accident aboard the USS Liberty",
"uss liberty incident": "tragic accident aboard the USS Liberty",
"USS Liberty Incident": "Tragic Accident Aboard The USS Liberty",
"USS LIBERTY INCIDENT": "TRAGIC ACCIDENT ABOARD THE USS LIBERTY",
}
result = get_permutations_slur("USS liberty incident", "tragic accident aboard the USS Liberty")
assert_that(result).is_equal_to(expected)
def test_get_permutations_slur_with_link_replacer():
expected = {
"kike": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X",
"Kike": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X",
"KIKE": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X",
}
result = get_permutations_slur("kike", "https://sciencedirect.com/science/article/abs/pii/S016028960600033X")
assert_that(result).is_equal_to(expected)
def test_create_variations_slur_regex_for_slur_with_spaces():
expected = [r"(\s|>)(retard)(\s|<)",
r"(\s|>)(Retard)(\s|<)",
r"(\s|>)(RETARD)(\s|<)"]
result = create_variations_slur_regex(" retard ")
assert_that(result).is_length(3).contains_only(*expected)
def test_create_variations_slur_regex_single_word():
expected = [r"(\s|>)(retard)|(retard)(\s|<)",
r"(\s|>)(Retard)|(Retard)(\s|<)",
r"(\s|>)(RETARD)|(RETARD)(\s|<)"]
result = create_variations_slur_regex("retard")
assert_that(result).is_length(3).contains_only(*expected)
def test_create_variations_slur_regex_multiple_word():
expected = [r"(\s|>)(kill yourself)|(kill yourself)(\s|<)",
r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)",
r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)",
r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)"]
result = create_variations_slur_regex("kill yourself")
assert_that(result).is_length(4).contains_only(*expected)
@patch("files.helpers.word_censor.SLURS", {
"tranny": "πŸš‚πŸšƒπŸšƒ",
"kill yourself": "keep yourself safe",
"faggot": "cute twink",
"NoNewNormal": "NoNewNormal",
" nig ": "πŸ€",
})
def test_create_replace_map():
expected = {
"tranny": "πŸš‚πŸšƒπŸšƒ",
"Tranny": "πŸš‚πŸšƒπŸšƒ",
"TRANNY": "πŸš‚πŸšƒπŸšƒ",
"kill yourself": "keep yourself safe",
"Kill yourself": "Keep yourself safe",
"Kill Yourself": "Keep Yourself Safe",
"KILL YOURSELF": "KEEP YOURSELF SAFE",
"faggot": "cute twink",
"Faggot": "Cute twink",
"FAGGOT": "CUTE TWINK",
"NoNewNormal": "NoNewNormal",
"nonewnormal": "NoNewNormal",
"NONEWNORMAL": "NONEWNORMAL",
"nig": "πŸ€",
"Nig": "πŸ€",
"NIG": "πŸ€",
}
result = create_replace_map()
assert_that(result).is_equal_to(expected)
@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'NIG': 'πŸ€'})
def test_sub_matcher():
match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "<p>retard</p>")
assert_that(sub_matcher(match)).is_equal_to(">r-slur")
match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "<p>noretard</p>")
assert_that(sub_matcher(match)).is_equal_to("r-slur<")
match = re.search(r"(\s|>)(NIG)(\s|<)", "<p>NIG</p>")
assert_that(sub_matcher(match)).is_equal_to(">πŸ€<")
match = re.search(r"(\s|>)(NIG)(\s|<)", "<p>NIG </p>")
assert_that(sub_matcher(match)).is_equal_to(">πŸ€ ")
@patch("files.helpers.word_censor.SLURS", {
'retard': 'r-slur',
'manlet': 'little king',
' nig ': 'πŸ€',
'i hate Carp': 'i love Carp',
'kike': 'https://sciencedirect.com/science/article/abs/pii/S016028960600033X'
})
def test_censor_slurs():
word_censor.REPLACE_MAP = create_replace_map()
assert_that(censor_slurs("<p>retard</p>", None)).is_equal_to("<p>r-slur</p>")
assert_that(censor_slurs("<p>preretard</p>", None)).is_equal_to("<p>prer-slur</p>")
assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is R-slured like")
assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERR-SLUR like")
assert_that(censor_slurs("<p>Manlets get out!</p>", None)).is_equal_to("<p>Little kings get out!</p>")
assert_that(censor_slurs('... "retard" ...', None)).is_equal_to('... "retard" ...')
assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... ReTaRd ...')
assert_that(censor_slurs('... xretardx ...', None)).is_equal_to('... xretardx ...')
assert_that(censor_slurs("LLM is a manlet hehe", None)).is_equal_to("LLM is a little king hehe")
assert_that(censor_slurs("LLM is :marseycapitalistmanlet: hehe", None)) \
.is_equal_to("LLM is :marseycapitalistmanlet: hehe")
assert_that(censor_slurs('... Nig ...', None)).is_equal_to('... πŸ€ ...')
assert_that(censor_slurs('<p>NIG</p>', None)).is_equal_to('<p>πŸ€</p>')
assert_that(censor_slurs('... nigeria ...', None)).is_equal_to('... nigeria ...')
assert_that(censor_slurs('... i hate Carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... i hate carp ...', None)).is_equal_to('... i love Carp ...')
assert_that(censor_slurs('... I hate Carp ...', None)).is_equal_to('... I love Carp ...')
assert_that(censor_slurs('... I Hate Carp ...', None)).is_equal_to('... I Love Carp ...')
assert_that(censor_slurs('... I HATE CARP ...', None)).is_equal_to('... I LOVE CARP ...')
# Not covered:
assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... I Hate carp ...')
assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i Hate Carp ...')
assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i Hate carp ...')
assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...')
assert_that(censor_slurs("<p>retarded SuperManlet NIG</p>", None)) \
.is_equal_to("<p>r-slured SuperLittle king πŸ€</p>")
assert_that(censor_slurs('... kike ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
assert_that(censor_slurs('... Kike ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
assert_that(censor_slurs('... KIKE ...', None)) \
.is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...')
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': 'πŸ€'})
def test_censor_slurs_does_not_error_out_on_exception():
word_censor.REPLACE_MAP = create_replace_map()
word_censor.REPLACE_MAP["Manlet"] = None
assert_that(censor_slurs(">retarded SuperManlet NIG<", None)).is_equal_to(">r-slured SuperManlet πŸ€<")
@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'})
def test_censor_slurs_does_not_censor_on_flag_disabled():
word_censor.REPLACE_MAP = create_replace_map()
class User:
def __init__(self, slurreplacer):
self.slurreplacer = slurreplacer
logger_user = User(slurreplacer=False)
assert_that(censor_slurs("<p>retard</p>", logger_user)).is_equal_to("<p>retard</p>")
logger_user = User(slurreplacer=True)
assert_that(censor_slurs("<p>retard</p>", logger_user)).is_equal_to("<p>r-slur</p>")