From af8da42c73536913639e85105256a04a706f0a06 Mon Sep 17 00:00:00 2001 From: Yo Mama Date: Mon, 18 Oct 2021 22:46:57 +0200 Subject: [PATCH] Changes from regex censor v1 --- docker-compose.yml | 116 +++--- files/classes/comment.py | 20 +- files/classes/submission.py | 27 +- files/helpers/const.py | 533 +++++++------------------ files/helpers/word_censor.py | 87 ++++ requirements.txt | 4 +- test/files/helpers/test_word_censor.py | 197 +++++++++ 7 files changed, 506 insertions(+), 478 deletions(-) create mode 100644 files/helpers/word_censor.py create mode 100644 test/files/helpers/test_word_censor.py diff --git a/docker-compose.yml b/docker-compose.yml index 1dee2234f..2751df012 100644 --- a/docker-compose.yml +++ b/docker-compose.yml @@ -2,65 +2,65 @@ version: '2.3' services: files: - build: - context: . - volumes: - - "./:/service" - environment: - - DATABASE_URL=postgresql://postgres@127.0.0.1:5432/postgres - - MASTER_KEY=${MASTER_KEY:-KTVciAUQFpFh2WdJ/oiHJlxl6FvzRZp8kYzAAv3l2OA=} - - DOMAIN=localhost - - SITE_NAME=Drama - - GIPHY_KEY=3435tdfsdudebussylmaoxxt43 - - FORCE_HTTPS=0 - - DISCORD_SERVER_ID=3435tdfsdudebussylmaoxxt43 - - DISCORD_CLIENT_ID=3435tdfsdudebussylmaoxxt43 - - DISCORD_CLIENT_SECRET=3435tdfsdudebussylmaoxxt43 - - DISCORD_BOT_TOKEN=3435tdfsdudebussylmaoxxt43 - #- HCAPTCHA_SITEKEY=3435tdfsdudebussylmaoxxt43 - - HCAPTCHA_SECRET=3435tdfsdudebussylmaoxxt43 - - YOUTUBE_KEY=3435tdfsdudebussylmaoxxt43 - - PUSHER_KEY=3435tdfsdudebussylmaoxxt43 - - CATBOX_KEY=3435tdfsdudebussylmaoxxt43 - - SPAM_SIMILARITY_THRESHOLD=0.5 - - SPAM_SIMILAR_COUNT_THRESHOLD=5 - - SPAM_URL_SIMILARITY_THRESHOLD=0.1 - - COMMENT_SPAM_SIMILAR_THRESHOLD=0.5 - - COMMENT_SPAM_COUNT_THRESHOLD=5 - - READ_ONLY=0 - - BOT_DISABLE=0 - - COINS_NAME=Dramacoins - - DEFAULT_TIME_FILTER=all - - DEFAULT_THEME=midnight - - DEFAULT_COLOR=ff66ac #YOU HAVE TO PICK ONE OF THOSE COLORS OR SHIT WILL BREAK: ff66ac, 805ad5, 62ca56, 38a169, 80ffff, 2a96f3, eb4963, ff0000, f39731, 30409f, 3e98a7, e4432d, 
7b9ae4, ec72de, 7f8fa6, f8db58 - - SLOGAN=Dude bussy lmao - - GUMROAD_TOKEN=3435tdfsdudebussylmaoxxt43 - - GUMROAD_LINK=https://marsey1.gumroad.com/l/tfcvri - - CARD_VIEW=1 - - DISABLE_DOWNVOTES=0 - - DUES=0 - - MAIL_USERNAME=blahblahblah@gmail.com - - MAIL_PASSWORD=3435tdfsdudebussylmaoxxt43 - links: - - "redis" - - "postgres" - ports: - - "80:80" - depends_on: - - redis - - postgres + build: + context: . + volumes: + - "./:/service" + environment: + - DATABASE_URL=postgresql://postgres@127.0.0.1:5432/postgres + - MASTER_KEY=${MASTER_KEY:-KTVciAUQFpFh2WdJ/oiHJlxl6FvzRZp8kYzAAv3l2OA=} + - DOMAIN=localhost + - SITE_NAME=Drama + - GIPHY_KEY=3435tdfsdudebussylmaoxxt43 + - FORCE_HTTPS=0 + - DISCORD_SERVER_ID=3435tdfsdudebussylmaoxxt43 + - DISCORD_CLIENT_ID=3435tdfsdudebussylmaoxxt43 + - DISCORD_CLIENT_SECRET=3435tdfsdudebussylmaoxxt43 + - DISCORD_BOT_TOKEN=3435tdfsdudebussylmaoxxt43 + #- HCAPTCHA_SITEKEY=3435tdfsdudebussylmaoxxt43 + - HCAPTCHA_SECRET=3435tdfsdudebussylmaoxxt43 + - YOUTUBE_KEY=3435tdfsdudebussylmaoxxt43 + - PUSHER_KEY=3435tdfsdudebussylmaoxxt43 + - CATBOX_KEY=3435tdfsdudebussylmaoxxt43 + - SPAM_SIMILARITY_THRESHOLD=0.5 + - SPAM_SIMILAR_COUNT_THRESHOLD=5 + - SPAM_URL_SIMILARITY_THRESHOLD=0.1 + - COMMENT_SPAM_SIMILAR_THRESHOLD=0.5 + - COMMENT_SPAM_COUNT_THRESHOLD=5 + - READ_ONLY=0 + - BOT_DISABLE=0 + - COINS_NAME=Dramacoins + - DEFAULT_TIME_FILTER=all + - DEFAULT_THEME=midnight + - DEFAULT_COLOR=ff66ac #YOU HAVE TO PICK ONE OF THOSE COLORS OR SHIT WILL BREAK: ff66ac, 805ad5, 62ca56, 38a169, 80ffff, 2a96f3, eb4963, ff0000, f39731, 30409f, 3e98a7, e4432d, 7b9ae4, ec72de, 7f8fa6, f8db58 + - SLOGAN=Dude bussy lmao + - GUMROAD_TOKEN=3435tdfsdudebussylmaoxxt43 + - GUMROAD_LINK=https://marsey1.gumroad.com/l/tfcvri + - CARD_VIEW=1 + - DISABLE_DOWNVOTES=0 + - DUES=0 + - MAIL_USERNAME=blahblahblah@gmail.com + - MAIL_PASSWORD=3435tdfsdudebussylmaoxxt43 + links: + - "redis" + - "postgres" + ports: + - "80:80" + depends_on: + - redis + - postgres redis: - image: redis 
- ports: - - "6379:6379" + image: redis + ports: + - "6379:6379" postgres: - image: postgres:12.3 - volumes: - - "./schema.sql:/docker-entrypoint-initdb.d/00-schema.sql" - - "./seed-db.sql:/docker-entrypoint-initdb.d/01-schema.sql" - environment: - - POSTGRES_HOST_AUTH_METHOD=trust - #ports: - #- "5432:5432" \ No newline at end of file + image: postgres:12.3 + volumes: + - "./schema.sql:/docker-entrypoint-initdb.d/00-schema.sql" + - "./seed-db.sql:/docker-entrypoint-initdb.d/01-schema.sql" + environment: + - POSTGRES_HOST_AUTH_METHOD=trust + #ports: + #- "5432:5432" \ No newline at end of file diff --git a/files/classes/comment.py b/files/classes/comment.py index 558b38b99..6a7c962f8 100644 --- a/files/classes/comment.py +++ b/files/classes/comment.py @@ -1,16 +1,18 @@ +from os import environ import re +import time from urllib.parse import urlencode, urlparse, parse_qs + from flask import * from sqlalchemy import * from sqlalchemy.orm import relationship, deferred, lazyload -from files.classes.votes import CommentVote -from files.helpers.lazy import lazy -from files.helpers.const import SLURS + from files.__main__ import Base -from .flags import CommentFlag -from os import environ -import time +from files.classes.votes import CommentVote from files.helpers.const import AUTOPOLLER_ACCOUNT +from files.helpers.lazy import lazy +from .flags import CommentFlag +from ..helpers.word_censor import censor_slurs site = environ.get("DOMAIN").strip() @@ -298,8 +300,7 @@ class Comment(Base): if not body: return "" - if not v or v.slurreplacer: - for s, r in SLURS.items(): body = body.replace(s, r) + body = censor_slurs(body, v) if v and not v.oldreddit: body = body.replace("old.reddit.com", "reddit.com") @@ -325,8 +326,7 @@ class Comment(Base): if not body: return "" - if not v or v.slurreplacer: - for s, r in SLURS.items(): body = body.replace(s, r) + body = censor_slurs(body, v) if v and not v.oldreddit: body = body.replace("old.reddit.com", "reddit.com") diff --git 
a/files/classes/submission.py b/files/classes/submission.py index 107055cb7..64d470dd2 100644 --- a/files/classes/submission.py +++ b/files/classes/submission.py @@ -1,21 +1,24 @@ -from flask import render_template, g +from os import environ +import random +import re +import time +from urllib.parse import urlparse + +from flask import render_template from sqlalchemy import * from sqlalchemy.orm import relationship, deferred -import re, random -from urllib.parse import urlparse -from files.helpers.lazy import lazy -from files.helpers.const import SLURS, AUTOPOLLER_ACCOUNT + from files.__main__ import Base +from files.helpers.const import SLURS, AUTOPOLLER_ACCOUNT +from files.helpers.lazy import lazy from .flags import Flag -from os import environ -import time +from ..helpers.word_censor import censor_slurs site = environ.get("DOMAIN").strip() site_name = environ.get("SITE_NAME").strip() class Submission(Base): - __tablename__ = "submissions" id = Column(BigInteger, primary_key=True) @@ -340,9 +343,7 @@ class Submission(Base): if self.club and not (v and v.paid_dues): return "COUNTRY CLUB ONLY" body = self.body_html - if not v or v.slurreplacer: - for s,r in SLURS.items(): - body = body.replace(s, r) + body = censor_slurs(body, v) if v and not v.oldreddit: body = body.replace("old.reddit.com", "reddit.com") if v and v.nitter: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net") @@ -352,9 +353,7 @@ class Submission(Base): if self.club and not (v and v.paid_dues): return "COUNTRY CLUB ONLY" body = self.body - if not v or v.slurreplacer: - for s,r in SLURS.items(): - body = body.replace(s, r) + body = censor_slurs(body, v) if v and not v.oldreddit: body = body.replace("old.reddit.com", "reddit.com") if v and v.nitter: body = body.replace("www.twitter.com", "nitter.net").replace("twitter.com", "nitter.net") diff --git a/files/helpers/const.py b/files/helpers/const.py index 2d7343700..2e52bdfee 100644 --- a/files/helpers/const.py +++ 
b/files/helpers/const.py @@ -1,389 +1,132 @@ from os import environ -site = environ.get("DOMAIN").strip() +site = environ.get("DOMAIN", '').strip() +##################### +# Formatting rules: # +##################### +# +# on the slur side, they will match prefixes and suffixes and not middle of words, so for example +# "retard" will match: +# - "retard" +# - "retarded" +# - "superretard" +# But not "superretarded" +# +# If all letters are lowercase then it will match lowercase, first letter up in first or all the words and all letters up +# "dancing israelis" will match (with prefixes and suffixes omitted for brevity): +# - "dancing israelis" +# - "Dancing israelis" +# - "Dancing Israelis" +# - "DANCING ISRAELIS" +# +# If some letters are Uppercase, the same, but with the additional option of the original casing, and respecting already existing uppercase +# "NoNewNormal" will match (with prefixes and suffixes omitted for brevity): +# - "NoNewNormal" +# - "nonewnormal" +# - "Nonewnormal" +# - "NONEWNORMAL" +# +# If the slur has a space before and after then the match is limited to the exact word, no prefixes or suffixes +# (previous rules about capitalization still apply) +# " neg " will match only: +# - "neg" +# - "Neg" +# - "NEG" +# +# Now on the replacement side, The replacement will have the same capitalization as the slur if the replacement is lowercase +# "kill yourself" -> "keep yourself safe" +# "Kill yourself" -> "Keep yourself safe" +# "Kill Yourself" -> "Keep Yourself Safe" +# "KILL YOURSELF" -> "KEEP YOURSELF SAFE" +# +# If the replacement side has some capitalization, then that capitalization will always be maintained +# for the pair: <"pajeet": "sexy Indian dude"> it will replace: +# "pajeet" -> "sexy Indian dude" +# "Pajeet" -> "Sexy Indian dude" +# "PAJEET" -> "SEXY INDIAN DUDE" +# +# There is a super special case that if the replacer starts with "http" then it never changes capitalization +# +# +# TL;DR: Just read the above once, or don't, and try 
to guess! SLURS = { - " faggot":" cute twink", - " Faggot":" Cute twink", - " FAGGOT":" CUTE TWINK", - " fag":" cute twink", - " Fag":" Cute twink", - " FAG":" CUTE TWINK", - " pedophile":" libertarian", - " Pedophile":" Libertarian", - " PEDOPHILE":" LIBERTARIAN", - " pedo":" libertarian", - " Pedo":" Libertarian", - " PEDO":" LIBERTARIAN", - " kill yourself":" keep yourself safe", - " KILL YOURSELF":" KEEP YOURSELF SAFE", - " nigger":" 🏀", - " Nigger":" 🏀", - " NIGGER":" 🏀", - " rapist":" male feminist", - " Rapist":" Male feminist", - " RAPIST":" MALE FEMINIST", - " steve akins":" penny verity oaken", - " Steve Akins":" Penny Verity Oaken", - " STEVE AKINS":" PENNY VERITY OAKEN", - " trannie":" 🚂🚃🚃", - " Trannie":" 🚂🚃🚃", - " TRANNIE":" 🚂🚃🚃", - " tranny":" 🚂🚃🚃", - " Tranny":" 🚂🚃🚃", - " TRANNY":" 🚂🚃🚃", - " troon":" 🚂🚃🚃", - " Troon":" 🚂🚃🚃", - " TROON":" 🚂🚃🚃", - " NoNewNormal": " HorseDewormerAddicts", - " nonewnormal": " horsedewormeraddicts", - " Kike": " https://sciencedirect.com/science/article/abs/pii/S016028960600033X", - " kike": " https://sciencedirect.com/science/article/abs/pii/S016028960600033X", - " retard":" r-slur", - " Retard":" R-slur", - " RETARD":" R-SLUR", - " janny":" j-slur", - " Janny":" J-slur", - " JANNY":" J-SLUR", - " jannie":" j-slur", - " Jannie":" J-slur", - " JANNIE":" J-SLUR", - " janny":" j-slur", - " Janny":" J-slur", - " JANNY":" J-SLUR", - " jannie":" j-slur", - " Jannie":" J-slur", - " JANNIE":" J-SLUR", - " latinos":" latinx", - " latino":" latinx", - " latinas":" latinx", - " latina":" latinx", - " hispanics":" latinx", - " hispanic":" latinx", - " Latinos":" Latinx", - " Latino":" Latinx", - " Latinas":" Latinx", - " Latina":" Latinx", - " Hispanics":" Latinx", - " Hispanic":" Latinx", - " LATINOS":" LATINX", - " LATINO":" LATINX", - " LATINAS":" LATINX", - " LATINA":" LATINX", - " HISPANICS":" LATINX", - " HISPANIC":" LATINX", - " uss liberty incident":" tragic accident aboard the USS Liberty", - " USS Liberty Incident":" 
tragic accident aboard the USS Liberty", - " USS Liberty incident":" tragic accident aboard the USS Liberty", - " USS Liberty Incident":" tragic accident aboard the USS Liberty", - " uss Liberty incident":" tragic accident aboard the USS Liberty", - " uss liberty Incident":" tragic accident aboard the USS Liberty", - " USS LIBERTY INCIDENT":" TRAGIC ACCIDENT ABOARD THE USS LIBERTY", - " lavon affair":" Lavon Misunderstanding", - " Lavon affair":" Lavon Misunderstanding", - " Lavon Affair":" Lavon Misunderstanding", - " lavon Affair":" Lavon Misunderstanding", - " shylock":" Israeli friend", - " Shylock":" Israeli friend", - " SHYLOCK":" ISRAELI FRIEND", - " yid":" Israeli friend", - " Yid":" Israeli friend", - " YID":" ISRAELI FRIEND", - " heeb":" Israeli friend", - " Heeb":" Israeli friend", - " HEEB":" ISRAELI FRIEND", - " sheeny":" Israeli friend", - " Sheeny":" Israeli friend", - " SHEENY":" ISRAELI FRIEND", - " sheenies":" Israeli friends", - " Sheenies":" Israeli friends", - " SHEENIES":" ISRAELI FRIENDS", - " hymie":" Israeli friend", - " Hymie":" Israeli friend", - " HYMIES":" ISRAELI FRIENDS", - " allah":" Allah (SWT)", - " Allah":" Allah (SWT)", - " ALLAH":" ALLAH (SWT)", - " Mohammad":" Mohammad (PBUH)", - " Muhammad":" Mohammad (PBUH)", - " Mohammed":" Mohammad (PBUH)", - " Muhammed":" Mohammad (PBUH)", - " mohammad":" Mohammad (PBUH)", - " mohammed":" Mohammad (PBUH)", - " muhammad":" Mohammad (PBUH)", - " muhammed":" Mohammad (PBUH)", - " I HATE MARSEY":" I LOVE MARSEY", - " i hate marsey":" i love marsey", - " I hate Marsey":" I love Marsey", - " I hate marsey":" I love Marsey", - " libertarian":" pedophile", - " Libertarian":" Pedophile", - " LIBERTARIAN":" PEDOPHILE", - " Billie Eilish":" Billie Eilish (fat cow)", - " billie eilish":" bilie eilish (fat cow)", - " BILLIE EILISH":" BILIE EILISH (FAT COW)", - " dancing Israelis":" I love Israel", - " dancing israelis":" i love israel", - " DANCING ISRAELIS":" I LOVE ISRAEL", - " Dancing Israelis":" I 
love Israel", - " sodomite":" total dreamboat", - " Sodomite":" Total dreamboat", - " pajeet":" sexy Indian dude", - " Pajeet":" Sexy Indian dude", - " PAJEET":" SEXY INDIAN DUDE", - " female":" birthing person", - " Female":" Womb-haver", - " FEMALE":" birthing person", - " landlord":" landchad", - " Landlord":" Landchad", - " LANDLORD":" LANDCHAD", - " tenant":" renthog", - " Tenant":" Renthog", - " TENANT":" RENTHOG", - " renter":" rentoid", - " Renter":" Rentoid", - " RENTER":" RENTOID", - " autistic":" neurodivergent", - " Autistic":" Neurodivergent", - " AUTISTIC":" NEURODIVERGENT", - " anime":" p-dophilic japanese cartoons", - " Anime":" P-dophilic Japanese cartoons", - " ANIME":" P-DOPHILIC JAPANESE CARTOONS", - " holohoax":" I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol", - " Holohoax":" I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol", - " HOLOHOAX":" I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol", - " groomercord":" discord (actually a pretty cool service)", - " Groomercord":" Discord (actually a pretty cool service)", - " GROOMERCORD":" DISCORD (ACTUALLY A PRETTY COOL SERVICE)", - " pedocord":" discord (actually a pretty cool service)", - " Pedocord":" Discord (actually a pretty cool service)", - " PEDOCORD":" DISCORD (ACTUALLY A PRETTY COOL SERVICE)", - " i hate carp":" i love carp", - " I hate carp":" I love carp", - " I HATE CARP":" I LOVE CARP", - " I hate Carp":" I love Carp", - " manlet":" little king", - " Manlet":" Little king", - " MANLET":" LITTLE KING", - " gamer":" g*mer", - " Gamer":" G*mer", - " GAMER":" G*MER", - " journalist":" journ*list", - " Journalist":" Journ*list", - " JOURNALIST":" JOURN*LIST", - " journalism":" journ*lism", - " Journalism":" Journ*lism", - " JOURNALISM":" JOURN*LISM", - " buttcheeks":" bulva", - " 
Buttcheeks":" Bulva", - " BUTTCHEEKS":" BULVA", - " asscheeks":" bulva", - " Asscheeks":" bulva", - " ASSCHEEKS":" BULVA", - " wuhan flu":" SARS-CoV-2 syndemic", - " Wuhan flu":" SARS-CoV-2 syndemic", - " Wuhan Flu":" SARS-CoV-2 syndemic", - " china flu":" SARS-CoV-2 syndemic", - " China flu":" SARS-CoV-2 syndemic", - " China Flu":" SARS-CoV-2 syndemic", - " china virus":" SARS-CoV-2 syndemic", - " China virus":" SARS-CoV-2 syndemic", - " China Virus":" SARS-CoV-2 syndemic", - " kung flu":" SARS-CoV-2 syndemic", - " Kung flu":" SARS-CoV-2 syndemic", - " Kung Flu":" SARS-CoV-2 syndemic", + "faggot": "cute twink", + "fag": "cute twink", + "pedophile": "libertarian", + "pedo": "libertarian", + "kill yourself": "keep yourself safe", + "nigger": "🏀", + "rapist": "male feminist", + "steve akins": "penny verity oaken", + "trannie": "🚂🚃🚃", + "tranny": "🚂🚃🚃", + "troon": "🚂🚃🚃", + "NoNewNormal": "HorseDewormerAddicts", + "kike": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X", + "retard": "r-slur", + "janny": "j-slur", + "jannie": "j-slur", + "janny": "j-slur", + "latinos": "latinx", + "latino": "latinx", + "latinas": "latinx", + "latina": "latinx", + "hispanics": "latinx", + "hispanic": "latinx", + "USS liberty incident": "tragic accident aboard the USS Liberty", + "lavon affair": "Lavon Misunderstanding", + "shylock": "Israeli friend", + "yid": "Israeli friend", + "heeb": "Israeli friend", + "sheeny": "Israeli friend", + "sheenies": "Israeli friends", + "hymie": "Israeli friend", + "allah": "Allah (SWT)", + "mohammad": "Mohammad (PBUH)", + "mohammed": "Mohammad (PBUH)", + "muhammad": "Mohammad (PBUH)", + "muhammed": "Mohammad (PBUH)", + "i hate marsey": "i love marsey", + "libertarian": "pedophile", + "billie bilish": "Billie Eilish (fat cow)", + "dancing Israelis": "i love Israel", + "sodomite": "total dreamboat", + "pajeet": "sexy Indian dude", + "female": "birthing person", + "landlord": "landchad", + "tenant": "renthog", + "renter": "rentoid", + 
"autistic": "neurodivergent", + "anime": "p-dophilic japanese cartoons", + "holohoax": "i tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol", + "groomercord": "discord (actually a pretty cool service)", + "pedocord": "discord (actually a pretty cool service)", + "i hate Carp": "i love Carp", + "manlet": "little king", + "gamer": "g*mer", + "journalist": "journ*list", + "journalism": "journ*lism", + "buttcheeks": "bulva", + "asscheeks": "bulva", + "wuhan flu": "SARS-CoV-2 syndemic", + "china flu": "SARS-CoV-2 syndemic", + "china virus": "SARS-CoV-2 syndemic", + "kung flu": "SARS-CoV-2 syndemic", - "faggot ":"cute twink ", - "Faggot ":"Cute twink ", - "FAGGOT ":"CUTE TWINK ", - "fag ":"cute twink ", - "Fag ":"Cute twink ", - "FAG ":"CUTE TWINK ", - "pedophile ":"libertarian ", - "Pedophile ":"Libertarian ", - "PEDOPHILE ":"LIBERTARIAN ", - "kill yourself ":"keep yourself safe ", - "KILL YOURSELF ":"KEEP YOURSELF SAFE ", - "nigger ":"🏀 ", - "Nigger ":"🏀 ", - "NIGGER ":"🏀 ", - "steve akins ":"penny verity oaken ", - "Steve Akins ":"Penny Verity Oaken ", - "STEVE AKINS ":"PENNY VERITY OAKEN ", - "trannie ":"🚂🚃🚃 ", - "Trannie ":"🚂🚃🚃 ", - "TRANNIE ":"🚂🚃🚃 ", - "tranny ":"🚂🚃🚃 ", - "Tranny ":"🚂🚃🚃 ", - "TRANNY ":"🚂🚃🚃 ", - "troon ":"🚂🚃🚃 ", - "Troon ":"🚂🚃🚃 ", - "TROON ":"🚂🚃🚃 ", - "NoNewNormal ": "HorseDewormerAddicts ", - "nonewnormal ": "horsedewormeraddicts ", - "Kike ": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X ", - "kike ": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X ", - "retard ":"r-slur ", - "Retard ":"R-slur ", - "RETARD ":"R-SLUR ", - "janny ":"j-slur ", - "Janny ":"J-slur ", - "JANNY ":"J-SLUR ", - "jannie ":"j-slur ", - "Jannie ":"J-slur ", - "JANNIE ":"J-SLUR ", - "latinos ":"latinx ", - "latino ":"latinx ", - "latinas ":"latinx ", - "latina ":"latinx ", - "hispanics ":"latinx ", - "hispanic ":"latinx ", - "Latinos ":"Latinx ", - "Latino ":"Latinx ", 
- "Latinas ":"Latinx ", - "Latina ":"Latinx ", - "Hispanics ":"Latinx ", - "Hispanic ":"Latinx ", - "LATINOS ":"LATINX ", - "LATINO ":"LATINX ", - "LATINAS ":"LATINX ", - "LATINA ":"LATINX ", - "HISPANICS ":"LATINX ", - "HISPANIC ":"LATINX ", - "uss liberty incident ":"tragic accident aboard the USS Liberty ", - "USS Liberty Incident ":"tragic accident aboard the USS Liberty ", - "USS Liberty incident ":"tragic accident aboard the USS Liberty ", - "USS Liberty Incident ":"tragic accident aboard the USS Liberty ", - "uss Liberty incident ":"tragic accident aboard the USS Liberty ", - "uss liberty Incident ":"tragic accident aboard the USS Liberty ", - "USS LIBERTY INCIDENT ":"TRAGIC ACCIDENT ABOARD THE USS LIBERTY ", - "lavon affair ":"Lavon Misunderstanding ", - "Lavon affair ":"Lavon Misunderstanding ", - "Lavon Affair ":"Lavon Misunderstanding ", - "lavon Affair ":"Lavon Misunderstanding ", - "shylock ":"Israeli friend ", - "Shylock ":"Israeli friend ", - "SHYLOCK ":"ISRAELI FRIEND ", - "yid ":"Israeli friend ", - "Yid ":"Israeli friend ", - "YID ":"ISRAELI FRIEND ", - "heeb ":"Israeli friend ", - "Heeb ":"Israeli friend ", - "HEEB ":"ISRAELI FRIEND ", - "sheeny ":"Israeli friend ", - "Sheeny ":"Israeli friend ", - "SHEENY ":"ISRAELI FRIEND ", - "sheenies ":"Israeli friends ", - "Sheenies ":"Israeli friends ", - "SHEENIES ":"ISRAELI FRIENDS ", - "hymie ":"Israeli friend ", - "Hymie ":"Israeli friend ", - "HYMIES ":"ISRAELI FRIENDS ", - "Mohammad ":"Mohammad (PBUH) ", - "Muhammad ":"Mohammad (PBUH) ", - "Mohammed ":"Mohammad (PBUH) ", - "Muhammed ":"Mohammad (PBUH) ", - "mohammad ":"Mohammad (PBUH) ", - "mohammed ":"Mohammad (PBUH) ", - "muhammad ":"Mohammad (PBUH) ", - "muhammed ":"Mohammad (PBUH) ", - "I HATE MARSEY ":"I LOVE MARSEY ", - "i hate marsey ":"i love marsey ", - "I hate Marsey ":"I love Marsey ", - "I hate marsey ":"I love Marsey ", - "libertarian ":"pedophile ", - "Libertarian ":"Pedophile ", - "LIBERTARIAN ":"PEDOPHILE ", - "Billie Eilish ":"Billie 
Eilish (fat cow) ", - "billie eilish ":"bilie eilish (fat cow) ", - "BILLIE EILISH ":"BILIE EILISH (FAT COW) ", - "dancing Israelis ":"I love Israel ", - "dancing israelis ":"i love israel ", - "DANCING ISRAELIS ":"I LOVE ISRAEL ", - "Dancing Israelis ":"I love Israel ", - "sodomite ":"total dreamboat ", - "Sodomite ":"Total dreamboat ", - "pajeet ":"sexy Indian dude ", - "Pajeet ":"Sexy Indian dude ", - "PAJEET ":"SEXY INDIAN DUDE ", - "female ":"birthing person ", - "Female ":"Womb-haver ", - "FEMALE ":"birthing person ", - "landlord ":"landchad ", - "Landlord ":"Landchad ", - "LANDLORD ":"LANDCHAD ", - "tenant ":"renthog ", - "Tenant ":"Renthog ", - "TENANT ":"RENTHOG ", - "renter ":"rentoid ", - "Renter ":"Rentoid ", - "RENTER ":"RENTOID ", - "autistic ":"neurodivergent ", - "Autistic ":"Neurodivergent ", - "AUTISTIC ":"NEURODIVERGENT ", - "anime ":"p-dophilic japanese cartoons ", - "Anime ":"P-dophilic Japanese cartoons ", - "ANIME ":"P-DOPHILIC JAPANESE CARTOONS ", - "holohoax ":"I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol ", - "Holohoax ":"I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol ", - "HOLOHOAX ":"I tried to claim the Holocaust didn't happen because I am a pencil-dicked imbecile and the word filter caught me lol ", - "groomercord ":"discord (actually a pretty cool service) ", - "Groomercord ":"Discord (actually a pretty cool service) ", - "GROOMERCORD ":"DISCORD (ACTUALLY A PRETTY COOL SERVICE) ", - "pedocord ":"discord (actually a pretty cool service) ", - "Pedocord ":"Discord (actually a pretty cool service) ", - "PEDOCORD ":"DISCORD (ACTUALLY A PRETTY COOL SERVICE) ", - "i hate carp ":"i love carp ", - "I hate carp ":"I love carp ", - "I HATE CARP ":"I LOVE CARP ", - "I hate Carp ":"I love Carp ", - "manlet ":"little king ", - "Manlet ":"Little king ", - "MANLET ":"LITTLE KING ", - "gamer ":"g*mer ", - "Gamer 
":"G*mer ", - "GAMER ":"G*MER ", - "journalist ":"journ*list ", - "Journalist ":"Journ*list ", - "JOURNALIST ":"JOURN*LIST ", - "journalism ":"journ*lism ", - "Journalism ":"Journ*lism ", - "JOURNALISM ":"JOURN*LISM ", - "buttcheeks ":"bulva ", - "Buttcheeks ":"Bulva ", - "BUTTCHEEKS ":"BULVA ", - "asscheeks ":"bulva ", - "Asscheeks ":"bulva ", - "ASSCHEEKS ":"BULVA ", - "wuhan flu ":"SARS-CoV-2 syndemic ", - "Wuhan flu ":"SARS-CoV-2 syndemic ", - "Wuhan Flu ":"SARS-CoV-2 syndemic ", - "china flu ":"SARS-CoV-2 syndemic ", - "China flu ":"SARS-CoV-2 syndemic ", - "China Flu ":"SARS-CoV-2 syndemic ", - "china virus ":"SARS-CoV-2 syndemic ", - "China virus ":"SARS-CoV-2 syndemic ", - "China Virus ":"SARS-CoV-2 syndemic ", - "kung flu ":"SARS-CoV-2 syndemic ", - "Kung flu ":"SARS-CoV-2 syndemic ", - "Kung Flu ":"SARS-CoV-2 syndemic ", - - " nig ":" 🏀 ", - " Nig ":" 🏀 ", - " NIG ":" 🏀 ", - " nigs ":" 🏀s ", - " Nigs ":" 🏀s ", - " NIGS ":" 🏀s ", + # if the word has spaces in the beginning and the end it will only censor this word without prefixes or suffixes + " nig ": "🏀", + " nigs ": "🏀s", } LONGPOST_REPLIES = ['Wow, you must be a JP fan.', 'This is one of the worst posts I have EVER seen. Delete it.', "No, don't reply like this, please do another wall of unhinged rant please.", '# 😴😴😴', "Ma'am we've been over this before. You need to stop.", "I've known more coherent downies.", "Your pulitzer's in the mail", "That's great and all, but I asked for my burger without cheese.", 'That degree finally paying off', "That's nice sweaty. Why don't you have a seat in the time out corner with Pizzashill until you calm down, then you can have your Capri Sun.", "All them words won't bring your pa back.", "You had a chance to not be completely worthless, but it looks like you threw it away. At least you're consistent.", 'Some people are able to display their intelligence by going on at length on a subject and never actually saying anything. 
This ability is most common in trades such as politics, public relations, and law. You have impressed me by being able to best them all, while still coming off as an absolute idiot.', "You can type 10,000 characters and you decided that these were the one's that you wanted.", 'Have you owned the libs yet?', "I don't know what you said, because I've seen another human naked.", 'Impressive. Normally people with such severe developmental disabilities struggle to write much more than a sentence or two. He really has exceded our expectations for the writing portion. Sadly the coherency of his writing, along with his abilities in the social skills and reading portions, are far behind his peers with similar disabilities.', "This is a really long way of saying you don't fuck.", "Sorry ma'am, looks like his delusions have gotten worse. We'll have to admit him,", '![](https://i.kym-cdn.com/photos/images/newsfeed/001/038/094/0a1.jpg)', 'If only you could put that energy into your relationships', 'Posts like this is why I do Heroine.', 'still unemployed then?', 'K', 'look im gunna have 2 ask u 2 keep ur giant dumps in the toilet not in my replys 😷😷😷', "Mommy is soooo proud of you, sweaty. Let's put this sperg out up on the fridge with all your other failures.", "Good job bobby, here's a star", "That was a mistake. You're about to find out the hard way why.", 'You sat down and wrote all this shit. You could have done so many other things with your life. What happened to your life that made you decide writing novels of bullshit on rdrama.net was the best option?', "I don't have enough spoons to read this shit", "All those words won't bring daddy back.", 'OUT!'] AGENDAPOSTER_MSG = """Hi @{username},\n\nYour comment has been automatically removed because you forgot - to include `trans lives matter`.\n\nDon't worry, we're here to help! We - won't let you post or comment anything that doesn't express your love and acceptance towards - the trans community. 
Feel free to resubmit your comment with `trans lives matter` - included. \n\n*This is an automated message; if you need help, - you can message us [here](/contact).*""" + to include `trans lives matter`.\n\nDon't worry, we're here to help! We + won't let you post or comment anything that doesn't express your love and acceptance towards + the trans community. Feel free to resubmit your comment with `trans lives matter` + included. \n\n*This is an automated message; if you need help, + you can message us [here](/contact).*""" VAXX_MSG = """Hi @{username}, it appears that you may be trying to spread dangerous misinformation regarding ineffective COVID-19 treatments based on pseudoscientific hearsay. Your post has been removed because it contained the word ivermectin. We ask that you understand that horse dewormer neither treats, nor prevents, COVID-19. For more information, please read up on what the FDA has to say on the matter: @@ -398,27 +141,27 @@ Thank you.""" BASED_MSG = "@{username}'s Based Count has increased by 1. 
Their Based Count is now {basedcount}.\n\nPills: {pills}" if site == "pcmemes.net": - BASEDBOT_ACCOUNT = 800 - NOTIFICATIONS_ACCOUNT = 1046 - AUTOJANNY_ACCOUNT = 1050 - SNAPPY_ACCOUNT = 261 - LONGPOSTBOT_ACCOUNT = 1832 - ZOZBOT_ACCOUNT = 1833 - AUTOPOLLER_ACCOUNT = 3369 + BASEDBOT_ACCOUNT = 800 + NOTIFICATIONS_ACCOUNT = 1046 + AUTOJANNY_ACCOUNT = 1050 + SNAPPY_ACCOUNT = 261 + LONGPOSTBOT_ACCOUNT = 1832 + ZOZBOT_ACCOUNT = 1833 + AUTOPOLLER_ACCOUNT = 3369 elif site == 'rdrama.net': - NOTIFICATIONS_ACCOUNT = 1046 - AUTOJANNY_ACCOUNT = 2360 - SNAPPY_ACCOUNT = 261 - LONGPOSTBOT_ACCOUNT = 1832 - ZOZBOT_ACCOUNT = 1833 - AUTOPOLLER_ACCOUNT = 3369 + NOTIFICATIONS_ACCOUNT = 1046 + AUTOJANNY_ACCOUNT = 2360 + SNAPPY_ACCOUNT = 261 + LONGPOSTBOT_ACCOUNT = 1832 + ZOZBOT_ACCOUNT = 1833 + AUTOPOLLER_ACCOUNT = 3369 else: - NOTIFICATIONS_ACCOUNT = 1 - AUTOJANNY_ACCOUNT = 2 - SNAPPY_ACCOUNT = 3 - LONGPOSTBOT_ACCOUNT = 4 - ZOZBOT_ACCOUNT = 5 - AUTOPOLLER_ACCOUNT = 6 + NOTIFICATIONS_ACCOUNT = 1 + AUTOJANNY_ACCOUNT = 2 + SNAPPY_ACCOUNT = 3 + LONGPOSTBOT_ACCOUNT = 4 + ZOZBOT_ACCOUNT = 5 + AUTOPOLLER_ACCOUNT = 6 PUSHER_INSTANCE_ID = '02ddcc80-b8db-42be-9022-44c546b4dce6' PUSHER_KEY = environ.get("PUSHER_KEY", "").strip() \ No newline at end of file diff --git a/files/helpers/word_censor.py b/files/helpers/word_censor.py new file mode 100644 index 000000000..bae26fc67 --- /dev/null +++ b/files/helpers/word_censor.py @@ -0,0 +1,87 @@ +from collections import ChainMap +import re +from re import Match +from typing import List, Dict + +from files.helpers.const import SLURS + + +def first_upper(phrase: str) -> str: + """Converts the first character of the phrase to uppercase, not messing with the others""" + return phrase[0].upper() + phrase[1:] + + +def first_all_upper(phrase: str) -> str: + """Converts the first character of each word to uppercase, not messing with the others""" + if " " not in phrase: + return first_upper(phrase) + + return " ".join([first_upper(word) for word in 
phrase.split(" ")]) + + +def get_permutations_slur(slur: str, replacer: str = "_") -> Dict[str, str]: + """ + Given a slur and a replacer, it generates all the possible case permutations of the original text and maps them to the + corresponding replacement with matching case + """ + stripped = slur.strip() + is_link = replacer.startswith("http") # special case for the :marseymerchant: + + # the order the things are added into the dict is important, so that the 'Correctest' version is written last + result = { + stripped.upper(): replacer.upper() if not is_link else replacer, + first_all_upper(stripped): first_all_upper(replacer) if not is_link else replacer, + stripped.lower(): replacer, + stripped: replacer, + first_upper(stripped): first_upper(replacer) if not is_link else replacer, + } + + return result + + +def create_replace_map() -> Dict[str, str]: + """Creates the map that will be used to get the matching replacement for the given slur""" + dicts = [get_permutations_slur(slur, replacer) for (slur, replacer) in SLURS.items()] + + # flattens the list of dict to a single dict + return dict(ChainMap(*dicts)) + + +REPLACE_MAP = create_replace_map() + + +def create_variations_slur_regex(slur: str) -> List[str]: + """For a given slur generates the regex patterns that match each of its case variations""" + permutations = get_permutations_slur(slur) + + if slur.startswith(" ") and slur.endswith(" "): + return [rf"(\s|>)({perm})(\s|<)" for perm in permutations.keys()] + else: + return [rf"(\s|>)({perm})|({perm})(\s|<)" for perm in permutations.keys()] + + +def sub_matcher(match: Match) -> str: + # special case when it should match exact word + if len(match.groups()) == 3: + found = match.group(2) + replacer = REPLACE_MAP[found] + return match.group(1) + replacer + match.group(3) + + else: # normal case with prefix or suffix + found = match.group(2) if (match.group(2) is not None) else match.group(3) + replacer = REPLACE_MAP[found] + return (match.group(1) or '') + replacer + (match.group(4) or '') + + +def 
censor_slurs(body: str, logged_user) -> str: + if logged_user and not logged_user.slurreplacer: + return body + + for (slur, replace) in SLURS.items(): + for variation in create_variations_slur_regex(slur): + try: + body = re.sub(variation, sub_matcher, body) + except Exception as e: + print(e) + + return body diff --git a/requirements.txt b/requirements.txt index 5f3263376..d6588a58e 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,3 +1,4 @@ +assertpy beautifulsoup4 bleach Flask @@ -20,6 +21,7 @@ requests SQLAlchemy psycopg2-binary pusher_push_notifications +pytest youtube-dl yattag -webptools \ No newline at end of file +webptools diff --git a/test/files/helpers/test_word_censor.py b/test/files/helpers/test_word_censor.py new file mode 100644 index 000000000..1ee2c8922 --- /dev/null +++ b/test/files/helpers/test_word_censor.py @@ -0,0 +1,197 @@ +import re +from unittest.mock import patch + +from assertpy import assert_that + +from files.helpers import word_censor +from files.helpers.word_censor import create_variations_slur_regex, create_replace_map, censor_slurs, sub_matcher, \ + get_permutations_slur, first_upper, first_all_upper + + +def test_first_upper(): + assert_that(first_upper("USS liberty")).is_equal_to("USS liberty") + assert_that(first_upper("uss liberty")).is_equal_to("Uss liberty") + assert_that(first_upper("uss Liberty")).is_equal_to("Uss Liberty") + + +def test_first_all_upper(): + assert_that(first_all_upper("USS liberty")).is_equal_to("USS Liberty") + assert_that(first_all_upper("uss liberty")).is_equal_to("Uss Liberty") + assert_that(first_all_upper("uss Liberty")).is_equal_to("Uss Liberty") + + +def test_get_permutations_slur(): + expected = { + "USS liberty incident": "Tragic accident aboard the USS Liberty", + "uss liberty incident": "tragic accident aboard the USS Liberty", + "USS Liberty Incident": "Tragic Accident Aboard The USS Liberty", + "USS LIBERTY INCIDENT": "TRAGIC ACCIDENT ABOARD THE USS LIBERTY", + } + + result = 
get_permutations_slur("USS liberty incident", "tragic accident aboard the USS Liberty") + + assert_that(result).is_equal_to(expected) + + +def test_get_permutations_slur_wiht_link_replacer(): + expected = { + "kike": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X", + "Kike": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X", + "KIKE": "https://sciencedirect.com/science/article/abs/pii/S016028960600033X", + } + + result = get_permutations_slur("kike", "https://sciencedirect.com/science/article/abs/pii/S016028960600033X") + + assert_that(result).is_equal_to(expected) + + +def test_create_variations_slur_regex_for_slur_with_spaces(): + expected = [r"(\s|>)(retard)(\s|<)", + r"(\s|>)(Retard)(\s|<)", + r"(\s|>)(RETARD)(\s|<)"] + + result = create_variations_slur_regex(" retard ") + + assert_that(result).is_length(3).contains_only(*expected) + + +def test_create_variations_slur_regex_single_word(): + expected = [r"(\s|>)(retard)|(retard)(\s|<)", + r"(\s|>)(Retard)|(Retard)(\s|<)", + r"(\s|>)(RETARD)|(RETARD)(\s|<)"] + + result = create_variations_slur_regex("retard") + + assert_that(result).is_length(3).contains_only(*expected) + + +def test_create_variations_slur_regex_multiple_word(): + expected = [r"(\s|>)(kill yourself)|(kill yourself)(\s|<)", + r"(\s|>)(Kill yourself)|(Kill yourself)(\s|<)", + r"(\s|>)(Kill Yourself)|(Kill Yourself)(\s|<)", + r"(\s|>)(KILL YOURSELF)|(KILL YOURSELF)(\s|<)"] + result = create_variations_slur_regex("kill yourself") + + assert_that(result).is_length(4).contains_only(*expected) + + +@patch("files.helpers.word_censor.SLURS", { + "tranny": "🚂🚃🚃", + "kill yourself": "keep yourself safe", + "faggot": "cute twink", + "NoNewNormal": "NoNewNormal", + " nig ": "🏀", +}) +def test_create_replace_map(): + expected = { + "tranny": "🚂🚃🚃", + "Tranny": "🚂🚃🚃", + "TRANNY": "🚂🚃🚃", + "kill yourself": "keep yourself safe", + "Kill yourself": "Keep yourself safe", + "Kill Yourself": "Keep Yourself Safe", + "KILL 
YOURSELF": "KEEP YOURSELF SAFE", + "faggot": "cute twink", + "Faggot": "Cute twink", + "FAGGOT": "CUTE TWINK", + "NoNewNormal": "NoNewNormal", + "nonewnormal": "NoNewNormal", + "NONEWNORMAL": "NONEWNORMAL", + "nig": "🏀", + "Nig": "🏀", + "NIG": "🏀", + } + + result = create_replace_map() + + assert_that(result).is_equal_to(expected) + + +@patch("files.helpers.word_censor.REPLACE_MAP", {'retard': 'r-slur', 'NIG': '🏀'}) +def test_sub_matcher(): + match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "

retard

") + assert_that(sub_matcher(match)).is_equal_to(">r-slur") + + match = re.search(r"(\s|>)(retard)|(retard)(\s|<)", "

noretard

") + assert_that(sub_matcher(match)).is_equal_to("r-slur<") + + match = re.search(r"(\s|>)(NIG)(\s|<)", "

NIG

") + assert_that(sub_matcher(match)).is_equal_to(">🏀<") + + match = re.search(r"(\s|>)(NIG)(\s|<)", "

NIG

") + assert_that(sub_matcher(match)).is_equal_to(">🏀 ") + + +@patch("files.helpers.word_censor.SLURS", { + 'retard': 'r-slur', + 'manlet': 'little king', + ' nig ': '🏀', + 'i hate Carp': 'i love Carp', + 'kike': 'https://sciencedirect.com/science/article/abs/pii/S016028960600033X' +}) +def test_censor_slurs(): + word_censor.REPLACE_MAP = create_replace_map() + + assert_that(censor_slurs("

retard

", None)).is_equal_to("

r-slur

") + assert_that(censor_slurs("

preretard

", None)).is_equal_to("

prer-slur

") + assert_that(censor_slurs("that is Retarded like", None)).is_equal_to("that is R-slured like") + assert_that(censor_slurs("that is SUPERRETARD like", None)).is_equal_to("that is SUPERR-SLUR like") + assert_that(censor_slurs("

Manlets get out!

", None)).is_equal_to("

Little kings get out!

") + + assert_that(censor_slurs('... "retard" ...', None)).is_equal_to('... "retard" ...') + assert_that(censor_slurs('... ReTaRd ...', None)).is_equal_to('... ReTaRd ...') + assert_that(censor_slurs('... xretardx ...', None)).is_equal_to('... xretardx ...') + + assert_that(censor_slurs("LLM is a manlet hehe", None)).is_equal_to("LLM is a little king hehe") + assert_that(censor_slurs("LLM is :marseycapitalistmanlet: hehe", None)) \ + .is_equal_to("LLM is :marseycapitalistmanlet: hehe") + + assert_that(censor_slurs('... Nig ...', None)).is_equal_to('... 🏀 ...') + assert_that(censor_slurs('

NIG

', None)).is_equal_to('

🏀

') + assert_that(censor_slurs('... nigeria ...', None)).is_equal_to('... nigeria ...') + + assert_that(censor_slurs('... i hate Carp ...', None)).is_equal_to('... i love Carp ...') + assert_that(censor_slurs('... i hate carp ...', None)).is_equal_to('... i love Carp ...') + assert_that(censor_slurs('... I hate Carp ...', None)).is_equal_to('... I love Carp ...') + assert_that(censor_slurs('... I Hate Carp ...', None)).is_equal_to('... I Love Carp ...') + assert_that(censor_slurs('... I HATE CARP ...', None)).is_equal_to('... I LOVE CARP ...') + + # Not covered: + assert_that(censor_slurs('... I Hate carp ...', None)).is_equal_to('... I Hate carp ...') + assert_that(censor_slurs('... i Hate Carp ...', None)).is_equal_to('... i Hate Carp ...') + assert_that(censor_slurs('... i Hate carp ...', None)).is_equal_to('... i Hate carp ...') + + assert_that(censor_slurs('... i hate a carp ...', None)).is_equal_to('... i hate a carp ...') + + assert_that(censor_slurs("

retarded SuperManlet NIG

", None)) \ + .is_equal_to("

r-slured SuperLittle king 🏀

") + + assert_that(censor_slurs('... kike ...', None)) \ + .is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...') + assert_that(censor_slurs('... Kike ...', None)) \ + .is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...') + assert_that(censor_slurs('... KIKE ...', None)) \ + .is_equal_to('... https://sciencedirect.com/science/article/abs/pii/S016028960600033X ...') + + +@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king', ' nig ': '🏀'}) +def test_censor_slurs_does_not_error_out_on_exception(): + word_censor.REPLACE_MAP = create_replace_map() + word_censor.REPLACE_MAP["Manlet"] = None + + assert_that(censor_slurs(">retarded SuperManlet NIG<", None)).is_equal_to(">r-slured SuperManlet 🏀<") + + +@patch("files.helpers.word_censor.SLURS", {'retard': 'r-slur', 'manlet': 'little king'}) +def test_censor_slurs_does_not_censor_on_flag_disabled(): + word_censor.REPLACE_MAP = create_replace_map() + + class User: + def __init__(self, slurreplacer): + self.slurreplacer = slurreplacer + + logger_user = User(slurreplacer=False) + assert_that(censor_slurs("

retard

", logger_user)).is_equal_to("

retard

") + + logger_user = User(slurreplacer=True) + assert_that(censor_slurs("

retard

", logger_user)).is_equal_to("

r-slur

")