Fix ping removal for @Username and people on the notified users list.

master
float-trip 2023-07-23 14:52:00 +00:00
parent a06c66dff4
commit 42b06c8bfc
2 changed files with 35 additions and 19 deletions

View File

@ -30,7 +30,7 @@ class StopAfterPlusIsGenerated(LogitsProcessor):
class Model: class Model:
def __init__(self): def __init__(self):
name = f"{config['data_dir']}/mpt-30b-drama" name = "float-trip/mpt-30b-drama"
self.tokenizer = GPTNeoXTokenizerFast.from_pretrained( self.tokenizer = GPTNeoXTokenizerFast.from_pretrained(
name, pad_token="<|endoftext|>" name, pad_token="<|endoftext|>"
) )

View File

@ -11,41 +11,56 @@ URL_REGEX = (
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+" r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
) )
tokenizer = GPTNeoXTokenizerFast.from_pretrained(f"{config['data_dir']}/mpt-30b-drama") tokenizer = GPTNeoXTokenizerFast.from_pretrained("float-trip/mpt-30b-drama")
def remove_notifications(text): def remove_notifications(text):
"""Change @float-trip to @<i></i>float-trip and carp to c<i></i>arp.""" """Change @float-trip to <span>@</span>float-trip and carp to <span>c</span>arp."""
text = re.sub(rf"@(?!{config['username']}\b)", "@<i></i>", text) text = re.sub(rf"@(?!{config['username']}\b)", "<span>@</span>", text)
notified_users = [ notified_users = [
"aevan", "aevan",
"avean", "avean",
"joan", " capy",
"pewkie", "the rodent",
"carp", "carp",
"idio3", "clit",
"idio ",
"the_homocracy",
"schizocel",
"scitzocel",
"snakes", "snakes",
"sneks", "sneks",
"snekky",
"snekchad",
"jc", "jc",
"justcool", "justcool",
"clit", "lawlz",
"geese", "transgirltradwife",
"kippy", "impassionata",
"mccox", "pizzashill",
"chiobu", "idio3",
"idio ",
"telegram ",
"schizo",
"joan",
"pewkie",
"homocracy",
"donger", "donger",
"geese",
"soren", "soren",
"marseyismywaifu",
"mimw",
"heymoon",
"gaypoon",
"jollymoon",
"chiobu",
"mccox",
"august",
"marco",
"klen",
] ]
def replace(match): def replace(match):
# Insert <i></i> after the first character of the matched string. # Insert <span></span> around the first character of the matched string.
user = match.group() user = match.group()
return f"{user[:1]}<i></i>{user[1:]}" return f"<span>{user[:1]}</span>{user[1:]}"
for user in notified_users: for user in notified_users:
text = re.sub(user, replace, text, flags=re.IGNORECASE) text = re.sub(user, replace, text, flags=re.IGNORECASE)
@ -57,10 +72,11 @@ def format_reply(text):
for username in config["fake_usernames"]: for username in config["fake_usernames"]:
text.replace(username, config["username"]) text.replace(username, config["username"])
text = replace_rdrama_images(text) text = replace_rdrama_images(text)
text = remove_notifications(text)
return text.strip() return text.strip()
def is_low_quality(reply, post, comments): def is_low_quality(reply, _post, comments):
""" """
Label the reply as low quality if: Label the reply as low quality if:
- The Levenshtein distance determines it's similar to a previous comment in the thread. - The Levenshtein distance determines it's similar to a previous comment in the thread.