Fix ping removal for @Username and people on the notified users list.
parent
a06c66dff4
commit
42b06c8bfc
2
model.py
2
model.py
|
@ -30,7 +30,7 @@ class StopAfterPlusIsGenerated(LogitsProcessor):
|
||||||
|
|
||||||
class Model:
|
class Model:
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
name = f"{config['data_dir']}/mpt-30b-drama"
|
name = "float-trip/mpt-30b-drama"
|
||||||
self.tokenizer = GPTNeoXTokenizerFast.from_pretrained(
|
self.tokenizer = GPTNeoXTokenizerFast.from_pretrained(
|
||||||
name, pad_token="<|endoftext|>"
|
name, pad_token="<|endoftext|>"
|
||||||
)
|
)
|
||||||
|
|
52
utils.py
52
utils.py
|
@ -11,41 +11,56 @@ URL_REGEX = (
|
||||||
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
|
r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
|
||||||
)
|
)
|
||||||
|
|
||||||
tokenizer = GPTNeoXTokenizerFast.from_pretrained(f"{config['data_dir']}/mpt-30b-drama")
|
tokenizer = GPTNeoXTokenizerFast.from_pretrained("float-trip/mpt-30b-drama")
|
||||||
|
|
||||||
|
|
||||||
def remove_notifications(text):
|
def remove_notifications(text):
|
||||||
"""Change @float-trip to @<i></i>float-trip and carp to c<i></i>arp."""
|
"""Change @float-trip to <span>@</span>float-trip and carp to <span>c</span>arp."""
|
||||||
text = re.sub(rf"@(?!{config['username']}\b)", "@<i></i>", text)
|
text = re.sub(rf"@(?!{config['username']}\b)", "<span>@</span>", text)
|
||||||
|
|
||||||
notified_users = [
|
notified_users = [
|
||||||
"aevan",
|
"aevan",
|
||||||
"avean",
|
"avean",
|
||||||
"joan",
|
" capy",
|
||||||
"pewkie",
|
"the rodent",
|
||||||
"carp",
|
"carp",
|
||||||
"idio3",
|
"clit",
|
||||||
"idio ",
|
|
||||||
"the_homocracy",
|
|
||||||
"schizocel",
|
|
||||||
"scitzocel",
|
|
||||||
"snakes",
|
"snakes",
|
||||||
"sneks",
|
"sneks",
|
||||||
|
"snekky",
|
||||||
|
"snekchad",
|
||||||
"jc",
|
"jc",
|
||||||
"justcool",
|
"justcool",
|
||||||
"clit",
|
"lawlz",
|
||||||
"geese",
|
"transgirltradwife",
|
||||||
"kippy",
|
"impassionata",
|
||||||
"mccox",
|
"pizzashill",
|
||||||
"chiobu",
|
"idio3",
|
||||||
|
"idio ",
|
||||||
|
"telegram ",
|
||||||
|
"schizo",
|
||||||
|
"joan",
|
||||||
|
"pewkie",
|
||||||
|
"homocracy",
|
||||||
"donger",
|
"donger",
|
||||||
|
"geese",
|
||||||
"soren",
|
"soren",
|
||||||
|
"marseyismywaifu",
|
||||||
|
"mimw",
|
||||||
|
"heymoon",
|
||||||
|
"gaypoon",
|
||||||
|
"jollymoon",
|
||||||
|
"chiobu",
|
||||||
|
"mccox",
|
||||||
|
"august",
|
||||||
|
"marco",
|
||||||
|
"klen",
|
||||||
]
|
]
|
||||||
|
|
||||||
def replace(match):
|
def replace(match):
|
||||||
# Insert <i></i> after the first character of the matched string.
|
# Insert <span></span> around the first character of the matched string.
|
||||||
user = match.group()
|
user = match.group()
|
||||||
return f"{user[:1]}<i></i>{user[1:]}"
|
return f"<span>{user[:1]}</span>{user[1:]}"
|
||||||
|
|
||||||
for user in notified_users:
|
for user in notified_users:
|
||||||
text = re.sub(user, replace, text, flags=re.IGNORECASE)
|
text = re.sub(user, replace, text, flags=re.IGNORECASE)
|
||||||
|
@ -57,10 +72,11 @@ def format_reply(text):
|
||||||
for username in config["fake_usernames"]:
|
for username in config["fake_usernames"]:
|
||||||
text.replace(username, config["username"])
|
text.replace(username, config["username"])
|
||||||
text = replace_rdrama_images(text)
|
text = replace_rdrama_images(text)
|
||||||
|
text = remove_notifications(text)
|
||||||
return text.strip()
|
return text.strip()
|
||||||
|
|
||||||
|
|
||||||
def is_low_quality(reply, post, comments):
|
def is_low_quality(reply, _post, comments):
|
||||||
"""
|
"""
|
||||||
Label the reply as low quality if:
|
Label the reply as low quality if:
|
||||||
- The Levenshtein distance determines it's similar to a previous comment in the thread.
|
- The Levenshtein distance determines it's similar to a previous comment in the thread.
|
||||||
|
|
Loading…
Reference in New Issue