Fix ping removal for @Username and people on the notified users list.

2023-07-23 14:52:00 +00:00 · 2023-07-23 14:52:00 +00:00 · 42b06c8bfc
parent a06c66dff4
commit 42b06c8bfc
2 changed files with 35 additions and 19 deletions
--- a/model.py
+++ b/model.py
@@ -30,7 +30,7 @@ class StopAfterPlusIsGenerated(LogitsProcessor):

 class Model:
    def __init__(self):
-        name = f"{config['data_dir']}/mpt-30b-drama"
+        name = "float-trip/mpt-30b-drama"
        self.tokenizer = GPTNeoXTokenizerFast.from_pretrained(
            name, pad_token="<|endoftext|>"
        )
--- a/utils.py
+++ b/utils.py
@@ -11,41 +11,56 @@ URL_REGEX = (
    r"http[s]?://(?:[a-zA-Z]|[0-9]|[$-_@.&+]|[!*\\(\\),]|(?:%[0-9a-fA-F][0-9a-fA-F]))+"
 )

-tokenizer = GPTNeoXTokenizerFast.from_pretrained(f"{config['data_dir']}/mpt-30b-drama")
+tokenizer = GPTNeoXTokenizerFast.from_pretrained("float-trip/mpt-30b-drama")


 def remove_notifications(text):
-    """Change @float-trip to @<i></i>float-trip and carp to c<i></i>arp."""
-    text = re.sub(rf"@(?!{config['username']}\b)", "@<i></i>", text)
+    """Change @float-trip to <span>@</span>float-trip and carp to <span>c</span>arp."""
+    text = re.sub(rf"@(?!{config['username']}\b)", "<span>@</span>", text)

    notified_users = [
        "aevan",
        "avean",
-        "joan",
-        "pewkie",
+        " capy",
+        "the rodent",
        "carp",
-        "idio3",
-        "idio ",
-        "the_homocracy",
-        "schizocel",
-        "scitzocel",
+        "clit",
        "snakes",
        "sneks",
+        "snekky",
+        "snekchad",
        "jc",
        "justcool",
-        "clit",
-        "geese",
-        "kippy",
-        "mccox",
-        "chiobu",
+        "lawlz",
+        "transgirltradwife",
+        "impassionata",
+        "pizzashill",
+        "idio3",
+        "idio ",
+        "telegram ",
+        "schizo",
+        "joan",
+        "pewkie",
+        "homocracy",
        "donger",
+        "geese",
        "soren",
+        "marseyismywaifu",
+        "mimw",
+        "heymoon",
+        "gaypoon",
+        "jollymoon",
+        "chiobu",
+        "mccox",
+        "august",
+        "marco",
+        "klen",
    ]

    def replace(match):
-        # Insert <i></i> after the first character of the matched string.
+        # Insert <span></span> around the first character of the matched string.
        user = match.group()
-        return f"{user[:1]}<i></i>{user[1:]}"
+        return f"<span>{user[:1]}</span>{user[1:]}"

    for user in notified_users:
        text = re.sub(user, replace, text, flags=re.IGNORECASE)
@@ -57,10 +72,11 @@ def format_reply(text):
    for username in config["fake_usernames"]:
        text.replace(username, config["username"])
    text = replace_rdrama_images(text)
+    text = remove_notifications(text)
    return text.strip()


-def is_low_quality(reply, post, comments):
+def is_low_quality(reply, _post, comments):
    """
    Label the reply as low quality if:
    - The Levenshtein distance determines it's similar to a previous comment in the thread.