showmore on too many newlines (works now) (#90)

At this point I think we should stop abusing regexes to implement the showmore logic. I also reduced the character constant to 3000/2500 and increased the line constant to 20, but I don't have any strong feelings about them.

Co-authored-by: official-techsupport <official_techsupport@protonmail.com>
Reviewed-on: #90
Co-authored-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>
Co-committed-by: official-techsupport <official-techsupport@noreply.fsdfsd.net>
2023-01-22 23:27:24 +00:00 · 2023-01-22 23:27:24 +00:00 · 5902fa5822
parent b1131fc64b
commit 5902fa5822
2 changed files with 15 additions and 7 deletions
--- a/files/helpers/regex.py
+++ b/files/helpers/regex.py
@@ -105,10 +105,9 @@ reddit_domain_regex = re.compile("(^|\s|\()https?:\/\/(reddit\.com|(?:(?:[A-z]{2

 color_regex = re.compile("[a-f0-9]{6}", flags=re.A)

-# lazy match on the {}?, only match if there is trailing stuff
+# lazy match on the .*?, only match if there is trailing stuff
 # Specifically match Snappy's way of formatting, this might break some losers' comments.
-# showmore_regex = re.compile(r"^((.{3000,}?|(.*?<.*?){10,})?<\/p>(?:<\/li><\/ul>)?)(\s*<p>.*)", flags=re.A|re.DOTALL)
-showmore_regex = re.compile(r"^(.{3000,}?</p>(?:</li></ul>)?)(\s*<p>.*)", flags=re.A|re.DOTALL)
+showmore_regex = re.compile(r"^(.*?</p>(?:</li></ul>)?)(\s*<p>.*)", flags=re.A|re.DOTALL)

 search_token_regex = re.compile('"([^"]*)"|(\S+)', flags=re.A)

--- a/files/helpers/sanitize.py
+++ b/files/helpers/sanitize.py
@@ -410,10 +410,19 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=True, count_marseys
 	if '<pre>' not in sanitized and not sidebar:
 		sanitized = sanitized.replace('\n','')

-	# if showmore and len(sanitized) > 3500 or sanitized.count('<') > 15:
-	# 	sanitized = showmore_regex.sub(r'\1<p><button class="showmore">SHOW MORE</button></p><d class="d-none">\4</d>', sanitized, count=1)
-	if showmore and len(sanitized) > 3500:
-		sanitized = showmore_regex.sub(r'\1<p><button class="showmore">SHOW MORE</button></p><d class="d-none">\2</d>', sanitized, count=1)
+	if showmore:
+		# Insert a show more button if the text is too long or has too many paragraphs
+		pos = 0
+		for _ in range(20):
+			pos = sanitized.find('</p>', pos + 4)
+			if pos < 0:
+				break
+		if pos < 0 and len(sanitized) > 3000:
+			pos = 2500
+		if pos >= 0:
+			sanitized = (sanitized[:pos] +
+				showmore_regex.sub(r'\1<p><button class="showmore">SHOW MORE</button></p><d class="d-none">\2</d>',
+					sanitized[pos:], count=1))

 	return sanitized.strip()