Change comment search to use tsvector.

2022-10-02 04:55:39 -04:00 · 2022-10-02 04:55:39 -04:00 · 701f64c91b
parent f28e605a58
commit 701f64c91b
4 changed files with 13 additions and 28 deletions
--- a/files/classes/comment.py
+++ b/files/classes/comment.py
@@ -4,6 +4,7 @@ from urllib.parse import urlencode, urlparse, parse_qs
 from flask import *
 from sqlalchemy import *
 from sqlalchemy.orm import relationship
+from sqlalchemy.dialects.postgresql import TSVECTOR
 from files.__main__ import Base
 from files.classes.votes import CommentVote
 from files.helpers.const import *
@@ -59,6 +60,7 @@ class Comment(Base):
 	realupvotes = Column(Integer, default=1)
 	body = Column(String)
 	body_html = Column(String)
+	body_ts = Column(TSVECTOR)
 	ban_reason = Column(String)
 	wordle_result = Column(String)
 	treasure_amount = Column(String)
--- a/files/routes/search.py
+++ b/files/routes/search.py
@@ -14,11 +14,10 @@ valid_params = [
 	'author',
 	'domain',
 	'over18',
-	"post",
-	"before",
-	"after",
-	"title",
-	"exact",
+	'post',
+	'before',
+	'after',
+	'title',
 	search_operator_hole,
 ]

@@ -90,14 +89,7 @@ def searchposts(v):
 								)
 		else: posts = posts.filter(Submission.author_id == author.id)

-	if 'exact' in criteria and 'full_text' in criteria:
-		regex_str = '[[:<:]]'+criteria['full_text']+'[[:>:]]' # https://docs.oracle.com/cd/E17952_01/mysql-5.5-en/regexp.html "word boundaries"
-		if 'title' in criteria:
-			words = [Submission.title.regexp_match(regex_str)]
-		else:
-			words = [or_(Submission.title.regexp_match(regex_str), Submission.body.regexp_match(regex_str))]
-		posts = posts.filter(*words)
-	elif 'q' in criteria:
+	if 'q' in criteria:
 		if('title' in criteria):
 			words = [or_(Submission.title.ilike('%'+x+'%')) \
 					for x in criteria['q']]
@@ -183,9 +175,6 @@ def searchposts(v):
 @app.get("/search/comments")
 @auth_required
 def searchcomments(v):
-
-	return {"error": "Searching comments is disabled temporarily."}, 403
-
 	query = request.values.get("q", '').strip()

 	try: page = max(1, int(request.values.get("page", 1)))
@@ -217,14 +206,9 @@ def searchcomments(v):

 		else: comments = comments.filter(Comment.author_id == author.id)

-	if 'exact' in criteria and 'full_text' in criteria:
-		regex_str = '[[:<:]]'+criteria['full_text']+'[[:>:]]' # https://docs.oracle.com/cd/E17952_01/mysql-5.5-en/regexp.html "word boundaries"
-		words = [Comment.body.regexp_match(regex_str)]
-		comments = comments.filter(*words)
-	elif 'q' in criteria:
-		words = [or_(Comment.body.ilike('%'+x+'%')) \
-				for x in criteria['q']]
-		comments = comments.filter(*words)
+	if 'q' in criteria:
+		comments = comments.filter(Comment.body_ts.op('@@')(
+			func.plainto_tsquery(' & '.join(criteria['q']))))

 	if 'over18' in criteria: comments = comments.filter(Comment.over_18 == True)

--- a/files/templates/search.html
+++ b/files/templates/search.html
@@ -76,10 +76,6 @@
 										<button onClick="addParam()" class="searchparam mb-1">title:true</button> 
 									</div>
 								{% endif %}
-								<div>
-									<div style="display: inline-block; width: 150px; text-align: center;">Exact Match Only:</div>
-									<button onClick="addParam()" class="searchparam mb-1">exact:true</button>
-								</div>
 							</div>
 						</div>
 						<br>
--- a/sql/20221002-body-tsvector.sql
+++ b/sql/20221002-body-tsvector.sql
@@ -0,0 +1,3 @@
+ALTER TABLE public.comments ADD COLUMN body_ts tsvector
+    GENERATED ALWAYS AS (to_tsvector('english', body)) STORED;
+CREATE INDEX comments_body_ts_idx ON public.comments USING GIN (body_ts);