forked from MarseyWorld/MarseyWorld

total tracking parameter death

parent d4653f824a
commit 3330c2517f
@@ -58,9 +58,6 @@ snappy_youtube_regex = re.compile('<lite-youtube videoid="(.+?)" params="autopla
 email_regex = re.compile(EMAIL_REGEX_PATTERN, flags=re.A)
 
-utm_regex = re.compile('utm_[0-z]+=[0-z_.]+&', flags=re.A)
-utm_regex2 = re.compile('[?&]utm_[0-z]+=[0-z_.]+', flags=re.A)
-
 slur_regex = re.compile(f"<[^>]*>|{slur_single_words}", flags=re.I|re.A)
 slur_regex_upper = re.compile(f"<[^>]*>|{slur_single_words.upper()}", flags=re.A)
 profanity_regex = re.compile(f"<[^>]*>|{profanity_single_words}", flags=re.I|re.A)
 
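For context, the two deleted patterns were the old mechanism for scrubbing tracking parameters out of text. A rough standalone sketch of what they used to strip (the sample URL is hypothetical, not from the codebase):

import re

utm_regex = re.compile('utm_[0-z]+=[0-z_.]+&', flags=re.A)
utm_regex2 = re.compile('[?&]utm_[0-z]+=[0-z_.]+', flags=re.A)

url = 'https://example.com/article?utm_source=feed&id=42&utm_campaign=social'
url = utm_regex.sub('', url)   # drops "utm_source=feed&" (a utm pair followed by another param)
url = utm_regex2.sub('', url)  # drops the trailing "&utm_campaign=social"
print(url)                     # https://example.com/article?id=42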
@@ -378,11 +378,6 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 	if blackjack and execute_blackjack(g.v, None, sanitized, blackjack):
 		sanitized = 'g'
 
-	sanitized = utm_regex.sub('', sanitized)
-	sanitized = utm_regex2.sub('', sanitized)
-
-	sanitized = normalize_url(sanitized)
-
 	if '```' not in sanitized and '<pre>' not in sanitized:
 		sanitized = linefeeds_regex.sub(r'\1\n\n\2', sanitized)
 
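Note that the dropped lines ran the substitutions over the entire sanitized markup, so anything that merely looked like a tracking parameter was rewritten, not just link targets. A hypothetical illustration of that blunt behaviour:

import re

utm_regex = re.compile('utm_[0-z]+=[0-z_.]+&', flags=re.A)

# The comment body below is made up; it contains no actual link.
body = '<p>the utm_source=foo&bar parameter is used for tracking</p>'
print(utm_regex.sub('', body))
# <p>the bar parameter is used for tracking</p>

The per-link handling in the next hunk avoids this by normalizing only real href values.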
@@ -559,6 +554,8 @@ def sanitize(sanitized, golden=True, limit_pings=0, showmore=False, count_emojis
 		href = link.get("href")
 		if not href: continue
 
+		href = normalize_url(href)
+
 		def unlinkfy():
 			link.string = href
 			del link["href"]
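The link.get("href") / link.string / del link["href"] calls suggest the surrounding loop walks anchor tags in a BeautifulSoup tree. Under that assumption, the added call slots in roughly like this sketch; the stub normalize_url and the sample HTML are illustrative only:

from bs4 import BeautifulSoup

def normalize_url(href):
	# hypothetical stand-in for the project's normalize_url
	return href.replace('?utm_source=rss', '')

html = '<p><a href="https://example.com/post?utm_source=rss">link</a></p>'
soup = BeautifulSoup(html, 'html.parser')

for link in soup.find_all('a'):
	href = link.get("href")
	if not href: continue
	href = normalize_url(href)  # the line this hunk adds
	link["href"] = href

print(soup)  # <p><a href="https://example.com/post">link</a></p>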
@@ -665,7 +662,21 @@ def filter_emojis_only(title, golden=True, count_emojis=False, graceful=False):
 	else:
 		return title.strip()
 
+
+def is_whitelisted(domain, k):
+	if k.lower().endswith('id'):
+		return True
+	if 'sort' in k.lower() or 'query' in k.lower():
+		return True
+	if k in {'v','context','q','page','time_continue','title','scrollToComments','u','url'}:
+		return True
+	if k == 't' and domain != 'youtube.com':
+		return True
+	return False
+
 def normalize_url(url):
+	url = unquote(url)
+
 	url = reddit_domain_regex.sub(r'\1https://old.reddit.com/\3/', url)
 
 	url = url.replace("https://youtu.be/", "https://youtube.com/watch?v=") \
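The new is_whitelisted helper decides which query keys survive normalization. Copied out of the hunk above and exercised on a few sample domain/key pairs (the samples are illustrative):

def is_whitelisted(domain, k):
	if k.lower().endswith('id'):
		return True
	if 'sort' in k.lower() or 'query' in k.lower():
		return True
	if k in {'v','context','q','page','time_continue','title','scrollToComments','u','url'}:
		return True
	if k == 't' and domain != 'youtube.com':
		return True
	return False

print(is_whitelisted('example.com', 'utm_source'))  # False -> dropped
print(is_whitelisted('example.com', 'post_id'))     # True  -> kept (ends with "id")
print(is_whitelisted('old.reddit.com', 'sort'))     # True  -> kept
print(is_whitelisted('youtube.com', 't'))           # False -> "t" is only kept off youtube.com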
@@ -685,10 +696,25 @@ def normalize_url(url):
 		.replace("https://nitter.42l.fr/", "https://twitter.com/") \
 		.replace("https://nitter.lacontrevoie.fr/", "https://twitter.com/") \
 		.replace("/giphy.gif", "/giphy.webp") \
-		.replace('https://old.reddit.com/r/place/?', 'https://new.reddit.com/r/place/?') \
+
+	url = giphy_regex.sub(r'\1.webp', url)
+
+	if not url.startswith('/'):
+		parsed_url = urlparse(url)
+		domain = parsed_url.netloc
+		qd = parse_qs(parsed_url.query, keep_blank_values=True)
+		filtered = {k: val for k, val in qd.items() if is_whitelisted(domain, k)}
+		new_url = ParseResult(scheme="https",
+					netloc=parsed_url.netloc,
+					path=parsed_url.path,
+					params=parsed_url.params,
+					query=urlencode(filtered, doseq=True),
+					fragment=parsed_url.fragment)
+		url = urlunparse(new_url)
+
+	url = url.rstrip('/')
+
 	url = imgur_regex.sub(r'\1_d.webp?maxwidth=9999&fidelity=grand', url)
-	url = giphy_regex.sub(r'\1.webp', url)
 
 	return url
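The added block rebuilds the query string from whitelisted keys only. A minimal, self-contained sketch of that round trip using the same urllib calls; the keep() predicate is a simplified stand-in for is_whitelisted, and the URL is a made-up example:

from urllib.parse import ParseResult, parse_qs, unquote, urlencode, urlparse, urlunparse

def keep(domain, k):
	# stand-in for is_whitelisted, just for this demo
	return k == 'v' or k.lower().endswith('id')

url = unquote('https://youtube.com/watch?v=dQw4w9WgXcQ&utm_source=share&feature=youtu.be')
parsed_url = urlparse(url)
qd = parse_qs(parsed_url.query, keep_blank_values=True)
filtered = {k: val for k, val in qd.items() if keep(parsed_url.netloc, k)}

new_url = ParseResult(scheme="https",
			netloc=parsed_url.netloc,
			path=parsed_url.path,
			params=parsed_url.params,
			query=urlencode(filtered, doseq=True),
			fragment=parsed_url.fragment)
print(urlunparse(new_url).rstrip('/'))
# https://youtube.com/watch?v=dQw4w9WgXcQ

The real function additionally skips relative URLs (the startswith('/') guard) and applies the imgur/giphy rewrites shown in the hunk.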
@@ -5,7 +5,7 @@ from io import BytesIO
 from os import path
 from shutil import copyfile
 from sys import stdout
-from urllib.parse import ParseResult, urlparse, urlunparse, unquote
+from urllib.parse import urlparse
 
 import gevent
 import requests
@@ -397,30 +397,6 @@ def is_repost(v):
 		abort(400)
 
 	url = normalize_url(url)
-	url = unquote(url)
-	parsed_url = urlparse(url)
-
-	domain = parsed_url.netloc
-	if domain in {'old.reddit.com','twitter.com','instagram.com','tiktok.com'} and '/search' not in url:
-		new_url = ParseResult(scheme="https",
-					netloc=parsed_url.netloc,
-					path=parsed_url.path,
-					params=parsed_url.params,
-					query=None,
-					fragment=parsed_url.fragment)
-	else:
-		qd = parse_qs(parsed_url.query, keep_blank_values=True)
-		filtered = {k: val for k, val in qd.items() if not k.startswith('utm_') and not k.startswith('ref_')}
-
-		new_url = ParseResult(scheme="https",
-					netloc=parsed_url.netloc,
-					path=parsed_url.path,
-					params=parsed_url.params,
-					query=urlencode(filtered, doseq=True),
-					fragment=parsed_url.fragment)
-
-	url = urlunparse(new_url)
-	url = url.rstrip('/')
-
 	search_url = url.replace('%', '').replace('\\', '').replace('_', '\_').strip()
 	repost = g.db.query(Post).filter(
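The per-route logic deleted here and in the next hunk only blacklisted utm_* and ref_* keys, so any other tracking key survived; the whitelist now applied inside normalize_url inverts that. A small illustration with a hypothetical query dict:

qd = {'q': ['cats'], 'utm_source': ['share'], 'si': ['AbC123']}

# Old behaviour (the deleted comprehension): only utm_* / ref_* keys are dropped,
# so an unrecognized tracking key like "si" slips through.
filtered = {k: val for k, val in qd.items() if not k.startswith('utm_') and not k.startswith('ref_')}
print(filtered)  # {'q': ['cats'], 'si': ['AbC123']}

# New behaviour (is_whitelisted, shown earlier): "q" is whitelisted and kept,
# while both "utm_source" and "si" are dropped.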
@@ -491,31 +467,6 @@ def submit_post(v, sub=None):
 
 	if url:
 		url = normalize_url(url)
-		url = unquote(url)
-		parsed_url = urlparse(url)
-
-		domain = parsed_url.netloc
-		if domain in {'old.reddit.com','twitter.com','instagram.com','tiktok.com'} and '/search' not in url:
-			new_url = ParseResult(scheme="https",
-						netloc=parsed_url.netloc,
-						path=parsed_url.path,
-						params=parsed_url.params,
-						query=None,
-						fragment=parsed_url.fragment)
-		else:
-			qd = parse_qs(parsed_url.query, keep_blank_values=True)
-			filtered = {k: val for k, val in qd.items() if not k.startswith('utm_') and not k.startswith('ref_')}
-
-			new_url = ParseResult(scheme="https",
-						netloc=parsed_url.netloc,
-						path=parsed_url.path,
-						params=parsed_url.params,
-						query=urlencode(filtered, doseq=True),
-						fragment=parsed_url.fragment)
-
-		url = urlunparse(new_url)
-
-		url = url.rstrip('/')
 
 		if v.admin_level < PERMS["IGNORE_DOMAIN_BAN"]:
 			y = tldextract.extract(url).registered_domain + parsed_url.path