migrate images again

2023-05-16 11:47:16 +03:00 · 2023-05-16 11:47:16 +03:00 · 240bd7354c
parent 638111906e
commit 240bd7354c
2 changed files with 88 additions and 0 deletions
--- a/files/helpers/media.py
+++ b/files/helpers/media.py
@ -183,6 +183,10 @@ def process_image(filename:str, v, resize=0, trim=False, uploader_id:Optional[in
 	# if an image is too large or webp conversion fails, it'll crash
 	# to avoid this, we'll simply return None instead
 	has_request = has_request_context()
+
+	if request.path == '/admin/images':
+		has_request = False
+
 	size = os.stat(filename).st_size
 	patron = bool(v.patron)

--- a/files/routes/admin.py
+++ b/files/routes/admin.py
@ -25,6 +25,90 @@ from files.routes.routehelpers import get_alt_graph, get_alt_graph_ids

 from .front import frontlist, comment_idlist

+mylist2 = [  # (model, column, attribute name) triples scanned by the /admin/images job
+	(User, User.bio, "bio"),
+	(Submission, Submission.body, "body"),
+	(Comment, Comment.body, "body"),
+]
+SITES = r'(i.imgur.com|i.imgur.io|i.ibb.co|files.catbox.moe|pomf2.lain.la\/f)'  # raw string: `\/` is not a valid str escape
+images4_regex = re.compile(rf'https:\/\/{SITES}\/([a-zA-Z0-9/]*?)(_d)?\.(webp|png|jpg|jpeg|gif)[a-z0-9=&?;]*', flags=re.A)  # groups: 1 host, 2 path/id, 3 "_d", 4 extension
+def _rehost_image(url, ext, v):
+	"""Download `url` into /images and return its SITE_IMAGES URL, or None on failure."""
+	try: image_req = requests.get(url, headers=HEADERS, timeout=5)
+	except Exception: return None
+	if image_req.status_code >= 400:
+		print(f"ERROR CODE: {image_req.status_code}", flush=True)
+		return None
+	content_type = image_req.headers.get("Content-Type","")
+	if not content_type.startswith("image/"):
+		print("NOT IMAGE/", flush=True)
+		return None
+	if content_type.startswith("image/svg"):
+		print("IMAGE/SVG", flush=True)
+		return None
+
+	name = f'/images/{time.time()}'.replace('.','') + '.webp'
+	with open(name, "wb") as file:
+		for chunk in image_req.iter_content(1024):
+			file.write(chunk)
+	# non-webp downloads go through process_image (presumably converted in place -- confirm)
+	if ext != 'webp':
+		try: process_image(name, v)
+		except Exception: return None
+	# the file can be missing or empty at this point; both count as failure
+	if not os.path.exists(name): return None
+	size = os.stat(name).st_size
+	print(size, flush=True)
+	if not size: return None
+	return f"https://{SITE_IMAGES}{name}"
+
+def _rehost_match(y, migrated, v):
+	"""re.sub callback: return the re-hosted URL for match `y`, or its text unchanged."""
+	url = f'https://{y.group(1)}/{y.group(2)}.{y.group(4)}'
+	# cache per source URL (not per bare id) so each distinct image is fetched once
+	if url not in migrated:
+		migrated[url] = _rehost_image(url, y.group(4), v)
+	return migrated[url] or y.group(0)
+
+@app.get('/admin/images')
+@admin_level_required(5)
+def images(v):
+	"""One-off admin job: re-host external images referenced in the mylist2 text columns.
+	Rewrites each changed row's text and `<attrname>_html`; prints progress; returns "success"."""
+	for cls, attr, attrname in mylist2:
+		print(f'{cls.__name__}.{attrname}\n---------------------------', flush=True)
+		items = g.db.query(cls).options(load_only(cls.id, attr)).filter(
+				attr.op('similar to')(f'%https://{SITES}/%.(webp|png|jpg|jpeg|gif)%'),
+			).order_by(func.random()).all()
+		if not items: continue
+
+		total = len(items)
+		for x, i in enumerate(items):
+			attribute = getattr(i, attrname)
+			if attribute.startswith(f'https://{SITE_IMAGES}/images/'):
+				continue
+			if not images4_regex.search(attribute):
+				continue
+
+			print(f'{x+1}/{total}: {i.id}', flush=True)
+			migrated = {}
+			new_attribute = images4_regex.sub(lambda y: _rehost_match(y, migrated, v), attribute)
+			# nothing was re-hosted: leave the row untouched
+			if new_attribute == attribute: continue
+			setattr(i, attrname, new_attribute)
+			try: setattr(i, f'{attrname}_html', sanitize(new_attribute))
+			except Exception:
+				# rollback drops the pending edit, so there is nothing left to commit
+				g.db.rollback()
+				continue
+
+			g.db.add(i)
+			g.db.commit()
+
+		print('done!!!!!', flush=True)
+		time.sleep(5)
+	return "success"
+
@app.get('/admin/loggedin')
@limiter.limit(DEFAULT_RATELIMIT)
@limiter.limit(DEFAULT_RATELIMIT, key_func=get_ID)