diff --git a/files/helpers/media.py b/files/helpers/media.py
index f393e8eb9..5734b14c2 100644
--- a/files/helpers/media.py
+++ b/files/helpers/media.py
@@ -183,6 +183,11 @@ def process_image(filename:str, v, resize=0, trim=False, uploader_id:Optional[in
 	# if an image is too large or webp conversion fails, it'll crash
 	# to avoid this, we'll simply return None instead
 	has_request = has_request_context()
+
+	# the /admin/images rehost job must not abort on a single failed image; force the silent (return None) path
+	if has_request and request.path == '/admin/images':
+		has_request = False
+
 	size = os.stat(filename).st_size
 
 	patron = bool(v.patron)
diff --git a/files/routes/admin.py b/files/routes/admin.py
index f149d6232..3da65b7ab 100644
--- a/files/routes/admin.py
+++ b/files/routes/admin.py
@@ -25,6 +25,96 @@ from files.routes.routehelpers import get_alt_graph, get_alt_graph_ids
 
 from .front import frontlist, comment_idlist
 
+# (model, column, attribute name) targets whose text may contain externally hosted images
+REHOST_TARGETS = [
+	(User, User.bio, "bio"),
+	(Submission, Submission.body, "body"),
+	(Comment, Comment.body, "body"),
+]
+SITES = r'(i.imgur.com|i.imgur.io|i.ibb.co|files.catbox.moe|pomf2.lain.la\/f)'
+images4_regex = re.compile(rf'https:\/\/{SITES}\/([a-zA-Z0-9/]*?)(_d)?\.(webp|png|jpg|jpeg|gif)[a-z0-9=&?;]*', flags=re.A)
+
+@app.get('/admin/images')
+@admin_level_required(5)
+def images(v):
+	"""Rehost images from external hosts onto SITE_IMAGES and rewrite the bios, post bodies, and comment bodies that reference them."""
+	for cls, attr, attrname in REHOST_TARGETS:
+		print(f'{cls.__name__}.{attrname}\n---------------------------', flush=True)
+		items = g.db.query(cls).options(load_only(cls.id, attr)).filter(
+			attr.op('similar to')(f'%https://{SITES}/%.(webp|png|jpg|jpeg|gif)%'),
+		).order_by(func.random()).all()
+		if not items: continue
+
+		total = len(items)
+		for x, i in enumerate(items):
+			attribute = getattr(i, attrname)
+			if attribute.startswith(f'https://{SITE_IMAGES}/images/'):
+				continue
+			if not images4_regex.search(attribute):
+				continue
+
+			print(f'{x+1}/{total}: {i.id}', flush=True)
+
+			captured = set()
+			for y in images4_regex.finditer(attribute):
+				site = y.group(1)
+				image_id = y.group(2)
+				if image_id in captured: continue
+				captured.add(image_id)
+
+				ext = y.group(4)
+
+				url = f'https://{site}/{image_id}.{ext}'
+				try: image_req = requests.get(url, headers=HEADERS, timeout=5)
+				except Exception: continue
+
+				if image_req.status_code >= 400:
+					print(f"ERROR CODE: {image_req.status_code}", flush=True)
+					continue
+				if not image_req.headers.get("Content-Type","").startswith("image/"):
+					print("NOT IMAGE/", flush=True)
+					continue
+				if image_req.headers.get("Content-Type","").startswith("image/svg"):
+					print("IMAGE/SVG", flush=True)
+					continue
+
+				# download to a unique local path, then convert to webp via process_image
+				name = f'/images/{time.time()}'.replace('.','') + '.webp'
+
+				with open(name, "wb") as file:
+					for chunk in image_req.iter_content(1024):
+						file.write(chunk)
+
+				if ext != 'webp':
+					try: process_image(name, v)
+					except Exception: continue
+
+				# process_image deletes the file when conversion fails
+				if not os.path.exists(name):
+					continue
+
+				size = os.stat(name).st_size
+				print(size, flush=True)
+				if not size: continue
+
+				new_url = f"https://{SITE_IMAGES}{name}"
+
+				attribute = attribute.replace(y.group(0), new_url)
+				setattr(i, attrname, attribute)
+
+				try: setattr(i, f'{attrname}_html', sanitize(attribute))
+				except Exception:
+					# couldn't regenerate the html; drop the change for this item
+					g.db.rollback()
+					continue
+
+				g.db.add(i)
+				g.db.commit()
+
+	print('done!!!!!', flush=True)
+	time.sleep(5)
+	return "success"
+
 @app.get('/admin/loggedin')
 @limiter.limit(DEFAULT_RATELIMIT)
 @limiter.limit(DEFAULT_RATELIMIT, key_func=get_ID)