115 lines
3.3 KiB
Python
115 lines
3.3 KiB
Python
import random
|
|
import os
|
|
from glob import glob
|
|
from PIL import Image
|
|
|
|
# Side length, in pixels, of the square canvas each training image is pasted onto.
TRAINING_SIZE = 768

# Resize the Marsey to this width/height (65% of the canvas side; a float).
MARSEY_SIZE = 0.65 * TRAINING_SIZE
|
|
|
|
|
|
def remask(upscaled_path):
    """Apply the alpha channel from marseys/?????.png to upscaled/?????_Swin2SR.png.

    Args:
        upscaled_path: Path to an upscaled image whose file name is the
            original's stem followed by a ``_Swin2SR`` suffix.

    Returns:
        A ``(original_basename, upscaled)`` tuple, where ``upscaled`` is the
        opened upscaled image with the original's alpha channel (resized to
        the upscaled dimensions) applied, or ``None`` when extracting or
        applying the mask raises ``ValueError``.
    """
    # os.path.basename understands the platform's path separators, whereas
    # splitting on "/" misses the backslashes glob() produces on Windows.
    basename = os.path.basename(upscaled_path)
    # Strip the _Swin2SR suffix.
    original_basename = basename.split("_Swin2SR")[0] + ".png"

    # Only the alpha mask is needed from the original, so release its file
    # handle as soon as the mask has been extracted.
    with Image.open(f"marseys/{original_basename}") as original:
        upscaled = Image.open(upscaled_path)
        try:
            mask = original.getchannel("A").resize(upscaled.size)
            upscaled.putalpha(mask)
        except ValueError as e:  # Usually means it's animated.
            print(e, upscaled_path)
            upscaled.close()
            return None

    return (original_basename, upscaled)
|
|
|
|
|
|
def generate_training_set():
    """Build the training images from upscaled/ and marseys/.

    Remasks the images in upscaled/ with alpha masks from marseys/, resizes
    the result, and saves a copy with both a random background color
    (training-colors/) and a white background (training-white/).

    Images for which ``remask`` returns ``None`` are skipped. Each image is
    processed and written before the next one is opened, so only one
    upscaled image is held in memory at a time.
    """
    backgrounds = [
        "#fabfb7",
        "#ffda9e",
        "#c5c6c8",
        "#b2e2f2",
        "#b0c2f2",
        "#b0f2c2",
        "#fdcae1",
        "#8f7193",
        "#77DD77",
        "#836953",
        "#89cff0",
        "#99c5c4",
        "#9adedb",
        "#aa9499",
        "#aaf0d1",
        "#b2fba5",
        "#b39eb5",
        "#bdb0d0",
        "#bee7a5",
        "#befd73",
        "#c1c6fc",
        "#c6a4a4",
        "#cb99c9",
        "#ff6961",
        "#ff694f",
        "#ff9899",
        "#ffb7ce",
        "#ca9bf7",
    ]

    # exist_ok avoids the check-then-create race of exists() + mkdir().
    os.makedirs("training-white", exist_ok=True)
    os.makedirs("training-colors", exist_ok=True)

    canvas_size = (TRAINING_SIZE, TRAINING_SIZE)

    for path in glob("upscaled/*.png"):
        remasked = remask(path)
        if remasked is None:
            continue
        basename, marsey = remasked

        # Only ever shrink: images already within MARSEY_SIZE keep their size.
        max_dim = max(marsey.size)
        scale_factor = MARSEY_SIZE / max_dim if max_dim > MARSEY_SIZE else 1

        marsey = marsey.resize(
            (
                round(marsey.size[0] * scale_factor),
                round(marsey.size[1] * scale_factor),
            ),
            resample=Image.Resampling.LANCZOS,
        )

        # Top-left corner that centers the Marsey on the square canvas.
        box = (
            TRAINING_SIZE // 2 - marsey.size[0] // 2,
            TRAINING_SIZE // 2 - marsey.size[1] // 2,
        )

        # NOTE(review): paste() with an RGBA mask appears to blend the alpha
        # band as well, so semi-transparent edge pixels may remain partly
        # transparent in the saved files - confirm whether
        # Image.alpha_composite is wanted instead.
        white_bg = Image.new("RGBA", canvas_size, "#ffffff")
        white_bg.paste(marsey, box, marsey)
        white_bg.save(f"training-white/{basename}")

        color_bg = Image.new("RGBA", canvas_size, random.choice(backgrounds))
        color_bg.paste(marsey, box, marsey)
        color_bg.save(f"training-colors/{basename}")
|
|
|
|
|
|
def generate_metadata(training_dir):
    """Create a metadata.jsonl in the format of HuggingFace's `datasets` library.

    One JSON object per line is written for every ``*.png`` directly inside
    ``training_dir``, with the keys ``file_name`` and ``text``. The caption
    is the part of the file stem after the last ``"- "``, with ``"Marsey"``
    replaced by ``"Marsey the cat"``.

    The file is rewritten from scratch on every call, so rerunning the
    script does not duplicate entries. Nothing is written when the directory
    contains no PNG files.

    Args:
        training_dir: Directory holding the training images; metadata.jsonl
            is written into the same directory.
    """
    # Function-scope import keeps this block self-contained.
    import json

    entries = []
    # Sorted so the line order does not depend on directory listing order.
    for path in sorted(glob(f"{training_dir}/*.png")):
        file_name = os.path.basename(path)
        # "3 - Marsey walking.png" -> "Marsey walking"
        # splitext removes only the extension, so dots inside the stem
        # ("Marsey v2.0.png") are preserved.
        stem = os.path.splitext(file_name)[0]
        caption = stem.split("- ")[-1].replace("Marsey", "Marsey the cat")
        # json.dumps escapes quotes and backslashes, which hand-built JSON
        # would emit as invalid lines.
        entries.append(
            json.dumps({"file_name": file_name, "text": caption}, ensure_ascii=False)
        )

    if not entries:
        return

    metadata_path = os.path.join(training_dir, "metadata.jsonl")
    with open(metadata_path, "w", encoding="utf-8") as f:
        f.writelines(f"{entry}\n" for entry in entries)
|
|
|
|
|
|
# Build both training sets first, then write a metadata.jsonl for each.
generate_training_set()
for training_dir in ("training-colors", "training-white"):
    generate_metadata(training_dir)