ml-finetuning/stable-diffusion/preprocess_training_data.py

import random
import os
from glob import glob
from PIL import Image

TRAINING_SIZE = 768
MARSEY_SIZE = TRAINING_SIZE * 0.65  # Downscale the Marsey so its longest side fits within this.
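# (A TRAINING_SIZE of 768 matches the native resolution of Stable Diffusion 2.x
# checkpoints, which is presumably the fine-tuning target here.)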


# Apply the alpha channel from marseys/?????.png to upscaled/?????_Swin2SR.png.
def remask(upscaled_path):
    basename = upscaled_path.split("/")[-1]
    # Strip the _Swin2SR suffix to recover the original filename.
    original_basename = basename.split("_Swin2SR")[0] + ".png"
    original = Image.open(f"marseys/{original_basename}")
    upscaled = Image.open(upscaled_path)
    try:
        # Upscaling discards transparency, so reapply the original's alpha mask,
        # resized to the upscaled dimensions.
        mask = original.getchannel("A").resize((upscaled.width, upscaled.height))
        upscaled.putalpha(mask)
        return (original_basename, upscaled)
    except ValueError as e:  # Usually means the image is animated (no "A" channel).
        print(e, upscaled_path)
        return None
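
# Illustrative call (the sample filename below is hypothetical; the "_Swin2SR"
# suffix is presumably appended by the upscaling step that fills upscaled/):
#   remask("upscaled/marsey_dance_Swin2SR.png") pulls the alpha mask from
#   "marseys/marsey_dance.png" and returns ("marsey_dance.png", <RGBA image>),
#   or None if the original has no alpha channel.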


# Remasks the images in upscaled/ with alpha masks from marseys/, resizes the result,
# and saves one copy on a white background and one on a random pastel background color.
def generate_training_set():
    backgrounds = [
        "#fabfb7",
        "#ffda9e",
        "#c5c6c8",
        "#b2e2f2",
        "#b0c2f2",
        "#b0f2c2",
        "#fdcae1",
        "#8f7193",
        "#77DD77",
        "#836953",
        "#89cff0",
        "#99c5c4",
        "#9adedb",
        "#aa9499",
        "#aaf0d1",
        "#b2fba5",
        "#b39eb5",
        "#bdb0d0",
        "#bee7a5",
        "#befd73",
        "#c1c6fc",
        "#c6a4a4",
        "#cb99c9",
        "#ff6961",
        "#ff694f",
        "#ff9899",
        "#ffb7ce",
        "#ca9bf7",
    ]
    os.makedirs("training-white", exist_ok=True)
    os.makedirs("training-colors", exist_ok=True)
    remasked_images = [remask(path) for path in glob("upscaled/*.png")]
    remasked_images = [i for i in remasked_images if i is not None]
    for basename, marsey in remasked_images:
        # Downscale so the longest side is at most MARSEY_SIZE; never upscale.
        max_dim = max(marsey.size)
        scale_factor = 1
        if max_dim > MARSEY_SIZE:
            scale_factor = MARSEY_SIZE / max_dim
        marsey = marsey.resize(
            (
                round(marsey.size[0] * scale_factor),
                round(marsey.size[1] * scale_factor),
            ),
            resample=Image.Resampling.LANCZOS,
        )
        # Upper-left corner that centers the Marsey on the square canvas.
        box = (
            TRAINING_SIZE // 2 - marsey.size[0] // 2,
            TRAINING_SIZE // 2 - marsey.size[1] // 2,
        )
        white_bg = Image.new("RGBA", (TRAINING_SIZE, TRAINING_SIZE), "#ffffff")
        white_bg.paste(marsey, box, marsey)
        white_bg.save(f"training-white/{basename}")
        color_bg = Image.new(
            "RGBA", (TRAINING_SIZE, TRAINING_SIZE), random.choice(backgrounds)
        )
        color_bg.paste(marsey, box, marsey)
        color_bg.save(f"training-colors/{basename}")


# Creates a metadata.jsonl in the format expected by HuggingFace's `datasets`
# library: one JSON object per line with "file_name" and "text" keys.
def generate_metadata(training_dir):
    # Open in write mode so rerunning the script doesn't append duplicate rows.
    with open(f"{training_dir}/metadata.jsonl", "w") as f:
        for path in glob(f"{training_dir}/*.png"):
            # "3 - Marsey walking.png" -> "Marsey walking"
            stem = path.split("/")[-1].split(".")[0]
            caption = stem.split("- ")[-1]
            caption = caption.replace("Marsey", "Marsey the cat")
            f.write(f'{{"file_name": "{stem}.png", "text": "{caption}"}}\n')
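
# Example metadata.jsonl line produced for the file shown above:
#   {"file_name": "3 - Marsey walking.png", "text": "Marsey the cat walking"}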


generate_training_set()
generate_metadata("training-colors")
generate_metadata("training-white")