总览
准备工作
为了训练一个 Diffusion LoRA 模型, 一定量的图片素材做数据集必不可少.
我这里以白咲花为例, 图片素材来自番剧截图以及用爬虫从 Gelbooru 爬取的图片, 总计 1497 张.
Gelbooru 对爬虫比较友好, 注册一个账号即可以 json 格式批量爬取图片, 甚至可以获取图片标签方便训练.
参考脚本:
展开查看参考脚本
需要安装的库: httpx rich
import asyncioimport functoolsimport itertoolsfrom urllib.parse import urlparse
from httpx import AsyncClientfrom rich.progress import Progressfrom pathlib import Path
# Gelbooru API credentials; both are interpolated into every API request URL.
API_KEY = "YOUR API KEY"
UID = "YOUR UID"
# Tag query sent to the Gelbooru post index.
TAGS = "shirosaki_hana"
# Shared limiter for the decorated page/image coroutines: at most 100 hold it at once.
# NOTE(review): this is created at import time, before any event loop is running;
# confirm that is acceptable on the Python version this script targets.
SEMAPHORE = asyncio.Semaphore(100)
def async_retry(
    delay: float = 1.0,
    backoff: float = 2.0,
    exceptions=(Exception,),
    max_attempts=None,
):
    """Build a decorator that retries a coroutine function with back-off.

    Args:
        delay: Seconds to wait before the first retry.
        backoff: Factor the wait is multiplied by after every failed attempt.
        exceptions: Exception types that trigger a retry; anything else
            propagates immediately.
        max_attempts: Maximum number of calls to make in total. ``None``
            (the default) retries forever, which is the historical behaviour.

    Returns:
        A decorator for ``async def`` functions. The wrapped coroutine returns
        whatever the original returns.

    Raises:
        The last caught exception, once ``max_attempts`` calls have failed.
    """

    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            wait = delay
            attempt = 0
            while True:
                try:
                    return await func(*args, **kwargs)
                except exceptions as e:
                    attempt += 1
                    # Give up instead of looping forever when a limit was requested.
                    if max_attempts is not None and attempt >= max_attempts:
                        raise
                    call_args = ", ".join(
                        itertools.chain(
                            map(str, args),
                            (f"{k}={v}" for k, v in kwargs.items()),
                        )
                    )
                    print(
                        f"[Retry {attempt}] {func.__name__}({call_args}) failed: {e}, retrying in {wait:.1f}s..."
                    )
                    await asyncio.sleep(wait)
                    wait *= backoff  # exponential back-off

        return wrapper

    return decorator
def with_semaphore(sem: asyncio.Semaphore):
    """Build a decorator that runs a coroutine function while holding ``sem``.

    The semaphore is acquired before the wrapped coroutine is awaited and is
    released afterwards, whether the coroutine returns or raises.
    """

    def bind(func):
        @functools.wraps(func)
        async def limited(*args, **kwargs):
            await sem.acquire()
            try:
                return await func(*args, **kwargs)
            finally:
                sem.release()

        return limited

    return bind
def get_url_suffix(url: str) -> str:
    """Return the file extension (including the dot) of the URL's path part.

    The query string and fragment are ignored; an empty string is returned
    when the path has no extension.
    """
    return Path(urlparse(url).path).suffix
async def main():
    """Download every Gelbooru post matching ``TAGS`` into ``data/``.

    For each post the image is saved as ``data/<md5><ext>`` and its tags as
    ``data/<md5>.txt`` (comma separated). Requests are limited by
    ``SEMAPHORE`` and retried through ``async_retry``.
    """
    api_url = "https://gelbooru.com/index.php"
    page_size = 100
    base_params = {
        "page": "dapi",
        "s": "post",
        "q": "index",
        "json": 1,
        "limit": page_size,
        "tags": TAGS,
        "api_key": API_KEY,
        "user_id": UID,
    }

    # Without this every write fails and the retry decorator loops on the error.
    out_dir = Path("data")
    out_dir.mkdir(parents=True, exist_ok=True)

    async with AsyncClient(trust_env=True) as client:
        # First page: only used to learn how many posts match the query.
        first = await client.get(api_url, params=base_params)
        total_posts = int(first.json()["@attributes"]["count"])

        # The context manager stops the progress display even if a task raises.
        with Progress() as progress:
            task = progress.add_task("[cyan]Downloading...", total=total_posts)

            @async_retry()
            @with_semaphore(SEMAPHORE)
            async def download_task(img_url: str, md5: str, tags: list[str]):
                img_response = await client.get(img_url)
                if img_response.status_code != 200:
                    # Do not store an error body as if it were an image.
                    print(f"[Skipped] {img_url}: HTTP {img_response.status_code}")
                else:
                    ext = get_url_suffix(img_url)
                    (out_dir / f"{md5}{ext}").write_bytes(img_response.content)
                    (out_dir / f"{md5}.txt").write_text(
                        ", ".join(tags), encoding="utf-8"
                    )
                progress.update(task, advance=1)

            @async_retry()
            @with_semaphore(SEMAPHORE)
            async def fetch_posts(pid: int) -> list:
                response = await client.get(
                    api_url, params={**base_params, "pid": pid}
                )
                # A page without results carries no "post" key.
                return response.json().get("post", [])

            async def download_page(pid: int):
                # The listing permit is released before the image downloads
                # start; holding it while waiting for downloads that need
                # permits from the same semaphore can deadlock once the number
                # of pages reaches the semaphore size.
                posts = await fetch_posts(pid)
                await asyncio.gather(
                    *(
                        download_task(
                            post["file_url"], post["md5"], post["tags"].split()
                        )
                        for post in posts
                    )
                )

            page_total = (total_posts + page_size - 1) // page_size
            await asyncio.gather(*(download_page(pid) for pid in range(page_total)))
if __name__ == "__main__":
    asyncio.run(main())

处理图片
有了图片素材后, 要对图片进行去重与打标签.
去重
我使用 Python 脚本进行简单的去重, 参考脚本如下:
展开查看参考脚本
需要安装的库: opencv-python scikit-image rich annoy numpy
from typing import Optionalimport cv2import shutilfrom pathlib import Pathfrom skimage.metrics import structural_similarity as ssimfrom rich.progress import Progressfrom rich.console import Consolefrom annoy import AnnoyIndeximport numpy as np
# Directory that is scanned for images; duplicates and unreadable files are
# moved into the two sub-directories below.
RAW_DIR = Path("data/step-1-deduplication")
DUP_DIR = RAW_DIR / "duplicate"
INV_DIR = RAW_DIR / "invalid"

# parents=True: a missing RAW_DIR must not abort the script with FileNotFoundError.
DUP_DIR.mkdir(parents=True, exist_ok=True)
INV_DIR.mkdir(parents=True, exist_ok=True)

IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"}
SIMILARITY_THRESHOLD = 0.95  # SSIM score at or above which two images count as duplicates
VECTOR_SIZE = 128  # length of the ORB-derived feature vector
NEAREST_NEIGHBORS = 5  # number of nearest neighbours queried per image
# Shared rich console: handed to every Progress(console=...) and used for console.log output.
console = Console()
def is_valid_image(path: Path) -> bool:
    """Report whether ``cv2.imread`` can decode the file at ``path``.

    Returns ``False`` both when the read yields ``None`` and when it raises.
    """
    try:
        decoded = cv2.imread(str(path))
    except Exception:
        return False
    return decoded is not None
def load_gray_image(path: Path, size=256):
    """Read ``path`` as a grayscale image resized to ``size`` x ``size``.

    Returns ``None`` when the file cannot be read.
    """
    gray = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if gray is None:
        return None
    return cv2.resize(gray, (size, size))
def compare_images(img1, img2):
    """Return the structural similarity (SSIM) score of the two images."""
    score = ssim(img1, img2)
    return score
def move_with_txt(src: Path, dst_dir: Path, keep: Optional[Path] = None):
    """Move ``src`` and its same-named ``.txt`` tag file into ``dst_dir``.

    Args:
        src: Image file to move.
        dst_dir: Destination directory; created if it does not exist.
        keep: Image that is being kept in place of ``src``. When it has no
            tag file of its own, the moved tag file is copied next to it.
    """
    dst_dir.mkdir(exist_ok=True)
    moved_image = dst_dir / src.name
    shutil.move(str(src), moved_image)

    src_txt = src.with_suffix(".txt")
    if not src_txt.exists():
        return

    moved_caption = moved_image.with_suffix(".txt")
    shutil.move(str(src_txt), moved_caption)

    if keep is None:
        return

    # The kept image has no tag file yet: give it a copy of the moved one.
    keep_txt = keep.with_suffix(".txt")
    if keep_txt.exists():
        return
    shutil.copy(moved_caption, keep_txt)
    console.log(f"[TXT-COPIED] {src_txt} → {keep_txt}")
def get_orb_vector(path: Path):
    """Return a ``VECTOR_SIZE``-long float32 feature vector for the image.

    The ORB descriptors of the grayscale image are flattened, then
    zero-padded or truncated to ``VECTOR_SIZE`` values. An all-zero vector is
    returned when the image cannot be loaded or yields no descriptors.
    """
    gray = load_gray_image(path)
    if gray is None:
        return np.zeros(VECTOR_SIZE, dtype=np.float32)

    detector = cv2.ORB_create()
    _keypoints, descriptors = detector.detectAndCompute(gray, None)
    if descriptors is None:
        return np.zeros(VECTOR_SIZE, dtype=np.float32)

    flat = descriptors.flatten()
    shortfall = VECTOR_SIZE - len(flat)
    if shortfall > 0:
        flat = np.pad(flat, (0, shortfall))
    else:
        flat = flat[:VECTOR_SIZE]
    return flat.astype(np.float32)
def main():
    """Find near-duplicate images in ``RAW_DIR`` and move them aside.

    Steps:
        1. Unreadable images are moved to ``INV_DIR``; every other image gets
           a feature vector from ``get_orb_vector``.
        2. The vectors are indexed with Annoy.
        3. Each image is compared (SSIM) against its nearest neighbours. On
           the first pair scoring at least ``SIMILARITY_THRESHOLD`` the
           smaller file is moved to ``DUP_DIR`` and the search for that image
           stops.

    A summary of moved duplicates and the files kept instead is logged at
    the end.
    """
    files = [
        f for f in RAW_DIR.iterdir() if f.is_file() and f.suffix.lower() in IMG_EXTS
    ]
    vectors = []
    valid_files: list[Path] = []

    # 1. Build the feature vectors
    with Progress(console=console) as progress:
        task = progress.add_task("[cyan]Extracting features...", total=len(files))
        for file in files:
            progress.update(task, description=f"[cyan]Processing {file.name}")
            if is_valid_image(file):
                vectors.append(get_orb_vector(file))
                valid_files.append(file)
            else:
                move_with_txt(file, INV_DIR)
                console.log(f"[INVALID] {file}")
            progress.advance(task)

    if not valid_files:
        console.log("No valid images found.")
        return

    # 2. Build the Annoy index
    index = AnnoyIndex(VECTOR_SIZE, "euclidean")
    for i, vec in enumerate(vectors):
        index.add_item(i, vec)
    index.build(10)  # tunable
    processed_indices = set()  # indices into valid_files
    duplicate_map = {}  # moved duplicate path -> file kept instead
    console.log(f"Annoy index built with {len(valid_files)} images.")

    # 3. Confirm candidate pairs with SSIM
    with Progress(console=console) as progress:
        task = progress.add_task("[green]Finding duplicates...", total=len(valid_files))
        for i, file in enumerate(valid_files):
            if i in processed_indices or not file.exists():
                progress.advance(task)
                continue
            progress.update(task, description=f"[green]Checking {file.name}")

            img_gray = load_gray_image(file)
            if img_gray is None:
                # Nothing to compare against; SSIM cannot take None.
                processed_indices.add(i)
                progress.advance(task)
                continue

            # Query the nearest neighbours
            nearest = index.get_nns_by_item(i, NEAREST_NEIGHBORS)
            found_duplicate = False
            for j in nearest:
                if j == i or j in processed_indices:
                    continue
                other_file = valid_files[j]
                if not other_file.exists():
                    processed_indices.add(j)
                    continue
                other_gray = load_gray_image(other_file)
                if other_gray is None:
                    continue
                score = compare_images(img_gray, other_gray)
                if score < SIMILARITY_THRESHOLD:
                    continue

                # Keep the larger file
                if file.stat().st_size >= other_file.stat().st_size:
                    move_with_txt(other_file, DUP_DIR, keep=file)
                    console.log(
                        f"[DUPLICATE] {other_file} → {DUP_DIR}, kept {file} (SSIM={score:.3f})"
                    )
                    duplicate_map[DUP_DIR / other_file.name] = file
                    processed_indices.add(j)
                else:
                    move_with_txt(file, DUP_DIR, keep=other_file)
                    console.log(
                        f"[DUPLICATE] {file} → {DUP_DIR}, kept {other_file} (SSIM={score:.3f})"
                    )
                    # Record this direction too so the summary is complete.
                    duplicate_map[DUP_DIR / file.name] = other_file
                    processed_indices.add(i)
                found_duplicate = True
                break

            if not found_duplicate:
                processed_indices.add(i)
            progress.advance(task)

    for k, v in duplicate_map.items():
        console.log(f"{k} keep: {v}")
    print(duplicate_map)
if __name__ == "__main__":
    main()

使用时请根据实际情况修改配置.
格式转换
如果图片并非 png 格式, 可以使用 Stable Diffusion WebUI 的后期处理功能, 取消勾选所有功能, 只填写输入输出目录, 跑一遍之后图片就全为 png 格式了.
这里无须提前裁剪图片, 在 lora-script 训练时会自动裁剪.
打标签
Gelbooru 爬下来的图片自带标签, 所以只对番剧截图进行打标签.
由于习惯原因, 我使用 Stable Diffusion WebUI 进行打标签. 也可以选择较新的其他工具.

在此页面填写输入与输出目录, 其余配置保持默认即可.
完成后你应该获得与图片同名的 txt 文件, 里面存储该图片的标签.
训练
准备
接下来就是最关键的训练了. 由于没有合适的显卡, 我使用 AutoDL 租一台实例做训练机器.
注册并登录 AutoDL, 在“控制台”页面选择“租用新实例”.

显卡建议选 4090, 一张即可. 5090 由于 CUDA 太新镜像不支持, 选择多张 4090 时 lora-script 似乎无法识别, 故选择一张4090.
选择好配置后下滑到镜像页面, 选择“社区镜像”, 搜索 “lora-train” 选择第一个即可.

这个镜像的具体使用教程可以看 这个文章.
镜像创建完成后打开 JupyterLab 更新并运行项目, 等待依赖安装完成.
上传图片
将图片上传到镜像合适的位置.
如果你的图片较多, 建议上传到 /autodl-tmp/ 目录下的文件夹. 不过记得之后配置环节要填写正确的图片文件夹.
图片目录应使用如下结构:
Folder/
  {count}_{concept_name}/
    image1.png
    image1.txt
    ...
  {count}_{concept_name2}/
    ...

即文件夹下的子文件夹命名为: 重复数_概念名, 概念名即角色画风等, 我这里只训练 “shirosaki_hana” 一个概念, 所以我的目录为:
/autodl-tmp/train/
  5_shirosaki_hana/
    ...

重复数一般用 5~8 即可 (当然也不是绝对的, 可以多次尝试寻找合适值).
配置
使用刚才的文章提到的端口转发器, 填写配置后启动. 之后按照端口打开对应网页.
在 SD-Trainer 网页中即可配置 LoRA 训练参数.
我建议使用 sdxl 或 illustrious 做底模训练. 这里我使用自带的 sd_xl_base_1.0 做底模: illustrious 属于 sdxl 的变体, 因此选择 sd_xl_base_1.0 做底模可以最大化提高泛用性.
在 SD-Trainer 网页中选择 LoRA 训练一项, 加载完成后选择“专家”.
这里使用专家模式可以配置更多选项, 灵活性更高.

分享一下我使用的配置, 可以作参考. 复制并粘贴到 toml 文件中, 在 Lora-Trainer 中选择导入配置文件即可使用我的配置.
model_train_type = "sdxl-lora"
pretrained_model_name_or_path = "./sd-models/sd_xl_base_1.0.safetensors"
train_data_dir = "../autodl-tmp/train"
prior_loss_weight = 1
resolution = "512,512"
enable_bucket = true
min_bucket_reso = 256
max_bucket_reso = 1024
bucket_reso_steps = 64
bucket_no_upscale = true
output_name = "shirosakihana-sdxl-v2"
output_dir = "./output"
save_model_as = "safetensors"
save_precision = "bf16"
save_every_n_epochs = 1
save_state = true
max_train_epochs = 10
train_batch_size = 4
gradient_checkpointing = false
network_train_unet_only = false
network_train_text_encoder_only = false
learning_rate = 0.0001
unet_lr = 0.0001
text_encoder_lr = 0.00001
lr_scheduler = "cosine_with_restarts"
lr_warmup_steps = 0
lr_scheduler_num_cycles = 1
optimizer_type = "AdamW8bit"
network_module = "networks.lora"
network_dim = 64
network_alpha = 64
randomly_choice_prompt = false
positive_prompts = "(masterpiece, best quality:1.2), 1girl, arms behind back, bangs, black hair, blue dress, blush, bow, closed mouth, dress, eyebrows visible through hair, flower, hair between eyes, hair flower, hair ornament, long hair, long sleeves, looking at viewer, pink flower, red bow, sailor collar, sailor dress, school uniform, shirosaki hana, shirt, simple background, sleeveless, sleeveless dress, smile, solo, very long hair, white background, white sailor collar, white shirt, watashi ni tenshi ga maiorita!"
negative_prompts = "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts,signature, watermark, username, blurry"
sample_width = 512
sample_height = 512
sample_cfg = 7
sample_seed = 114514
sample_steps = 24
sample_sampler = "euler_a"
sample_every_n_epochs = 1
log_with = "tensorboard"
logging_dir = "./logs"
caption_extension = ".txt"
shuffle_caption = false
keep_tokens = 0
max_token_length = 255
seed = 1337
mixed_precision = "bf16"
xformers = true
lowram = false
cache_latents = true
cache_latents_to_disk = true
persistent_data_loader_workers = true

模型名, 训练数据目录和生成预览图的标签一定要改成自己的, 别完全照抄.
配置完成后点击“开始训练”等待训练完成即可, 期间可以在 JupyterLab 中查看 output 目录下训练过程中生成的预览图查看训练效果. 也可以在 Lora-Trainer 中查看 Tensorboard 观察训练过程.
使用
训练完成后下载训练结果, 在 ComfyUI 中生成几张图片测试效果.
由于训练的是 sdxl LoRA, 底模可以选择 sdxl 或 illustrious.
这里推荐 “JANKU v5” 和 “waiNSFW” 这两个 illustrious 模型做底模.
展示
下列图片下载后拖入 ComfyUI 即可获取完整工作流.
