训练 Diffusion LoRA 模型 - HanaTaka2137's Blog

总览#

数据来源: 番剧截图 & Gelbooru
算力平台: AutoDL

准备工作#

为了训练一个 Diffusion LoRA 模型, 一定量的图片素材做数据集必不可少.

我这里以白咲花为例, 图片素材来自番剧截图以及用爬虫从 Gelbooru 爬取的图片, 总计 1497 张.

Gelbooru 对爬虫比较友好, 注册一个账号即可以 json 格式批量爬取图片, 甚至可以获取图片标签方便训练.

参考脚本:

展开查看参考脚本

需要安装的库: httpx rich

1
import asyncio
2
import functools
3
import itertools
4
from urllib.parse import urlparse
5

6
from httpx import AsyncClient
7
from rich.progress import Progress
8
from pathlib import Path
9

10

11
API_KEY = "YOUR API KEY" # put your Gelbooru API key here
12
UID = "YOUR UID" # put your Gelbooru user ID here
13
TAGS = "shirosaki_hana" # tag(s) to search for and download
14
SEMAPHORE = asyncio.Semaphore(100) # caps the number of concurrent requests
15

16

17
def async_retry(
    delay: float = 1.0,
    backoff: float = 2.0,
    exceptions=(Exception,),
    *,
    max_attempts: "int | None" = None,
):
    """Build a decorator that retries a coroutine function with exponential backoff.

    Args:
        delay: Seconds to wait before the first retry.
        backoff: Factor the wait is multiplied by after every failed attempt.
        exceptions: Exception type(s) that trigger a retry; any other
            exception propagates immediately.
        max_attempts: Maximum number of calls before the last error is
            re-raised. ``None`` (the default) retries indefinitely.

    Returns:
        A decorator for ``async def`` functions.
    """

    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            wait = delay
            attempt = 0
            while True:
                try:
                    return await func(*args, **kwargs)
                except exceptions as e:
                    attempt += 1
                    # Give up once the attempt budget is spent so a permanent
                    # failure cannot spin forever.
                    if max_attempts is not None and attempt >= max_attempts:
                        raise
                    print(
                        f"[Retry {attempt}] {func.__name__}({', '.join(itertools.chain(map(str, args), [f'{k}={v}' for k, v in kwargs.items()]))}) failed: {e}, retrying in {wait:.1f}s..."
                    )
                    await asyncio.sleep(wait)
                    wait *= backoff  # exponential backoff

        return wrapper

    return decorator
37

38

39
def with_semaphore(sem: asyncio.Semaphore):
    """Build a decorator that runs a coroutine function while holding *sem*.

    Args:
        sem: Semaphore acquired before each call and released when the call
            returns or raises.

    Returns:
        A decorator for ``async def`` functions.
    """

    def decorator(func):
        @functools.wraps(func)
        async def wrapper(*args, **kwargs):
            async with sem:
                return await func(*args, **kwargs)

        return wrapper

    return decorator
49

50

51
def get_url_suffix(url: str) -> str:
    """Return the file extension (including the dot) of the URL's path.

    Only the path component is inspected, so query strings and fragments are
    ignored. An empty string is returned when the path has no extension.
    """
    return Path(urlparse(url).path).suffix
54

55

56
async def main():
    """Download every Gelbooru post matching ``TAGS`` into ``data/``.

    For each post the image is saved as ``data/<md5><ext>`` and its tags as
    ``data/<md5>.txt`` (comma separated), so the captions can be used
    directly for training.
    """
    # NOTE(review): no ``limit`` parameter is sent, so this assumes the API's
    # default page size of 100 posts — confirm against the Gelbooru API docs.
    page_size = 100

    out_dir = Path("data")
    # Create the output folder up front: otherwise every write fails and the
    # catch-all retry decorator would loop on FileNotFoundError forever.
    out_dir.mkdir(parents=True, exist_ok=True)

    async with AsyncClient(trust_env=True) as client:
        # First request is only used to learn the total number of posts.
        response = await client.get(
            f"https://gelbooru.com/index.php?page=dapi&s=post&q=index&json=1&tags={TAGS}&api_key={API_KEY}&user_id={UID}"
        )
        response.raise_for_status()  # fail fast on bad credentials / bad request
        post_total = int(response.json()["@attributes"]["count"])

        # Context manager guarantees the live display is stopped on errors too.
        with Progress() as progress:
            task = progress.add_task("[cyan]Downloading...", total=post_total)

            @async_retry()
            @with_semaphore(SEMAPHORE)
            async def download_task(img_url: str, md5: str, tags: list[str]):
                ext = get_url_suffix(img_url)
                img_response = await client.get(img_url)
                status = img_response.status_code
                if status == 429 or status >= 500:
                    # Transient failure: raise so async_retry backs off and retries.
                    img_response.raise_for_status()
                if status >= 400:
                    # Permanent failure: skip instead of saving an error page
                    # as an image (or retrying forever).
                    progress.console.print(
                        f"skipped {img_url}: HTTP {status}", markup=False
                    )
                    progress.update(task, advance=1)
                    return
                (out_dir / f"{md5}{ext}").write_bytes(img_response.content)
                (out_dir / f"{md5}.txt").write_text(
                    ", ".join(tags), encoding="utf-8"
                )
                progress.update(task, advance=1)

            @async_retry()
            @with_semaphore(SEMAPHORE)
            async def fetch_page(pid: int):
                response = await client.get(
                    f"https://gelbooru.com/index.php?page=dapi&s=post&q=index&pid={pid}&json=1&tags={TAGS}&api_key={API_KEY}&user_id={UID}"
                )
                response.raise_for_status()
                # A page without results carries no "post" key.
                return response.json().get("post", [])

            async def download_page(pid: int):
                # The semaphore is held only while fetching the listing. Holding
                # it while awaiting the image downloads (which need the same
                # semaphore) would deadlock once enough pages are in flight.
                posts = await fetch_page(pid)
                await asyncio.gather(
                    *(
                        download_task(
                            post["file_url"], post["md5"], post["tags"].split()
                        )
                        for post in posts
                    )
                )

            page_total = (post_total + page_size - 1) // page_size
            await asyncio.gather(*(download_page(pid) for pid in range(page_total)))
94

95

96
# Run the crawler only when executed as a script, not when imported.
if __name__ == "__main__":
    asyncio.run(main())

处理图片#

有了图片素材后, 要对图片进行去重与打标签.

去重#

我使用 Python 脚本进行简单的去重, 参考脚本如下:

展开查看参考脚本

需要安装的库: opencv-python scikit-image rich annoy numpy

1
from typing import Optional
2
import cv2
3
import shutil
4
from pathlib import Path
5
from skimage.metrics import structural_similarity as ssim
6
from rich.progress import Progress
7
from rich.console import Console
8
from annoy import AnnoyIndex
9
import numpy as np
10

11
# Folder holding the images to deduplicate; rejected files are moved into
# the two sub-folders below instead of being deleted.
RAW_DIR = Path("data/step-1-deduplication")
DUP_DIR = RAW_DIR / "duplicate"
INV_DIR = RAW_DIR / "invalid"

# parents=True: without it the script crashes with FileNotFoundError at
# startup whenever RAW_DIR (or data/) does not exist yet.
DUP_DIR.mkdir(parents=True, exist_ok=True)
INV_DIR.mkdir(parents=True, exist_ok=True)
17

18
# File suffixes (compared lower-cased) that are treated as images.
IMG_EXTS = {".jpg", ".jpeg", ".png", ".bmp", ".gif", ".tiff", ".webp"}
19
SIMILARITY_THRESHOLD = 0.95  # SSIM score at or above which two images count as duplicates
20
VECTOR_SIZE = 128  # length of the feature vector built from ORB descriptors
21
NEAREST_NEIGHBORS = 5  # number of nearest neighbours requested per index query
22

23
console = Console()
24

25

26
def is_valid_image(path: Path) -> bool:
    """Return True when OpenCV can decode the file at *path*.

    A ``None`` result from ``cv2.imread`` and any exception raised while
    reading are both treated as "not a valid image".
    """
    try:
        return cv2.imread(str(path)) is not None
    except Exception:
        return False
32

33

34
def load_gray_image(path: Path, size=256):
    """Read *path* as a grayscale image resized to ``size`` x ``size`` pixels.

    Returns ``None`` when OpenCV cannot read the file. Resizing every image to
    the same square shape is what allows two images to be compared directly.
    """
    img = cv2.imread(str(path), cv2.IMREAD_GRAYSCALE)
    if img is None:
        return None
    return cv2.resize(img, (size, size))
39

40

41
def compare_images(img1, img2):
    """Return the structural similarity (SSIM) score of two images.

    If either image is ``None`` (it could not be loaded) the pair is reported
    as completely dissimilar (``0.0``) instead of crashing inside SSIM.
    """
    if img1 is None or img2 is None:
        return 0.0
    return ssim(img1, img2)
43

44

45
def move_with_txt(src: Path, dst_dir: Path, keep: Optional[Path] = None):
    """Move an image and its same-named ``.txt`` caption into *dst_dir*.

    Args:
        src: Image file to move.
        dst_dir: Destination folder; created (with parents) when missing.
        keep: Image that is being kept instead of *src*. If it has no caption
            file of its own, the caption of *src* is copied next to it so the
            tags are not lost.
    """
    dst_dir.mkdir(parents=True, exist_ok=True)
    dst_path = dst_dir / src.name
    shutil.move(str(src), str(dst_path))

    src_txt = src.with_suffix(".txt")
    if not src_txt.exists():
        return  # no caption to carry along

    dst_txt = dst_path.with_suffix(".txt")
    shutil.move(str(src_txt), str(dst_txt))

    if keep is None:
        return
    keep_txt = keep.with_suffix(".txt")
    if not keep_txt.exists():
        # The kept image has no caption of its own: reuse the moved one.
        shutil.copy(dst_txt, keep_txt)
        console.log(f"[TXT-COPIED] {src_txt} → {keep_txt}")
61

62

63
def get_orb_vector(path: Path):
    """Build a fixed-length float32 feature vector from the image's ORB descriptors.

    The descriptor matrix is flattened and then truncated or zero-padded to
    ``VECTOR_SIZE`` values. An all-zero vector is returned when the image
    cannot be loaded or ORB finds no descriptors.
    """
    img = load_gray_image(path)
    if img is None:
        return np.zeros(VECTOR_SIZE, dtype=np.float32)

    orb = cv2.ORB_create()
    _keypoints, des = orb.detectAndCompute(img, None)
    if des is None:
        return np.zeros(VECTOR_SIZE, dtype=np.float32)

    vec = des.flatten()
    if len(vec) < VECTOR_SIZE:
        vec = np.pad(vec, (0, VECTOR_SIZE - len(vec)))
    else:
        vec = vec[:VECTOR_SIZE]
    return vec.astype(np.float32)
77

78

79
def main():
    """Find near-duplicate images in ``RAW_DIR`` and move them to ``DUP_DIR``.

    Steps:
        1. Move unreadable images to ``INV_DIR`` and compute an ORB feature
           vector for every readable one.
        2. Build an Annoy index over the vectors to get candidate neighbours.
        3. Confirm each candidate pair with SSIM; of two duplicates the
           smaller file is moved away and the larger one is kept.
    """
    files = [
        f for f in RAW_DIR.iterdir() if f.is_file() and f.suffix.lower() in IMG_EXTS
    ]
    vectors = []
    valid_files: list[Path] = []

    # 1. Build the feature vectors
    with Progress(console=console) as progress:
        task = progress.add_task("[cyan]Extracting features...", total=len(files))
        for file in files:
            progress.update(task, description=f"[cyan]Processing {file.name}")
            if is_valid_image(file):
                vectors.append(get_orb_vector(file))
                valid_files.append(file)
            else:
                move_with_txt(file, INV_DIR)
                console.log(f"[INVALID] {file}")
            progress.advance(task)

    if not valid_files:
        console.log("No valid images found.")
        return

    # 2. Build the Annoy index
    index = AnnoyIndex(VECTOR_SIZE, "euclidean")
    for i, vec in enumerate(vectors):
        index.add_item(i, vec)
    index.build(10)  # number of trees; tunable
    processed_indices = set()  # indices into valid_files that need no further checks
    duplicate_map = {}  # moved duplicate path -> file that was kept
    console.log(f"Annoy index built with {len(valid_files)} images.")

    # 3. Confirm candidates with SSIM
    with Progress(console=console) as progress:
        task = progress.add_task("[green]Finding duplicates...", total=len(valid_files))
        for i, file in enumerate(valid_files):
            # processed_indices stores indices, so test the index (the original
            # tested the Path object, which could never match).
            if i in processed_indices or not file.exists():
                progress.advance(task)
                continue
            progress.update(task, description=f"[green]Checking {file.name}")

            # Query the nearest neighbours
            nearest = index.get_nns_by_item(i, NEAREST_NEIGHBORS)
            found_duplicate = False
            img_gray = load_gray_image(file)
            for j in nearest:
                if j == i or j in processed_indices:
                    continue
                other_file = valid_files[j]
                if not other_file.exists():
                    processed_indices.add(j)
                    continue
                other_gray = load_gray_image(other_file)
                if img_gray is None or other_gray is None:
                    # An image that can no longer be read cannot be compared.
                    continue
                score = compare_images(img_gray, other_gray)
                if score < SIMILARITY_THRESHOLD:
                    continue
                # Keep the larger file
                if file.stat().st_size >= other_file.stat().st_size:
                    move_with_txt(other_file, DUP_DIR, keep=file)
                    console.log(
                        f"[DUPLICATE] {other_file} → {DUP_DIR}, kept {file} (SSIM={score:.3f})"
                    )
                    duplicate_map[DUP_DIR / other_file.name] = file
                    processed_indices.add(j)
                else:
                    move_with_txt(file, DUP_DIR, keep=other_file)
                    console.log(
                        f"[DUPLICATE] {file} → {DUP_DIR}, kept {other_file} (SSIM={score:.3f})"
                    )
                    # Record this direction too so the summary is complete.
                    duplicate_map[DUP_DIR / file.name] = other_file
                    found_duplicate = True
                    break  # the current file is gone; stop comparing it
            if not found_duplicate:
                processed_indices.add(i)
            progress.advance(task)
    for k, v in duplicate_map.items():
        console.log(f"{k} keep: {v}")
    print(duplicate_map)
161

162

163
# Run the deduplication only when executed as a script, not when imported.
if __name__ == "__main__":
    main()

使用时请根据实际情况修改配置.

格式转换#

如果图片并非 png 格式, 可以使用 Stable Diffusion WebUI 的后期处理功能, 取消勾选所有功能, 只填写输入输出目录, 跑一遍之后图片就全为 png 格式了.

这里无须提前裁剪图片, 在 lora-script 训练时会自动裁剪.

打标签#

Gelbooru 爬下来的图片自带标签, 所以只对番剧截图进行打标签.

由于习惯原因, 我使用 Stable Diffusion WebUI 进行打标签. 也可以选择较新的其他工具.

stable diffusion webui

在此页面填写输入与输出目录, 其余配置保持默认即可.

完成后你应该获得与图片同名的 txt 文件, 里面存储该图片的标签.

训练#

准备#

接下来就是最关键的训练了. 由于没有合适的显卡, 我使用 AutoDL 租一台实例做训练机器.

注册并登录 AutoDL, 在“控制台”页面选择“租用新实例”.

autodl console

显卡建议选 4090, 一张即可. 5090 由于 CUDA 版本太新, 镜像暂不支持; 选择多张 4090 时 lora-script 似乎无法识别, 故选择一张 4090.

选择好配置后下滑到镜像页面, 选择“社区镜像”, 搜索 “lora-train” 选择第一个即可.

autodl images

这个镜像的具体使用教程可以看这个文章.

镜像创建完成后打开 JupyterLab 更新并运行项目, 等待依赖安装完成.

上传图片#

将图片上传到镜像合适的位置.

如果你的图片较多, 建议上传到 /autodl-tmp/ 目录下的文件夹. 不过记得之后配置环节要填写正确的图片文件夹.

图片目录应使用如下结构:

1
Folder/
2
    {count}_{concept_name}/
3
        image1.png
4
        image1.txt
5
        ...
6
    {count}_{concept_name2}/
7
        ...

即文件夹下的子文件夹命名为: 重复数_概念名. 概念名即角色、画风等, 我这里只训练 “shirosaki_hana” 一个概念, 所以我的目录为:

1
/autodl-tmp/train/
2
    5_shirosaki_hana/
3
        ...

重复数一般用 5~8 即可 (当然也不是绝对的, 可以多次尝试寻找合适值).

配置#

使用刚才的文章提到的端口转发器, 填写配置后启动. 之后按照端口打开对应网页.

在 SD-Trainer 网页中即可配置 LoRA 训练参数.

我建议使用 sdxl 或 illustrious 做底模训练. 这里我使用自带的 sd_xl_base_1.0 做底模: illustrious 属于 sdxl 的变体, 因此选择 sdxl 原版做底模可以最大化提高泛用性.

在 SD-Trainer 网页中选择 LoRA 训练一项, 加载完成后选择“专家”.

这里使用专家模式可以配置更多选项, 灵活性更高.

Lora-Trainer

分享一下我使用的配置, 可以作参考. 复制并粘贴到 toml 文件中, 在 Lora-Trainer 中选择导入配置文件即可使用我的配置.

1
model_train_type = "sdxl-lora"
2
pretrained_model_name_or_path = "./sd-models/sd_xl_base_1.0.safetensors"
3
train_data_dir = "../autodl-tmp/train"
4
prior_loss_weight = 1
5
resolution = "512,512"
6
enable_bucket = true
7
min_bucket_reso = 256
8
max_bucket_reso = 1024
9
bucket_reso_steps = 64
10
bucket_no_upscale = true
11
output_name = "shirosakihana-sdxl-v2"
12
output_dir = "./output"
13
save_model_as = "safetensors"
14
save_precision = "bf16"
15
save_every_n_epochs = 1
16
save_state = true
17
max_train_epochs = 10
18
train_batch_size = 4
19
gradient_checkpointing = false
20
network_train_unet_only = false
21
network_train_text_encoder_only = false
22
learning_rate = 0.0001
23
unet_lr = 0.0001
24
text_encoder_lr = 0.00001
25
lr_scheduler = "cosine_with_restarts"
26
lr_warmup_steps = 0
27
lr_scheduler_num_cycles = 1
28
optimizer_type = "AdamW8bit"
29
network_module = "networks.lora"
30
network_dim = 64
31
network_alpha = 64
32
randomly_choice_prompt = false
33
positive_prompts = "(masterpiece, best quality:1.2), 1girl, arms behind back, bangs, black hair, blue dress, blush, bow, closed mouth, dress, eyebrows visible through hair, flower, hair between eyes, hair flower, hair ornament, long hair, long sleeves, looking at viewer, pink flower, red bow, sailor collar, sailor dress, school uniform, shirosaki hana, shirt, simple background, sleeveless, sleeveless dress, smile, solo, very long hair, white background, white sailor collar, white shirt, watashi ni tenshi ga maiorita!"
34
negative_prompts = "lowres, bad anatomy, bad hands, text, error, missing fingers, extra digit, fewer digits, cropped, worst quality, low quality, normal quality, jpeg artifacts,signature, watermark, username, blurry"
35
sample_width = 512
36
sample_height = 512
37
sample_cfg = 7
38
sample_seed = 114514
39
sample_steps = 24
40
sample_sampler = "euler_a"
41
sample_every_n_epochs = 1
42
log_with = "tensorboard"
43
logging_dir = "./logs"
44
caption_extension = ".txt"
45
shuffle_caption = false
46
keep_tokens = 0
47
max_token_length = 255
48
seed = 1337
49
mixed_precision = "bf16"
50
xformers = true
51
lowram = false
52
cache_latents = true
53
cache_latents_to_disk = true
54
persistent_data_loader_workers = true