1from os import path
2from pathlib import Path
3from uuid import uuid4
4
5from denoising_diffusion_pytorch import GaussianDiffusion, Trainer, Unet
6from mp_time_split.core import MPTimeSplit
7
8from xtal2png.core import XtalConverter
9
10# import numpy as np
11
12
# ---------------------------------------------------------------------------
# Data: fetch the train/validation split for a single fold.
# ---------------------------------------------------------------------------
mpt = MPTimeSplit()
mpt.load()

fold = 0
(
    train_inputs,
    val_inputs,
    train_outputs,
    val_outputs,
) = mpt.get_train_and_val_data(fold)
# Optional shuffling of the training split, currently disabled (it would also
# need the commented-out numpy import at the top of the file):
# train_idx = np.random.permutation(len(train_inputs))
# train_inputs = train_inputs.iloc[train_idx]
# train_outputs = train_outputs.iloc[train_idx]

# ---------------------------------------------------------------------------
# Preprocessing: run the training inputs through XtalConverter.
# ---------------------------------------------------------------------------
channels = 1
# Image side length handed to both the diffusion wrapper and the trainer.
image_size = 64

preprocessed_root = path.join("data", "preprocessed", "mp-time-split")
data_path = path.join(preprocessed_root, f"fold={fold}")

xc = XtalConverter(
    channels=channels,
    encode_as_primitive=True,
    decode_as_primitive=True,
    save_dir=data_path,
)
# NOTE(review): presumably this writes the encoded images into `data_path`,
# which is the folder the Trainer below is pointed at -- confirm against the
# xtal2png documentation.
train_structures = train_inputs.tolist()
xc.xtal2png(train_structures)

# ---------------------------------------------------------------------------
# Model: U-Net wrapped in a Gaussian diffusion process, both moved to the GPU.
# ---------------------------------------------------------------------------
model = Unet(dim=64, dim_mults=(1, 2, 4, 8), channels=channels)
model = model.cuda()

diffusion = GaussianDiffusion(
    model,
    channels=channels,
    image_size=image_size,
    timesteps=1000,
    loss_type="l1",
)
diffusion = diffusion.cuda()

# ---------------------------------------------------------------------------
# Training setup.
# ---------------------------------------------------------------------------
train_batch_size = 32
print("train_batch_size: ", train_batch_size)

# Each run gets its own results directory, named by the first four characters
# of a freshly generated UUID.
run_tag = str(uuid4())[:4]
results_folder = path.join(
    "data", "interim", "denoising_diffusion_pytorch", f"fold={fold}", run_tag
)
Path(results_folder).mkdir(parents=True, exist_ok=True)

trainer = Trainer(
    diffusion,
    data_path,
    results_folder=results_folder,
    image_size=image_size,
    train_batch_size=train_batch_size,
    train_lr=2e-5,
    train_num_steps=700000,  # total number of training steps
    gradient_accumulate_every=2,  # steps over which gradients are accumulated
    ema_decay=0.995,  # decay rate of the exponential moving average
    amp=True,  # enable mixed precision
    augment_horizontal_flip=False,
)

# ---------------------------------------------------------------------------
# Train, then draw a batch of samples from the diffusion model.
# ---------------------------------------------------------------------------
trainer.train()

sampled_images = diffusion.sample(batch_size=100)

# No-op expression; presumably kept as a convenient line for a debugger
# breakpoint so `sampled_images` can be inspected -- TODO confirm.
1 + 1