{
  "models": {
    "denoiser": {
      "name": "ElasticSLatFlowModel",
      "args": {
        "resolution": 64,
        "in_channels": 8,
        "out_channels": 8,
        "model_channels": 1280,
        "cond_channels": 768,
        "num_blocks": 28,
        "num_heads": 16,
        "mlp_ratio": 4,
        "patch_size": 2,
        "num_io_res_blocks": 3,
        "io_block_channels": [256],
        "pe_mode": "ape",
        "qk_rms_norm": true,
        "use_fp16": true
      }
    }
  },
  "dataset": {
    "name": "TextConditionedSLat",
    "args": {
      "latent_model": "dinov2_vitl14_reg_slat_enc_swin8_B_64l8_fp16",
      "min_aesthetic_score": 4.5,
      "max_num_voxels": 32768,
      "normalization": {
        "mean": [
          -2.1687545776367188,
          -0.004347046371549368,
          -0.13352349400520325,
          -0.08418072760105133,
          -0.5271206498146057,
          0.7238689064979553,
          -1.1414450407028198,
          1.2039363384246826
        ],
        "std": [
          2.377650737762451,
          2.386378288269043,
          2.124418020248413,
          2.1748552322387695,
          2.663944721221924,
          2.371192216873169,
          2.6217446327209473,
          2.684523105621338
        ]
      },
      "pretrained_slat_dec": "microsoft/TRELLIS-image-large/ckpts/slat_dec_gs_swin8_B_64l8gs32_fp16"
    }
  },
  "trainer": {
    "name": "TextConditionedSparseFlowMatchingCFGTrainer",
    "args": {
      "max_steps": 1000000,
      "batch_size_per_gpu": 4,
      "batch_split": 4,
      "optimizer": {
        "name": "AdamW",
        "args": {
          "lr": 0.0001,
          "weight_decay": 0.0
        }
      },
      "ema_rate": [0.9999],
      "fp16_mode": "inflat_all",
      "fp16_scale_growth": 0.001,
      "elastic": {
        "name": "LinearMemoryController",
        "args": {
          "target_ratio": 0.75,
          "max_mem_ratio_start": 0.5
        }
      },
      "grad_clip": {
        "name": "AdaptiveGradClipper",
        "args": {
          "max_norm": 1.0,
          "clip_percentile": 95
        }
      },
      "i_log": 500,
      "i_sample": 10000,
      "i_save": 10000,
      "p_uncond": 0.1,
      "t_schedule": {
        "name": "logitNormal",
        "args": {
          "mean": 1.0,
          "std": 1.0
        }
      },
      "sigma_min": 1e-5,
      "text_cond_model": "openai/clip-vit-large-patch14"
    }
  }
}