Versions Compared

Key

  • This line was added.
  • This line was removed.
  • Formatting was changed.

...

Config

Code Block
{

}


Model info

Diffusers/Tongyi-MAI/Z-Image-Turbo [8dc64d5281]
Module | Class | Device | Dtype | Quant | Params | Modules | Config
vae | AutoencoderKL | xpu:0 | torch.bfloat16 | None | 83819683 | 241

FrozenDict({'in_channels': 3, 'out_channels': 3, 'down_block_types': ['DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D', 'DownEncoderBlock2D'], 'up_block_types': ['UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D', 'UpDecoderBlock2D'], 'block_out_channels': [128, 256, 512, 512], 'layers_per_block': 2, 'act_fn': 'silu', 'latent_channels': 16, 'norm_num_groups': 32, 'sample_size': 1024, 'scaling_factor': 0.3611, 'shift_factor': 0.1159, 'latents_mean': None, 'latents_std': None, 'force_upcast': True, 'use_quant_conv': False, 'use_post_quant_conv': False, 'mid_block_add_attention': True, '_class_name': 'AutoencoderKL', '_diffusers_version': '0.36.0.dev0', '_name_or_path': '/mnt/models/Diffusers/models--Tongyi-MAI--Z-Image-Turbo/snapshots/8dc64d5281ef263238d1b12eb617b4bf1ed3ff2f/vae'})

text_encoder | Qwen3Model | xpu:0 | torch.bfloat16 | None | 4022468096 | 545

Qwen3Config { "architectures": [ "Qwen3ForCausalLM" ], "attention_bias": false, "attention_dropout": 0.0, "bos_token_id": 151643, "dtype": "bfloat16", "eos_token_id": 151645, "head_dim": 128, "hidden_act": "silu", "hidden_size": 2560, "initializer_range": 0.02, "intermediate_size": 9728, "layer_types": [ "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention" ], "max_position_embeddings": 40960, "max_window_layers": 36, "model_type": "qwen3", "num_attention_heads": 32, "num_hidden_layers": 36, "num_key_value_heads": 8, "rms_norm_eps": 1e-06, "rope_scaling": null, "rope_theta": 1000000, "sliding_window": null, "tie_word_embeddings": true, "transformers_version": "4.57.1", "use_cache": true, "use_sliding_window": false, "vocab_size": 151936 }

tokenizer | Qwen2Tokenizer | None | None | None | 0 | 0

None

scheduler | FlowMatchEulerDiscreteScheduler | None | None | None | 0 | 0

FrozenDict({'num_train_timesteps': 1000, 'shift': 3.0, 'use_dynamic_shifting': False, 'base_shift': 0.5, 'max_shift': 1.15, 'base_image_seq_len': 256, 'max_image_seq_len': 4096, 'invert_sigmas': False, 'shift_terminal': None, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'time_shift_type': 'exponential', 'stochastic_sampling': False, '_use_default_values': ['base_shift', 'max_shift', 'use_karras_sigmas', 'shift_terminal', 'base_image_seq_len', 'invert_sigmas', 'max_image_seq_len', 'use_exponential_sigmas', 'time_shift_type', 'use_beta_sigmas', 'stochastic_sampling'], '_class_name': 'FlowMatchEulerDiscreteScheduler', '_diffusers_version': '0.36.0.dev0'})

transformer | ZImageTransformer2DModel | xpu:0 | torch.bfloat16 | None | 6154908736 | 697

FrozenDict({'all_patch_size': [2], 'all_f_patch_size': [1], 'in_channels': 16, 'dim': 3840, 'n_layers': 30, 'n_refiner_layers': 2, 'n_heads': 30, 'n_kv_heads': 30, 'norm_eps': 1e-05, 'qk_norm': True, 'cap_feat_dim': 2560, 'rope_theta': 256.0, 't_scale': 1000.0, 'axes_dims': [32, 48, 48], 'axes_lens': [1536, 512, 512], '_class_name': 'ZImageTransformer2DModel', '_diffusers_version': '0.36.0.dev0', '_name_or_path': '/mnt/models/Diffusers/models--Tongyi-MAI--Z-Image-Turbo/snapshots/8dc64d5281ef263238d1b12eb617b4bf1ed3ff2f/transformer'})