...

hunyuanvideo-community/HunyuanImage-2.1-Distilled-Diffusers [2effeb8511] 

Module | Class | Device | Dtype | Quant | Params | Modules | Config (shown below each row)
vae | AutoencoderKLHunyuanImage | xpu:0 | torch.bfloat16 | None | 405,575,491 | 255

FrozenDict({'in_channels': 3, 'out_channels': 3, 'latent_channels': 64, 'block_out_channels': [128, 256, 512, 512, 1024, 1024], 'layers_per_block': 2, 'spatial_compression_ratio': 32, 'sample_size': 384, 'scaling_factor': 0.75289, 'downsample_match_channel': True, 'upsample_match_channel': True, '_class_name': 'AutoencoderKLHunyuanImage', '_diffusers_version': '0.36.0.dev0', '_name_or_path': '/mnt/models/Diffusers/models--hunyuanvideo-community--HunyuanImage-2.1-Distilled-Diffusers/snapshots/2effeb8511aee5b2ed94984d30c630203404173b/vae'})
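
The VAE config above implies a 32x spatial compression into 64 latent channels. A minimal sketch of the resulting latent shape follows (the 2048x2048 example resolution is an assumption, not taken from this listing):

```python
# Sketch only: derive the latent shape implied by the AutoencoderKLHunyuanImage
# config above (latent_channels=64, spatial_compression_ratio=32).
def latent_shape(height: int, width: int,
                 latent_channels: int = 64,
                 spatial_compression_ratio: int = 32) -> tuple[int, int, int]:
    """Return (channels, latent_height, latent_width) for an input image."""
    return (latent_channels,
            height // spatial_compression_ratio,
            width // spatial_compression_ratio)

print(latent_shape(2048, 2048))  # (64, 64, 64)
```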

text_encoder | Qwen2_5_VLForConditionalGeneration | xpu:0 | torch.bfloat16 | None | 8,292,166,656 | 763

Qwen2_5_VLConfig { "architectures": [ "Qwen2_5_VLForConditionalGeneration" ], "attention_dropout": 0.0, "bos_token_id": 151643, "dtype": "bfloat16", "eos_token_id": 151645, "hidden_act": "silu", "hidden_size": 3584, "initializer_range": 0.02, "intermediate_size": 18944, "max_position_embeddings": 128000, "max_window_layers": 28, "model_type": "qwen2_5_vl", "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "rms_norm_eps": 1e-06, "rope_scaling": { "mrope_section": [ 16, 24, 24 ], "rope_type": "default", "type": "default" }, "rope_theta": 1000000.0, "sliding_window": 32768, "text_config": { "_name_or_path": "hunyuanvideo-community/HunyuanImage-2.1-Diffusers", "architectures": [ "Qwen2_5_VLForConditionalGeneration" ], "attention_dropout": 0.0, "bos_token_id": 151643, "dtype": "bfloat16", "eos_token_id": 151645, "hidden_act": "silu", "hidden_size": 3584, "image_token_id": 151655, "initializer_range": 0.02, "intermediate_size": 18944, "layer_types": [ "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention" ], "max_position_embeddings": 128000, "max_window_layers": 28, "model_type": "qwen2_5_vl_text", "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "rms_norm_eps": 1e-06, "rope_scaling": { "mrope_section": [ 16, 24, 24 ], "rope_type": "default", "type": "default" }, "rope_theta": 1000000.0, "sliding_window": null, "use_cache": true, "use_sliding_window": false, "video_token_id": 151656, "vision_end_token_id": 151653, "vision_start_token_id": 151652, "vision_token_id": 151654, "vocab_size": 152064 }, "tie_word_embeddings": false, "transformers_version": "4.57.1", "use_cache": true, "use_sliding_window": false, "vision_config": { "depth": 32, "dtype": "bfloat16", "fullatt_block_indexes": [ 7, 15, 23, 31 ], "hidden_act": "silu", "hidden_size": 1280, "in_channels": 3, "in_chans": 3, "initializer_range": 0.02, "intermediate_size": 3420, "model_type": "qwen2_5_vl", "num_heads": 16, "out_hidden_size": 3584, "patch_size": 14, "spatial_merge_size": 2, "spatial_patch_size": 14, "temporal_patch_size": 2, "tokens_per_second": 2, "window_size": 112 }, "vision_token_id": 151654, "vocab_size": 152064 }
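
For reference, the Qwen2.5-VL encoder can be loaded on its own from the text_encoder/tokenizer subfolders. The sketch below is generic last-hidden-state extraction; it is not necessarily how the HunyuanImage pipeline templates or pools its prompt embeddings.

```python
# Hedged sketch: generic hidden-state extraction from the Qwen2.5-VL text
# encoder listed above. The prompt formatting here is an assumption; the
# pipeline's own prompt templating is not reproduced.
import torch
from transformers import AutoTokenizer, Qwen2_5_VLForConditionalGeneration

repo = "hunyuanvideo-community/HunyuanImage-2.1-Distilled-Diffusers"
tokenizer = AutoTokenizer.from_pretrained(repo, subfolder="tokenizer")
text_encoder = Qwen2_5_VLForConditionalGeneration.from_pretrained(
    repo, subfolder="text_encoder", torch_dtype=torch.bfloat16
)

inputs = tokenizer("a cinematic photo of a red fox at dawn", return_tensors="pt")
with torch.no_grad():
    out = text_encoder(**inputs, output_hidden_states=True)

# The last hidden state is hidden_size=3584 wide, the same width as the
# transformer's text_embed_dim further down in this listing.
print(out.hidden_states[-1].shape)  # (1, seq_len, 3584)
```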

tokenizer | Qwen2Tokenizer | None | None | None | 0 | 0

None

text_encoder_2 | T5EncoderModel | xpu:0 | torch.bfloat16 | None | 219,314,944 | 235

T5Config { "architectures": [ "T5EncoderModel" ], "classifier_dropout": 0.0, "d_ff": 3584, "d_kv": 64, "d_model": 1472, "decoder_start_token_id": 0, "dense_act_fn": "gelu_new", "dropout_rate": 0.1, "dtype": "bfloat16", "eos_token_id": 1, "feed_forward_proj": "gated-gelu", "gradient_checkpointing": false, "initializer_factor": 1.0, "is_encoder_decoder": false, "is_gated_act": true, "layer_norm_epsilon": 1e-06, "model_type": "t5", "num_decoder_layers": 4, "num_heads": 6, "num_layers": 12, "pad_token_id": 0, "relative_attention_max_distance": 128, "relative_attention_num_buckets": 32, "tie_word_embeddings": false, "tokenizer_class": "ByT5Tokenizer", "transformers_version": "4.57.1", "use_cache": false, "vocab_size": 1510 }
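
The secondary encoder is a small byte-level (ByT5-style) T5 encoder. A hedged sketch of loading it standalone; how the pipeline consumes these embeddings is not shown in this listing.

```python
# Hedged sketch: loading the byte-level secondary text encoder listed above.
import torch
from transformers import AutoTokenizer, T5EncoderModel

repo = "hunyuanvideo-community/HunyuanImage-2.1-Distilled-Diffusers"
tokenizer_2 = AutoTokenizer.from_pretrained(repo, subfolder="tokenizer_2")
text_encoder_2 = T5EncoderModel.from_pretrained(
    repo, subfolder="text_encoder_2", torch_dtype=torch.bfloat16
)

ids = tokenizer_2('A storefront sign that reads "OPEN 24H"', return_tensors="pt")
with torch.no_grad():
    emb = text_encoder_2(**ids).last_hidden_state

# d_model=1472 matches the transformer's text_embed_2_dim below.
print(emb.shape)  # (1, seq_len, 1472)
```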

tokenizer_2 | ByT5Tokenizer | None | None | None | 0 | 0

None

transformer | HunyuanImageTransformer2DModel | xpu:0 | torch.bfloat16 | None | 17,453,334,976 | 1406

FrozenDict({'in_channels': 64, 'out_channels': 64, 'num_attention_heads': 28, 'attention_head_dim': 128, 'num_layers': 20, 'num_single_layers': 40, 'num_refiner_layers': 2, 'mlp_ratio': 4.0, 'patch_size': [1, 1], 'qk_norm': 'rms_norm', 'guidance_embeds': True, 'text_embed_dim': 3584, 'text_embed_2_dim': 1472, 'rope_theta': 256.0, 'rope_axes_dim': [64, 64], 'use_meanflow': True, '_class_name': 'HunyuanImageTransformer2DModel', '_diffusers_version': '0.36.0.dev0', '_name_or_path': 'hunyuanvideo-community/HunyuanImage-2.1-Distilled-Diffusers'})
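
The transformer's widths line up with the other components: 28 heads x 128 head dim gives an inner width of 3584 (the Qwen2.5-VL hidden size), its 64 in/out channels match the VAE's latent_channels, and with patch_size [1, 1] a 2048x2048 image yields 64x64 = 4096 image tokens, the scheduler's max_image_seq_len. A small sketch of that arithmetic (the example resolution and the one-token-per-latent-position assumption are mine, not stated in this listing):

```python
# Sketch: dimensional cross-checks using values copied from the configs above.
num_attention_heads, attention_head_dim = 28, 128
inner_dim = num_attention_heads * attention_head_dim
print(inner_dim)  # 3584, same width as text_embed_dim (Qwen2.5-VL hidden_size)

# Latent grid for an assumed 2048x2048 input through the 32x VAE,
# tokenized with patch_size = [1, 1]:
latent_h = latent_w = 2048 // 32          # 64 x 64 latent positions
image_tokens = latent_h * latent_w        # one token per latent position (assumed)
print(image_tokens)                       # 4096, the scheduler's max_image_seq_len
```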

scheduler | FlowMatchEulerDiscreteScheduler | None | None | None | 0 | 0

FrozenDict({'num_train_timesteps': 1000, 'shift': 4.0, 'use_dynamic_shifting': False, 'base_shift': 0.5, 'max_shift': 1.15, 'base_image_seq_len': 256, 'max_image_seq_len': 4096, 'invert_sigmas': False, 'shift_terminal': None, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'time_shift_type': 'exponential', 'stochastic_sampling': False, '_class_name': 'FlowMatchEulerDiscreteScheduler', '_diffusers_version': '0.36.0.dev0'})
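
With use_dynamic_shifting=False, the scheduler applies the static shift=4.0 when building its sigma schedule. A hedged sketch of instantiating it from the values above (the 8-step count is only an example; this listing does not state the distilled model's recommended step count):

```python
# Hedged sketch: build the scheduler from the config values above and inspect
# the shifted sigma schedule. The step count is an arbitrary example.
from diffusers import FlowMatchEulerDiscreteScheduler

scheduler = FlowMatchEulerDiscreteScheduler(num_train_timesteps=1000, shift=4.0)
scheduler.set_timesteps(num_inference_steps=8)

# With use_dynamic_shifting=False the sigmas are remapped as
#   shift * s / (1 + (shift - 1) * s),
# which concentrates steps at higher noise levels.
print(scheduler.sigmas)
print(scheduler.timesteps)
```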

guider | NoneType | None | None | None | 0 | 0

None

ocr_guider | NoneType | None | None | None | 0 | 0

None
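
A listing like the table above can be reproduced by loading the pipeline and walking its components. A hedged sketch follows: DiffusionPipeline resolves the concrete pipeline class from model_index.json, the device and dtype choices are assumptions, and the parameter/module counting here may differ slightly from however the table was generated.

```python
# Hedged sketch: load the pipeline and print per-component class, parameter
# and module counts, roughly mirroring the table above.
import torch
from diffusers import DiffusionPipeline

repo = "hunyuanvideo-community/HunyuanImage-2.1-Distilled-Diffusers"
pipe = DiffusionPipeline.from_pretrained(repo, torch_dtype=torch.bfloat16)
pipe.to("xpu")  # the table above was captured on xpu:0; use "cuda" or "cpu" as available

for name, component in pipe.components.items():
    cls = type(component).__name__
    if isinstance(component, torch.nn.Module):
        params = sum(p.numel() for p in component.parameters())
        modules = sum(1 for _ in component.modules())
        print(f"{name}: {cls}, {params:,} params, {modules} modules")
    else:
        print(f"{name}: {cls}")
```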

...