...

Prompt: photorealistic girl in bookshop choosing the book in romantic stories shelf. smiling



[Image grid: columns = sampling steps 4 / 8 / 16 / 32 / 64; rows = CFG 1, 2, 3, 4, 5, 6, 8. The generated images are not preserved in this text export.]
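For reference, a grid like this can be reproduced with a plain diffusers sweep over step count and guidance scale. This is a minimal sketch only, assuming the Hub repo loads through the generic DiffusionPipeline.from_pretrained entry point; it is not the script used for the tests above, and the device string, seed, and output paths are placeholders.

Code Block
import itertools
import torch
from diffusers import DiffusionPipeline

# Assumption: the repo exposes a standard diffusers pipeline via its model_index.json.
pipe = DiffusionPipeline.from_pretrained(
    "hunyuanvideo-community/HunyuanImage-2.1-Diffusers",
    torch_dtype=torch.bfloat16,
)
pipe.to("xpu")  # device used in the setup below; use "cuda" on NVIDIA hardware

prompt = "photorealistic girl in bookshop choosing the book in romantic stories shelf. smiling"
steps_grid = [4, 8, 16, 32, 64]
cfg_grid = [1, 2, 3, 4, 5, 6, 8]

for steps, cfg in itertools.product(steps_grid, cfg_grid):
    # Passing guidance_scale as a call argument is an assumption; with the
    # guider-based setup shown in the model info below, the CFG value may
    # instead be configured on the guider.
    image = pipe(
        prompt=prompt,
        num_inference_steps=steps,
        guidance_scale=cfg,
        generator=torch.Generator(device="cpu").manual_seed(42),  # fixed seed for comparability
    ).images[0]
    image.save(f"grid_steps{steps}_cfg{cfg}.png")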


Test 2 - Face and hand

...

Code Block
  "huggingface_token": "hf_..FraU",
  "diffusers_version": "7536f647e4144c7acaf9e140893ff7edb85bf9a3",
  "sd_model_checkpoint": "hunyuanvideo-community/HunyuanImage-2.1-Diffusers",
  "sd_checkpoint_hash": null,
  "diffusers_to_gpu": true,
  "device_map": "gpu",
  "model_wan_stage": "combined",
  "diffusers_offload_mode": "none",
  "ui_request_timeout": 300000,
  "show_progress_type": "Simple"


Model info

Diffusers/hunyuanvideo-community/HunyuanImage-2.1-Diffusers

Module | Class | Device | Dtype | Quant | Params | Modules | Config

vae | AutoencoderKLHunyuanImage | cpu | torch.bfloat16 | None | 405575491 | 255

FrozenDict({'in_channels': 3, 'out_channels': 3, 'latent_channels': 64, 'block_out_channels': [128, 256, 512, 512, 1024, 1024], 'layers_per_block': 2, 'spatial_compression_ratio': 32, 'sample_size': 384, 'scaling_factor': 0.75289, 'downsample_match_channel': True, 'upsample_match_channel': True, '_class_name': 'AutoencoderKLHunyuanImage', '_diffusers_version': '0.36.0.dev0', '_name_or_path': '/mnt/models/Diffusers/models--hunyuanvideo-community--HunyuanImage-2.1-Diffusers/snapshots/7e7b7a177de58591aeaffca0929f4765003d7ced/vae'})
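Worth noting in this VAE config: spatial_compression_ratio is 32 (vs. the usual 8 for SD/SDXL-style VAEs) with 64 latent channels, so at the model's 2K output size a 2048x2048 image is denoised as a 64x64x64 latent. A quick shape check, as an illustration only:

Code Block
# Latent geometry implied by the VAE config above (illustration only).
height = width = 2048          # 2K output resolution used as an example
compression = 32               # spatial_compression_ratio
latent_channels = 64

latent_h = height // compression   # 64
latent_w = width // compression    # 64
print(latent_channels, latent_h, latent_w)   # 64 64 64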

text_encoder | Qwen2_5_VLForConditionalGeneration | xpu:0 | torch.bfloat16 | None | 8292166656 | 763

Qwen2_5_VLConfig { "architectures": [ "Qwen2_5_VLForConditionalGeneration" ], "attention_dropout": 0.0, "bos_token_id": 151643, "dtype": "bfloat16", "eos_token_id": 151645, "hidden_act": "silu", "hidden_size": 3584, "initializer_range": 0.02, "intermediate_size": 18944, "max_position_embeddings": 128000, "max_window_layers": 28, "model_type": "qwen2_5_vl", "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "rms_norm_eps": 1e-06, "rope_scaling": { "mrope_section": [ 16, 24, 24 ], "rope_type": "default", "type": "default" }, "rope_theta": 1000000.0, "sliding_window": 32768, "text_config": { "_name_or_path": "hunyuanvideo-community/HunyuanImage-2.1-Diffusers", "architectures": [ "Qwen2_5_VLForConditionalGeneration" ], "attention_dropout": 0.0, "bos_token_id": 151643, "dtype": "bfloat16", "eos_token_id": 151645, "hidden_act": "silu", "hidden_size": 3584, "image_token_id": 151655, "initializer_range": 0.02, "intermediate_size": 18944, "layer_types": [ "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention", "full_attention" ], "max_position_embeddings": 128000, "max_window_layers": 28, "model_type": "qwen2_5_vl_text", "num_attention_heads": 28, "num_hidden_layers": 28, "num_key_value_heads": 4, "rms_norm_eps": 1e-06, "rope_scaling": { "mrope_section": [ 16, 24, 24 ], "rope_type": "default", "type": "default" }, "rope_theta": 1000000.0, "sliding_window": null, "use_cache": true, "use_sliding_window": false, "video_token_id": 151656, "vision_end_token_id": 151653, "vision_start_token_id": 151652, "vision_token_id": 151654, "vocab_size": 152064 }, "tie_word_embeddings": false, "transformers_version": "4.57.1", "use_cache": true, "use_sliding_window": false, "vision_config": { "depth": 32, "dtype": "bfloat16", "fullatt_block_indexes": [ 7, 15, 23, 31 ], "hidden_act": "silu", "hidden_size": 1280, "in_channels": 3, "in_chans": 3, "initializer_range": 0.02, "intermediate_size": 3420, "model_type": "qwen2_5_vl", "num_heads": 16, "out_hidden_size": 3584, "patch_size": 14, "spatial_merge_size": 2, "spatial_patch_size": 14, "temporal_patch_size": 2, "tokens_per_second": 2, "window_size": 112 }, "vision_token_id": 151654, "vocab_size": 152064 }

tokenizer | Qwen2Tokenizer | None | None | None | 0 | 0

None

text_encoder_2 | T5EncoderModel | xpu:0 | torch.bfloat16 | None | 219314944 | 235

T5Config { "architectures": [ "T5EncoderModel" ], "classifier_dropout": 0.0, "d_ff": 3584, "d_kv": 64, "d_model": 1472, "decoder_start_token_id": 0, "dense_act_fn": "gelu_new", "dropout_rate": 0.1, "dtype": "bfloat16", "eos_token_id": 1, "feed_forward_proj": "gated-gelu", "gradient_checkpointing": false, "initializer_factor": 1.0, "is_encoder_decoder": false, "is_gated_act": true, "layer_norm_epsilon": 1e-06, "model_type": "t5", "num_decoder_layers": 4, "num_heads": 6, "num_layers": 12, "pad_token_id": 0, "relative_attention_max_distance": 128, "relative_attention_num_buckets": 32, "tie_word_embeddings": false, "tokenizer_class": "ByT5Tokenizer", "transformers_version": "4.57.1", "use_cache": false, "vocab_size": 1510 }

tokenizer_2 | ByT5Tokenizer | None | None | None | 0 | 0

None

transformer | HunyuanImageTransformer2DModel | xpu:0 | torch.bfloat16 | None | 17425795520 | 1397

FrozenDict({'in_channels': 64, 'out_channels': 64, 'num_attention_heads': 28, 'attention_head_dim': 128, 'num_layers': 20, 'num_single_layers': 40, 'num_refiner_layers': 2, 'mlp_ratio': 4.0, 'patch_size': [1, 1], 'qk_norm': 'rms_norm', 'guidance_embeds': False, 'text_embed_dim': 3584, 'text_embed_2_dim': 1472, 'rope_theta': 256.0, 'rope_axes_dim': [64, 64], 'use_meanflow': False, '_use_default_values': ['use_meanflow'], '_class_name': 'HunyuanImageTransformer2DModel', '_diffusers_version': '0.36.0.dev0', '_name_or_path': 'hunyuanvideo-community/HunyuanImage-2.1-Diffusers'})
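The transformer config lines up with the other modules: 28 attention heads at head dim 128 give a 3584-wide stream, which equals text_embed_dim (the Qwen2.5-VL hidden size), while text_embed_2_dim = 1472 matches the ByT5 d_model, and in_channels/out_channels = 64 match the VAE's latent channels. A small consistency check, for illustration:

Code Block
# Cross-checks between the transformer config and the other module configs (illustration only).
inner_dim = 28 * 128               # num_attention_heads * attention_head_dim
assert inner_dim == 3584           # text_embed_dim, i.e. the Qwen2.5-VL hidden_size

# patch_size [1, 1] means one transformer token per latent pixel, so at 2048x2048
# with the VAE's 32x spatial compression the image sequence length is:
image_tokens = (2048 // 32) * (2048 // 32)
assert image_tokens == 4096        # == max_image_seq_len in the scheduler config below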

scheduler | FlowMatchEulerDiscreteScheduler | None | None | None | 0 | 0

FrozenDict({'num_train_timesteps': 1000, 'shift': 5.0, 'use_dynamic_shifting': False, 'base_shift': 0.5, 'max_shift': 1.15, 'base_image_seq_len': 256, 'max_image_seq_len': 4096, 'invert_sigmas': False, 'shift_terminal': None, 'use_karras_sigmas': False, 'use_exponential_sigmas': False, 'use_beta_sigmas': False, 'time_shift_type': 'exponential', 'stochastic_sampling': False, '_class_name': 'FlowMatchEulerDiscreteScheduler', '_diffusers_version': '0.36.0.dev0'})
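The scheduler runs with a fixed shift of 5.0 and dynamic shifting disabled. In diffusers' FlowMatchEulerDiscreteScheduler, the static shift warps the sigma schedule toward the high-noise end as sigma' = shift * sigma / (1 + (shift - 1) * sigma). The snippet below only reproduces that formula to show the effect; it is not the scheduler class itself.

Code Block
def shift_sigma(sigma: float, shift: float = 5.0) -> float:
    # Static sigma shift used by flow-match schedulers when use_dynamic_shifting is False.
    return shift * sigma / (1 + (shift - 1) * sigma)

for s in (0.1, 0.25, 0.5, 0.75, 0.9):
    print(f"sigma {s:.2f} -> {shift_sigma(s):.3f}")
# 0.10 -> 0.357, 0.25 -> 0.625, 0.50 -> 0.833, ...: more of the schedule is spent
# at high noise levels, which matters most at very low step counts (4-8).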

guider | AdaptiveProjectedMixGuidance | None | None | None | 0 | 0

FrozenDict({'guidance_scale': 3.5, 'guidance_rescale': 0.0, 'adaptive_projected_guidance_scale': 10.0, 'adaptive_projected_guidance_momentum': -0.5, 'adaptive_projected_guidance_rescale': 10.0, 'eta': 0.0, 'use_original_formulation': False, 'start': 0.0, 'stop': 1.0, 'adaptive_projected_guidance_start_step': 5, 'enabled': True, '_class_name': 'AdaptiveProjectedMixGuidance', '_diffusers_version': '0.36.0.dev0'})

ocr_guider | AdaptiveProjectedMixGuidance | None | None | None | 0 | 0

FrozenDict({'guidance_scale': 3.5, 'guidance_rescale': 0.0, 'adaptive_projected_guidance_scale': 10.0, 'adaptive_projected_guidance_momentum': -0.5, 'adaptive_projected_guidance_rescale': 10.0, 'eta': 0.0, 'use_original_formulation': False, 'start': 0.0, 'stop': 1.0, 'adaptive_projected_guidance_start_step': 38, 'enabled': True, '_class_name': 'AdaptiveProjectedMixGuidance', '_diffusers_version': '0.36.0.dev0'})
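Both guiders are AdaptiveProjectedMixGuidance with a CFG scale of 3.5 and an adaptive-projected guidance (APG) scale of 10.0; the only difference between the main and OCR guiders is adaptive_projected_guidance_start_step (5 vs. 38), presumably the step at which the adaptive-projected term kicks in. The sketch below shows the general APG idea (momentum on the guidance delta, norm clamping, and projection against the conditional prediction); it is a conceptual illustration with assumed parameter names, not the diffusers implementation.

Code Block
import torch

def apg_step(cond, uncond, buffer, scale=10.0, momentum=-0.5, eta=0.0, rescale=10.0):
    """One adaptive-projected-guidance update (conceptual sketch, not the library code)."""
    delta = cond - uncond
    buffer = delta + momentum * buffer          # momentum accumulation across steps
    delta = buffer

    norm = delta.norm()
    if rescale > 0 and norm > rescale:          # clamp the guidance delta's norm
        delta = delta * (rescale / norm)

    # Split delta into components parallel / orthogonal to the conditional prediction
    # and keep only a fraction eta of the parallel part (eta=0 drops it entirely).
    cond_dir = cond / cond.norm().clamp(min=1e-8)
    parallel = (delta * cond_dir).sum() * cond_dir
    orthogonal = delta - parallel
    delta = eta * parallel + orthogonal

    return cond + (scale - 1.0) * delta, buffer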

...