File size: 4,804 Bytes
b05b8f1 a4814de 10d76c7 a4814de b05b8f1 a4814de b05b8f1 a4814de 10d76c7 a4814de a378269 a4814de a378269 a4814de 735c1c8 a4814de 735c1c8 a4814de a226463 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 |
---
license: apache-2.0
pipeline_tag: text-to-image
tags:
- flux
base_model:
- black-forest-labs/FLUX.1-dev
---
# Flux Dev F8 Diffusers
Transformer support in float8_e4m3fn precision. Compatible with the native FluxPipeline.
Requires RTX 3000 or newer card. ***2-3x speedup*** in inference time.
You still need the full-weight FLUX.1-dev model for the other components; only the transformer directory in this repository is stored in float8_e4m3fn.
Also compatible with:
* [T5 Encoder](https://huggingface.co/twodgirl/Flux-dev-optimum-quant-qfloat8/tree/main/flux-t5) quanto model
* [PuLID](https://github.com/ToTheBeginning/PuLID/tree/5004b6954ec3f9b59319b271f60339178f786918)
*Make sure your torch version is 2.4 or newer.*
## Inference
The linear layers of the float8 model are replaced with layers that convert to bfloat16 on the fly, which uses about half the VRAM.
```python
from diffusers import AutoencoderKL, FluxTransformer2DModel, FluxPipeline
from linear_8 import replace_regular_linears
import torch
# Load only the transformer from the float8 repository, keeping its weights
# in float8_e4m3fn.
transformer = FluxTransformer2DModel.from_pretrained(
    'twodgirl/flux-dev-fp8-e4m3fn-diffusers',
    subfolder='transformer',
    torch_dtype=torch.float8_e4m3fn,
)
# Swap the transformer's linear layers in place (implemented in linear_8.py).
replace_regular_linears(transformer)

# The VAE comes from the original FLUX.1-dev repository and is cast to bfloat16.
vae = AutoencoderKL.from_pretrained('black-forest-labs/FLUX.1-dev', subfolder='vae')
vae = vae.to(torch.bfloat16)

# Every remaining component is loaded from FLUX.1-dev; the transformer and
# VAE prepared above override the ones in that repository.
pipe = FluxPipeline.from_pretrained(
    'black-forest-labs/FLUX.1-dev',
    transformer=transformer,
    vae=vae,
)
pipe.enable_model_cpu_offload()
```
## Inference with PuLID
*Tested with diffusers v0.30.2; it is likely to need small modifications for future versions.*
```
pip install -r requirements.txt
```
Download [T5 Encoder](https://huggingface.co/twodgirl/Flux-dev-optimum-quant-qfloat8/tree/main/flux-t5) and the content of [PuLID](https://github.com/ToTheBeginning/PuLID) (without the requirements file).
You should have:
```
eva_clip
pulid
flux-t5
flux_model.py
linear_8.py
the-file-below.py
```
The demo file in the same directory:
```python
from diffusers import AutoencoderKL, FluxPipeline
from flux_model import FluxTransformer2DModel
from linear_8 import replace_regular_linears
import torch
from optimum.quanto.models import QuantizedTransformersModel
import numpy as np
from PIL import Image
from pulid.pipeline_flux import PuLIDPipeline
from transformers import T5EncoderModel
from torchvision import transforms
class T5Model(QuantizedTransformersModel):
    """Quantized-model wrapper bound to ``T5EncoderModel``."""

    # Presumably the class QuantizedTransformersModel instantiates for the
    # wrapped model -- confirm against the optimum-quanto documentation.
    auto_class = T5EncoderModel
class FluxGenerator:
    """Attach PuLID identity embeddings to a pipeline's transformer.

    The pipeline call does not accept the identity embedding as an argument,
    so this helper stores it on ``pipe.transformer`` as the attributes
    ``pul_id`` and ``pul_id_weight``.
    """

    def __init__(self, pipe):
        """Keep a reference to *pipe* and set up the PuLID pipeline.

        Args:
            pipe: Pipeline object exposing a ``transformer`` attribute.
        """
        self.pipe = pipe
        self.pulid_model = PuLIDPipeline(pipe.transformer, 'cuda', weight_dtype=torch.bfloat16)
        self.pulid_model.load_pretrain()

    def clear_id(self):
        """Remove the stored identity embedding and reset its weight to 1.0."""
        transformer = self.pipe.transformer
        transformer.pul_id = None
        # Reset the same attribute that set_id() writes. The previous code
        # assigned ``pul_weight``, which set_id() never touches, so a weight
        # stored by set_id() survived clear_id().
        transformer.pul_id_weight = 1.0

    def set_id(self, id_image, id_weight=1.0, true_cfg=1.0):
        """Compute the identity embedding for *id_image* and store it.

        Args:
            id_image: Reference image passed to
                ``pulid_model.get_id_embedding``; ``None`` stores no embedding.
            id_weight: Value stored as ``pul_id_weight`` on the transformer.
            true_cfg: True-CFG scale. It counts as enabled only when it
                differs from 1.0 by more than 1e-2, so the default disables it.
        """
        use_true_cfg = abs(true_cfg - 1.0) > 1e-2
        id_embeddings = uncond_id_embeddings = None
        if id_image is not None:
            id_embeddings, uncond_id_embeddings = self.pulid_model.get_id_embedding(
                id_image, cal_uncond=use_true_cfg)
        # The pipe cannot accept its module's parameters,
        # change the module's state instead.
        transformer = self.pipe.transformer
        transformer.pul_id = uncond_id_embeddings if use_true_cfg else id_embeddings
        transformer.pul_id_weight = id_weight
# Override from_config so the T5 encoder it builds is converted to bfloat16.
T5EncoderModel.from_config = lambda c: T5EncoderModel(c).to(dtype=torch.bfloat16)
# Load the quantized T5 from the local flux-t5 directory and keep the wrapped model.
t5 = T5Model.from_pretrained('./flux-t5')._wrapped

# Float8 transformer, with its linear layers swapped in place (see linear_8.py).
transformer = FluxTransformer2DModel.from_pretrained(
    'twodgirl/flux-dev-fp8-e4m3fn-diffusers',
    subfolder='transformer',
    torch_dtype=torch.float8_e4m3fn,
)
replace_regular_linears(transformer)

vae = AutoencoderKL.from_pretrained('black-forest-labs/FLUX.1-dev', subfolder='vae')
vae = vae.to(torch.bfloat16)

# Remaining components come from FLUX.1-dev; the three prepared above replace
# the corresponding ones from that repository.
pipe = FluxPipeline.from_pretrained(
    'black-forest-labs/FLUX.1-dev',
    text_encoder_2=t5,
    transformer=transformer,
    vae=vae,
)
pipe.enable_model_cpu_offload()

# Resize the RGB reference image before handing it over as a NumPy array.
reference = Image.open('reference.png').convert('RGB')
face = transforms.Resize(1024)(reference)

gen = FluxGenerator(pipe)
gen.set_id(np.array(face))

result = pipe('portrait, color, cinematic', num_inference_steps=10)
image = result.images[0]
image.save('portrait.png')
```
## Disclaimer
Use of this code requires citation and attribution to the author via a link to their Hugging Face profile in all resulting work.
The model weights have a [custom license](https://huggingface.co/black-forest-labs/FLUX.1-dev/blob/main/LICENSE.md). |