import io
import random

import modal
MINUTES = 60
app = modal.App("Flux.1-dev")
image = (
    modal.Image.debian_slim(python_version="3.12")
    .pip_install(
        "accelerate==0.33.0",
        "diffusers==0.31.0",
        "fastapi[standard]==0.115.4",
        "huggingface-hub[hf_transfer]==0.25.2",
        "sentencepiece==0.2.0",
        "torch==2.5.1",
        "torchvision==0.20.1",
        "transformers~=4.44.0",
        "peft",
    )
    .env({"HF_HUB_ENABLE_HF_TRANSFER": "1"})  # faster downloads from the Hugging Face Hub
)

with image.imports():
    import diffusers
    import torch
    from fastapi import Response
model_id = "black-forest-labs/FLUX.1-dev"
@app.cls(
    image=image,
    gpu="A100-40GB",
    timeout=10 * MINUTES,
)
class Inference:
    @modal.build()
    @modal.enter()
    def initialize(self):
        # Download the base weights at image build time and load the pipeline
        # when the container starts.
        self.pipe = diffusers.FluxPipeline.from_pretrained(
            model_id,
            token="",  # a Hugging Face token is normally required for the gated FLUX.1-dev weights
            torch_dtype=torch.bfloat16,
        )
        # Apply and fuse a detail-enhancing LoRA on top of the base model.
        self.pipe.load_lora_weights(
            "Shakker-Labs/FLUX.1-dev-LoRA-add-details",
            weight_name="FLUX-dev-lora-add_details.safetensors",
        )
        self.pipe.fuse_lora(lora_scale=0.45)

    @modal.enter()
    def move_to_gpu(self):
        self.pipe.to("cuda")
    @modal.method()
    def run(
        self,
        prompt: str,
        width: int = 768,
        height: int = 1024,
        steps: int = 28,
        cfg: float = 4.5,
        batch_size: int = 1,
        seed: int | None = None,
    ) -> list[bytes]:
        seed = seed if seed is not None else random.randint(0, 2**32 - 1)
        print("seeding RNG with", seed)
        torch.manual_seed(seed)

        images = self.pipe(
            prompt,
            num_images_per_prompt=batch_size,
            num_inference_steps=steps,
            height=height,
            width=width,
            guidance_scale=cfg,
            max_sequence_length=512,
        ).images

        # Encode each generated image as PNG bytes.
        image_output = []
        for image in images:
            with io.BytesIO() as buf:
                image.save(buf, format="PNG")
                image_output.append(buf.getvalue())
        torch.cuda.empty_cache()
        return image_output
    @modal.web_endpoint(docs=True)
    def web(
        self,
        prompt: str,
        width: int = 768,
        height: int = 1024,
        steps: int = 28,
        cfg: float = 4.5,
        seed: int | None = None,
    ):
        return Response(
            content=self.run.local(  # run in the same container rather than spawning a new one
                prompt,
                width=width,
                height=height,
                steps=steps,
                cfg=cfg,
                batch_size=1,
                seed=seed,
            )[0],
            media_type="image/png",
        )
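
# A minimal usage sketch (an addition, not part of the original script): a local
# entrypoint for invoking the class from the CLI with `modal run`. The default
# prompt and the output filename are illustrative assumptions.
@app.local_entrypoint()
def main(prompt: str = "a vintage photograph of a lighthouse at dusk"):
    # .remote() executes Inference.run on Modal's infrastructure and returns
    # the list of PNG-encoded images.
    images = Inference().run.remote(prompt)
    output_path = "flux_output.png"
    with open(output_path, "wb") as f:
        f.write(images[0])
    print("saved image to", output_path)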