import os
import sys

import torch
from diffusers import StableDiffusionInpaintPipeline
from PIL import Image
from rembg import remove
from transformers import BlipProcessor, BlipForConditionalGeneration

# Make the cloth-segmentation submodule importable.
sys.path.append(
    os.path.join(os.path.dirname(__file__), "huggingface-cloth-segmentation"))

from process import load_seg_model, get_palette, generate_mask


device = 'cpu'
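# Assumption: a GPU could be used instead when one is available, e.g.
# device = 'cuda' if torch.cuda.is_available() else 'cpu'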



def initialize_and_load_models():
    # Load the pretrained cloth-segmentation network from a local checkpoint.
    checkpoint_path = 'model/cloth_segm.pth'
    net = load_seg_model(checkpoint_path, device=device)
    return net

net = initialize_and_load_models()
palette = get_palette(4)  # palette for the 4 segmentation classes


def run(img):
    # Segment the clothing in `img` with the preloaded network.
    cloth_seg = generate_mask(img, net=net, palette=palette, device=device)
    return cloth_seg
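# Example: run(Image.open("person.jpg")) returns the segmentation result and,
# as a side effect, writes per-class alpha masks under
# ./huggingface-cloth-segmentation/output/alpha/, which gen_vton below relies on.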

def image_caption(image_path, img_type):
    # Caption an image with the BLIP-based FuseCap model.
    device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
    processor = BlipProcessor.from_pretrained("noamrot/FuseCap")
    model = BlipForConditionalGeneration.from_pretrained("noamrot/FuseCap").to(device)

    raw_image = Image.open(image_path).convert('RGB')
    if img_type == "dress":
        # Strip the background so the caption describes only the garment.
        raw_image = remove(raw_image)
        print("bg removed")
        raw_image.show()

    text = "a picture of "
    inputs = processor(raw_image, text, return_tensors="pt").to(device)

    out = model.generate(**inputs, num_beams=3)
    caption = processor.decode(out[0], skip_special_tokens=True)
    print(caption)
    return caption

def gen_vton(image_input, dress_input):
    # Load the pre-trained inpainting model.
    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting",
        torch_dtype=torch.float32,  # float32 keeps CPU inference simple
    )
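    # Assumption: on a CUDA machine, pipe.to("cuda") together with
    # torch_dtype=torch.float16 would make generation much faster; float32 on
    # CPU is kept here for portability.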
    image_path = image_input
    img_open = Image.open(image_path)

    # Segment the person image; masks are written to the output folder.
    run(img_open)
    gen_mask_1 = "./huggingface-cloth-segmentation/output/alpha/1.png"
    gen_mask_2 = "./huggingface-cloth-segmentation/output/alpha/2.png"
    gen_mask_3 = "./huggingface-cloth-segmentation/output/alpha/3.png"
    print("mask_generated")
    # Pick the first mask file the segmentation step actually produced;
    # testing the path string itself would always be truthy.
    if os.path.exists(gen_mask_1):
        mask_path = gen_mask_1
    elif os.path.exists(gen_mask_2):
        mask_path = gen_mask_2
    else:
        mask_path = gen_mask_3

    dress_path = dress_input

    image = Image.open(image_path)
    mask = Image.open(mask_path).convert("L")  # convert mask to grayscale

    # The inpainting pipeline expects 512x512 inputs.
    image = image.resize((512, 512))
    mask = mask.resize((512, 512))
    # Build the prompt from automatic captions of the user and dress images.
    user_caption = image_caption(image_path, "user")
    dress_caption = image_caption(dress_path, "dress")
    print(user_caption)
    print(dress_caption)
    prompt = f"a human wearing a {dress_caption}"  # f-string, so the caption is interpolated
    neg_prompt = f"{user_caption}"

    # Note: `image` and `mask_image` are PIL images; the mask is white where
    # the pipeline should inpaint and black where it should keep the original.
    guidance_scale = 7.5
    strength = 0.9  # diffusers calls this parameter `strength`, not `denoising_strength`
    num_samples = 2
    generator = torch.Generator(device="cpu")  # Explicitly create a CPU generator

    images = pipe(
        prompt=prompt,
        negative_prompt=neg_prompt,
        image=image,
        mask_image=mask,
        guidance_scale=guidance_scale,
        strength=strength,
        generator=generator,
        num_images_per_prompt=num_samples,
    ).images
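    # `images` now holds num_samples PIL images: diffusers returns a
    # StableDiffusionPipelineOutput whose `.images` field is a list of PIL images.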

    # Save the first two generated samples.
    os.makedirs("./processed_images", exist_ok=True)
    images[0].save("./processed_images/output_image.jpg")
    images[1].save("./processed_images/output_image_1.jpg")
 

if __name__ == "__main__":
    # Example invocation; adjust these paths to images on your machine.
    user_image = "C:/Users/Admin/Downloads/woman.jpg"
    dress_image = "C:/Users/Admin/Downloads/dress1.jpg"
    gen_vton(user_image, dress_image)

def predict(inputs, prompt):
    # `inputs` is expected to be a dict of PIL images with 'image' and 'mask'
    # keys (e.g. from a Gradio inpainting component).
    image = inputs['image'].convert("RGB").resize((512, 512))
    mask_image = inputs['mask'].convert("RGB").resize((512, 512))
    pipe = StableDiffusionInpaintPipeline.from_pretrained(
        "runwayml/stable-diffusion-inpainting", torch_dtype=torch.float32)
    images = pipe(prompt=prompt, image=image, mask_image=mask_image).images
    return images[0]
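# Hypothetical usage of predict(); the file names here are placeholders:
#   result = predict({'image': Image.open("person.jpg"),
#                     'mask': Image.open("mask.png")},
#                    "a human wearing a red floral dress")
#   result.save("predicted.jpg")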