import einops
import gradio as gr
import numpy as np
import torch

from pytorch_lightning import seed_everything
from huggingface_hub import hf_hub_url, cached_download

from util import resize_image, HWC3
from ldm.models.diffusion.ddim import DDIMSampler
from cldm.model import create_model, load_state_dict


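# Fetch the ControlNet scribble checkpoint from the Hugging Face Hub, load the
# weights on CPU first to limit peak memory, then move the model to the GPU.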
REPO_ID = "lllyasviel/ControlNet"
scribble_checkpoint = "models/control_sd15_scribble.pth"
scribble_model = create_model('./models/cldm_v15.yaml').cpu()
scribble_model.load_state_dict(load_state_dict(cached_download(
    hf_hub_url(REPO_ID, scribble_checkpoint)
), location='cpu'))
scribble_model = scribble_model.cuda()
ddim_sampler_scribble = DDIMSampler(scribble_model)
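# When True, shuttle model components between CPU and GPU around each
# diffusion phase to reduce peak VRAM usage.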
save_memory = False

def process(input_image, prompt, input_control, num_samples, image_resolution, ddim_steps, scale, seed, eta, low_threshold, high_threshold):
    # TODO: refactor; only the "Scribble" task is currently supported.

    if input_control == "Scribble":
        return process_scribble(input_image, prompt, num_samples, image_resolution, ddim_steps, scale, seed, eta)

def process_scribble(input_image, prompt, num_samples, image_resolution, ddim_steps, scale, seed, eta):

    with torch.no_grad():
        img = resize_image(HWC3(input_image), image_resolution)
        H, W, C = img.shape

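        # Binarize the sketch: pixels whose darkest channel falls below
        # mid-gray count as strokes and are set to white on a black map.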
        detected_map = np.zeros_like(img, dtype=np.uint8)
        detected_map[np.min(img, axis=2) < 127] = 255

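        # Scale to [0, 1], replicate once per requested sample, and rearrange
        # from BHWC to BCHW for the ControlNet conditioning branch.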
        control = torch.from_numpy(detected_map.copy()).float().cuda() / 255.0
        control = torch.stack([control for _ in range(num_samples)], dim=0)
        control = einops.rearrange(control, 'b h w c -> b c h w').clone()

        seed_everything(seed)

        if save_memory:
            scribble_model.low_vram_shift(is_diffusing=False)

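        # Conditioning dicts: c_concat carries the spatial scribble map, and
        # c_crossattn the text embedding. a_prompt and n_prompt are module-level
        # globals defined further below; they are resolved at call time.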
        cond = {"c_concat": [control], "c_crossattn": [scribble_model.get_learned_conditioning([prompt + ', ' + a_prompt] * num_samples)]}
        un_cond = {"c_concat": [control], "c_crossattn": [scribble_model.get_learned_conditioning([n_prompt] * num_samples)]}
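        # Stable Diffusion latent space: 4 channels at 1/8 the image resolution.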
        shape = (4, H // 8, W // 8)

        if save_memory:
            scribble_model.low_vram_shift(is_diffusing=True)
            
        samples, intermediates = ddim_sampler_scribble.sample(ddim_steps, num_samples,
                                                     shape, cond, verbose=False, eta=eta,
                                                     unconditional_guidance_scale=scale,
                                                     unconditional_conditioning=un_cond)

        if save_memory:
            scribble_model.low_vram_shift(is_diffusing=False)
                    
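        # Decode latents to pixel space and map [-1, 1] floats to uint8 [0, 255].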
        x_samples = scribble_model.decode_first_stage(samples)
        x_samples = (einops.rearrange(x_samples, 'b c h w -> b h w c') * 127.5 + 127.5).cpu().numpy().clip(0, 255).astype(np.uint8)

        results = [x_samples[i] for i in range(num_samples)]
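    # Return the inverted scribble map first, followed by the generated images.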
    return [255 - detected_map] + results

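# A minimal sketch of calling process_scribble() directly, bypassing the UI.
# File name and prompt are hypothetical, and Pillow is an extra dependency
# the app itself does not import:
#
#   from PIL import Image
#   sketch = np.asarray(Image.open('sketch.png').convert('RGB'))
#   outputs = process_scribble(sketch, 'brutalist concrete museum',
#                              1, 512, 20, 9.0, 42, 0.0)
#   # outputs[0] is the inverted scribble map; outputs[1:] are the samples.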
    
def create_canvas(w, h):
    # Blank white canvas for interactive scribbling.
    return np.full((h, w, 3), 255, dtype=np.uint8)


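# Gradio UI: sketch upload, task dropdown, and prompt on the left; sampler
# options in a collapsed accordion; the results gallery on the right.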
block = gr.Blocks().queue()
control_task_list = [
    "Scribble"
]

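# Fixed style suffix appended to every user prompt, plus a shared negative prompt.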
a_prompt = 'best quality, extremely detailed, architecture render, photorealistic, hyper realistic, surreal, dali, 3d rendering, render, 8k, 16k, extremely detailed, unreal engine, octane, maya'
n_prompt = 'longbody, lowres, bad anatomy, bad hands, missing fingers, pubic hair, extra digit, number, text, watermark, fewer digits, cropped, worst quality, low quality'

with block:
    gr.Markdown("## ControlNet - Architectural Sketch to Render Image")
    gr.HTML('''
     <p style="margin-bottom: 10px; font-size: 94%">
                Demo for ControlNet, optimized for architectural sketches and based on <a href="https://github.com/lllyasviel/ControlNet" style="text-decoration: underline;" target="_blank">lllyasviel's ControlNet</a> implementation.
              </p>
              ''')
    gr.HTML('''
     <p style="margin-bottom: 8px; font-size: 94%">
                HF Space created by Thaweewat Rugsujarit. If you have any suggestions or feedback, feel free to contact me via <a href="https://www.linkedin.com/in/thaweewat-rugsujarit/" style="text-decoration: underline;" target="_blank">LinkedIn</a>.
              </p>
              ''')
    with gr.Row():
        with gr.Column():
            input_image = gr.Image(source='upload', type="numpy")
            input_control = gr.Dropdown(control_task_list, value="Scribble", label="Task")
            prompt = gr.Textbox(label="Architectural Style")
            run_button = gr.Button(value="Run")
            
            with gr.Accordion("Advanced options", open=False):
                num_samples = gr.Slider(label="Images", minimum=1, maximum=12, value=1, step=1)
                image_resolution = gr.Slider(label="Image Resolution", minimum=256, maximum=768, value=512, step=256)
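                # The Canny threshold sliders are kept for parity with the
                # multi-task original; the Scribble task does not use them.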
                low_threshold = gr.Slider(label="Canny low threshold", minimum=1, maximum=255, value=100, step=1)
                high_threshold = gr.Slider(label="Canny high threshold", minimum=1, maximum=255, value=200, step=1)
                ddim_steps = gr.Slider(label="Steps", minimum=1, maximum=100, value=20, step=1)
                scale = gr.Slider(label="Guidance Scale", minimum=0.1, maximum=30.0, value=9.0, step=0.1)
                seed = gr.Slider(label="Seed", minimum=0, maximum=2147483647, step=1, randomize=True)
                eta = gr.Slider(label="eta (DDIM)", minimum=0.0, maximum=1.0, value=0.0, step=0.1)

        with gr.Column():
            result_gallery = gr.Gallery(label='Output', show_label=False, elem_id="gallery").style(grid=2, height='auto')
    ips = [input_image, prompt, input_control, num_samples, image_resolution, ddim_steps, scale, seed, eta, low_threshold, high_threshold]
    run_button.click(fn=process, inputs=ips, outputs=[result_gallery])
    gr.Markdown("![visitor badge](https://visitor-badge.glitch.me/badge?page_id=Thaweewat.ControlNet-Architecture)")

block.launch(debug=True)