RuntimeError: No CUDA GPUs are available

#106
by mantrakp - opened

I am getting `RuntimeError: No CUDA GPUs are available`. I checked the changes applied here, but my error does not appear to have the same cause.

Error:

Traceback (most recent call last):
  File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 135, in worker_init
    torch.init(nvidia_uuid)
  File "/usr/local/lib/python3.10/site-packages/spaces/zero/torch/patching.py", line 354, in init
    torch.Tensor([0]).cuda()
  File "/usr/local/lib/python3.10/site-packages/torch/cuda/__init__.py", line 314, in _lazy_init
    torch._C._cuda_init()
RuntimeError: No CUDA GPUs are available
Traceback (most recent call last):
  File "/usr/local/lib/python3.10/site-packages/gradio/queueing.py", line 536, in process_events
    response = await route_utils.call_process_api(
  File "/usr/local/lib/python3.10/site-packages/gradio/route_utils.py", line 321, in call_process_api
    output = await app.get_blocks().process_api(
  File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1935, in process_api
    result = await self.call_function(
  File "/usr/local/lib/python3.10/site-packages/gradio/blocks.py", line 1520, in call_function
    prediction = await anyio.to_thread.run_sync(  # type: ignore
  File "/usr/local/lib/python3.10/site-packages/anyio/to_thread.py", line 56, in run_sync
    return await get_async_backend().run_sync_in_worker_thread(
  File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 2177, in run_sync_in_worker_thread
    return await future
  File "/usr/local/lib/python3.10/site-packages/anyio/_backends/_asyncio.py", line 859, in run
    result = context.run(func, *args)
  File "/usr/local/lib/python3.10/site-packages/gradio/utils.py", line 826, in wrapper
    response = f(*args, **kwargs)
  File "/home/user/app/ui/images/images.py", line 829, in generate_images
    images = generate_t2i(base_request)
  File "/usr/local/lib/python3.10/site-packages/spaces/zero/wrappers.py", line 214, in gradio_handler
    raise res.value
RuntimeError: No CUDA GPUs are available

I have made sure to add the `@spaces.GPU(duration=120)` decorator to the functions that need the GPU:

@spaces.GPU(duration=120)
def generate_t2i(request: GenT2I):
  ...
images = generate_t2i(base_request)
return gr.update( # output_images
  value=images,
  interactive=True
)

This is how I load the models:

def _load_t2i_pipeline(repo_id, compute_type, device, variant=None):
    """Load one text-to-image pipeline, move it to *device*, and enable CPU offload.

    Args:
        repo_id: Hub repository id passed to ``from_pretrained``.
        compute_type: Torch dtype passed as ``torch_dtype``.
        device: Device string the pipeline is moved to.
        variant: Optional weight variant (e.g. ``"fp16"``). When ``None`` the
            ``variant`` keyword is not passed at all.

    Returns:
        The loaded pipeline.
    """
    load_kwargs = {"torch_dtype": compute_type, "safety_checker": None}
    if variant is not None:
        load_kwargs["variant"] = variant

    # NOTE(review): the pipeline is moved to `device` and then model CPU
    # offload is enabled. The diffusers docs appear to advise against moving
    # the pipeline to the GPU before enabling offload -- confirm which of the
    # two is actually wanted here.
    pipeline = AutoPipelineForText2Image.from_pretrained(repo_id, **load_kwargs).to(device)
    pipeline.enable_model_cpu_offload()
    return pipeline


def load_sd():
    """Load every model, helper network, and checkpoint used for generation.

    Returns:
        A 9-tuple ``(device, models, sdxl_vae, refiner, safety_checker,
        feature_extractor, controlnet_models, face_detector, photomaker_ckpt)``:

        * ``device`` -- ``"cuda"`` if ``torch.cuda.is_available()`` else ``"cpu"``.
        * ``models`` -- list of dicts describing the base models; each gains a
          ``"pipeline"`` entry holding the loaded pipeline.
        * ``sdxl_vae`` -- the fp16-fix SDXL VAE.
        * ``refiner`` -- the SDXL refiner pipeline built on ``sdxl_vae``.
        * ``safety_checker`` / ``feature_extractor`` -- safety checker model
          and its CLIP feature extractor.
        * ``controlnet_models`` -- list of dicts describing the ControlNets;
          each gains a ``"controlnet"`` entry holding the loaded model.
        * ``face_detector`` -- prepared ``FaceAnalysis2`` instance.
        * ``photomaker_ckpt`` -- value returned by ``hf_hub_download`` for the
          PhotoMaker V2 checkpoint.
    """
    device = "cuda" if torch.cuda.is_available() else "cpu"

    # Models
    models = [
        {
            "repo_id": "black-forest-labs/FLUX.1-dev",
            "loader": "flux",
            "compute_type": torch.bfloat16,
        },
        {
            "repo_id": "SG161222/RealVisXL_V4.0",
            "loader": "xl",
            "compute_type": torch.float16,
        },
    ]

    for model in models:
        try:
            # Prefer the fp16 weight variant when the repository provides one.
            model["pipeline"] = _load_t2i_pipeline(
                model["repo_id"], model["compute_type"], device, variant="fp16"
            )
        except Exception:
            # Retry without a variant. `Exception` (not a bare `except`) keeps
            # the best-effort fallback while letting KeyboardInterrupt and
            # SystemExit propagate. A failure here is raised to the caller.
            model["pipeline"] = _load_t2i_pipeline(
                model["repo_id"], model["compute_type"], device
            )

    # VAE n Refiner
    sdxl_vae = AutoencoderKL.from_pretrained(
        "madebyollin/sdxl-vae-fp16-fix",
        torch_dtype=torch.float16,
    )
    refiner = DiffusionPipeline.from_pretrained(
        "stabilityai/stable-diffusion-xl-refiner-1.0",
        vae=sdxl_vae,
        torch_dtype=torch.float16,
        use_safetensors=True,
        variant="fp16",
    ).to(device)
    refiner.enable_model_cpu_offload()

    # Safety Checker
    safety_checker = StableDiffusionSafetyChecker.from_pretrained(
        "CompVis/stable-diffusion-safety-checker"
    ).to(device)
    feature_extractor = CLIPFeatureExtractor.from_pretrained(
        "openai/clip-vit-base-patch32",
        from_pt=True,
    )

    # Controlnets
    controlnet_models = [
        {
            "repo_id": "xinsir/controlnet-depth-sdxl-1.0",
            "name": "depth_xl",
            "layers": ["depth"],
            "loader": "xl",
            "compute_type": torch.float16,
        },
        {
            "repo_id": "xinsir/controlnet-canny-sdxl-1.0",
            "name": "canny_xl",
            "layers": ["canny"],
            "loader": "xl",
            "compute_type": torch.float16,
        },
        {
            "repo_id": "xinsir/controlnet-openpose-sdxl-1.0",
            "name": "openpose_xl",
            "layers": ["pose"],
            "loader": "xl",
            "compute_type": torch.float16,
        },
        {
            "repo_id": "xinsir/controlnet-scribble-sdxl-1.0",
            "name": "scribble_xl",
            "layers": ["scribble"],
            "loader": "xl",
            "compute_type": torch.float16,
        },
        {
            "repo_id": "Shakker-Labs/FLUX.1-dev-ControlNet-Union-Pro",
            "name": "flux1_union_pro",
            "layers": ["canny_fl", "tile_fl", "depth_fl", "blur_fl", "pose_fl", "gray_fl", "low_quality_fl"],
            "loader": "flux-multi",
            "compute_type": torch.bfloat16,
        },
    ]

    for controlnet in controlnet_models:
        if controlnet["loader"] == "xl":
            controlnet["controlnet"] = ControlNetModel.from_pretrained(
                controlnet["repo_id"],
                torch_dtype=controlnet["compute_type"],
            )
        elif controlnet["loader"] == "flux-multi":
            # The union ControlNet is wrapped in a single-element multi-model.
            controlnet["controlnet"] = FluxMultiControlNetModel([
                FluxControlNetModel.from_pretrained(
                    controlnet["repo_id"],
                    torch_dtype=controlnet["compute_type"],
                )
            ])
        # TODO: Add support for flux only controlnet

    # Face Detection (for PhotoMaker)
    # NOTE(review): the CUDA execution provider and ctx_id=0 are requested
    # regardless of the `device` chosen above -- confirm this is intended when
    # no GPU is visible to the process at load time.
    face_detector = FaceAnalysis2(
        providers=['CUDAExecutionProvider'],
        allowed_modules=['detection', 'recognition'],
    )
    face_detector.prepare(ctx_id=0, det_size=(640, 640))

    # PhotoMaker V2 (for SDXL only)
    photomaker_ckpt = hf_hub_download(
        repo_id="TencentARC/PhotoMaker-V2",
        filename="photomaker-v2.bin",
        repo_type="model",
    )

    return (
        device,
        models,
        sdxl_vae,
        refiner,
        safety_checker,
        feature_extractor,
        controlnet_models,
        face_detector,
        photomaker_ckpt,
    )

# Run the loader once at module level and bind each returned object to a
# module-level name, in the order load_sd() returns them.
(
    device,
    models,
    sdxl_vae,
    refiner,
    safety_checker,
    feature_extractor,
    controlnet_models,
    face_detector,
    photomaker_ckpt,
) = load_sd()

Update:
https://huggingface.co/spaces/mantrakp/aai
Please check this Space out and let me know what you find.

ZeroGPU Explorers org

@mantrakp thanks for the report. Currently the Space works as expected on my side, but do not hesitate to report again if you still encounter errors.

I was encountering errors, but when I refactored all the code into a single file, it worked. Now I'm copying and separating the code again to try to re-refactor it.

Sign up or log in to comment