import os
from typing import Any

import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import pipeline

# Load the depth-estimation model once at import time.
# NOTE: downloads the model weights on first run (network I/O).
depth_estimator = pipeline(task="depth-estimation", model="Intel/dpt-hybrid-midas")


def launch(input_image: Image.Image) -> Image.Image:
    """
    Estimate a depth map for an input image.

    Args:
        input_image: The image uploaded through the Gradio interface.

    Returns:
        A grayscale PIL image whose pixel intensities encode relative depth,
        resized to match the input image's dimensions.
    """
    # DPT expects a 3-channel RGB input; Gradio uploads may be RGBA or
    # grayscale, so normalize the mode first (no-op for RGB images).
    image = input_image.convert("RGB")

    # Pure inference — disable autograd so no computation graph is built.
    with torch.no_grad():
        out = depth_estimator(image)

        # Upsample the raw prediction back to the input resolution.
        # PIL sizes are (width, height); torch interpolate wants (height, width).
        prediction = torch.nn.functional.interpolate(
            out["predicted_depth"].unsqueeze(1),
            size=image.size[::-1],
            mode="bicubic",
            align_corners=False,
        )

    # Move to host memory before converting to NumPy (.cpu() is a no-op on
    # CPU but required if the pipeline ever runs on an accelerator).
    output = prediction.squeeze().cpu().numpy()

    # Normalize to [0, 255]; guard against an all-zero depth map, which
    # would otherwise divide by zero and cast NaNs to uint8.
    max_val = np.max(output)
    if max_val > 0:
        formatted = (output * 255 / max_val).astype("uint8")
    else:
        formatted = np.zeros_like(output, dtype="uint8")

    # Convert the numpy array back to a PIL image.
    return Image.fromarray(formatted)


# Define the Gradio interface.
iface = gr.Interface(
    fn=launch,
    inputs=gr.Image(type='pil'),
    outputs=gr.Image(type='pil'),
    title="Depth Estimation",
    description="Upload an image to estimate its depth map."
)

# Launch only when run as a script; share=True creates a public URL.
if __name__ == "__main__":
    iface.launch(share=True)