import os
from typing import Any

import gradio as gr
import numpy as np
import torch
from PIL import Image
from transformers import pipeline

# Load the depth-estimation model once at import time.
# NOTE: downloads the model weights on first run (network I/O).
depth_estimator = pipeline(task="depth-estimation", model="Intel/dpt-hybrid-midas")


def launch(input_image: Image.Image) -> Image.Image:
    """
    Estimate a depth map for an input image.

    Args:
        input_image: The image uploaded through the Gradio interface.

    Returns:
        A grayscale PIL image whose pixel intensities encode relative depth,
        resized to match the input image's dimensions.
    """
    # DPT expects a 3-channel RGB input; Gradio uploads may be RGBA or
    # grayscale, so normalize the mode first (no-op for RGB images).
    image = input_image.convert("RGB")

    # Pure inference — disable autograd so no computation graph is built.
    with torch.no_grad():
        out = depth_estimator(image)

        # Upsample the raw prediction back to the input resolution.
        # PIL sizes are (width, height); torch interpolate wants (height, width).
        prediction = torch.nn.functional.interpolate(
            out["predicted_depth"].unsqueeze(1),
            size=image.size[::-1],
            mode="bicubic",
            align_corners=False,
        )

    # Move to host memory before converting to NumPy (.cpu() is a no-op on
    # CPU but required if the pipeline ever runs on an accelerator).
    output = prediction.squeeze().cpu().numpy()

    # Normalize to [0, 255]; guard against an all-zero depth map, which
    # would otherwise divide by zero and cast NaNs to uint8.
    max_val = np.max(output)
    if max_val > 0:
        formatted = (output * 255 / max_val).astype("uint8")
    else:
        formatted = np.zeros_like(output, dtype="uint8")

    # Convert the numpy array back to a PIL image.
    return Image.fromarray(formatted)


# Define the Gradio interface.
iface = gr.Interface(
    fn=launch,
    inputs=gr.Image(type='pil'),
    outputs=gr.Image(type='pil'),
    title="Depth Estimation",
    description="Upload an image to estimate its depth map."
)

# Launch only when run as a script; share=True creates a public URL.
if __name__ == "__main__":
    iface.launch(share=True)