smellslikeml committed
Commit 6229c52 (parent: 56ccf9e)

update app.py

Files changed (1): app.py (+16, -16)
app.py CHANGED
```diff
@@ -47,9 +47,15 @@ class Llava:
         )
         return res["choices"][0]["message"]["content"]
 
-# Initialize the model
 llm_model = Llava()
 
+def predict(image, prompt):
+    result = llm_model.run_inference(image, prompt)
+    return result
+
+image_input = gr.Image(type="pil", label="Input Image")
+text_input = gr.Textbox(label="Prompt")
+
 title_and_links_markdown = """
 # 🛸SpaceLLaVA🌋: A spatial reasoning multi-modal model
 This space hosts our initial release of LLaVA 1.5 LoRA tuned for spatial reasoning using data generated with [VQASynth](https://github.com/remyxai/VQASynth).
@@ -58,26 +64,20 @@ Upload an image and ask a question.
 [Model](https://huggingface.co/remyxai/SpaceLLaVA) | [Code](https://github.com/remyxai/VQASynth) | [Paper](https://spatial-vlm.github.io)
 """
 
-def predict(image, prompt):
-    result = llm_model.run_inference(image, prompt)
-    return result
+examples = [
+    ["examples/warehouse_1.jpg", "Is the man wearing gray pants to the left of the pile of boxes on a pallet?"],
+    ["examples/warehouse_2.jpg", "Is the forklift taller than the shelves of boxes?"],
+]
 
 image_input = gr.Image(type="pil", label="Input Image")
 text_input = gr.Textbox(label="Prompt")
-
-# Initialize interface with examples
 iface = gr.Interface(
-    fn=predict,
-    inputs=[image_input, text_input],
-    outputs="text",
-    title="Llava Model Inference",
-    description="Input an image and a prompt to receive a description."
+    fn=predict,
+    inputs=[image_input, text_input],
+    outputs="text",
 )
 
-examples = [
-    ["examples/warehouse_1.jpg", "Is the man wearing gray pants to the left of the pile of boxes on a pallet?"],
-    ["examples/warehouse_2.jpg", "Is the forklift taller than the shelves of boxes?"],
-]
+iface.add_component(gr.Markdown(title_and_links_markdown), "header")
+iface.set_examples(examples)
 
-iface.examples = examples
 iface.launch()
```
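A hedged caveat on the last two added lines: as far as Gradio's documented API goes, `gr.Interface` exposes no `add_component` or `set_examples` methods, so that tail may raise `AttributeError` when the Space starts. Below is a minimal sketch of the same wiring through constructor arguments `gr.Interface` does document (`description` and `examples`); it assumes the `Llava` wrapper and the `title_and_links_markdown` string defined earlier in app.py.

```python
import gradio as gr

llm_model = Llava()  # model wrapper defined earlier in app.py

def predict(image, prompt):
    # Delegate to the wrapper's inference method shown in the diff above
    return llm_model.run_inference(image, prompt)

# Pass the markdown header and the example pairs directly to the
# constructor: `description` renders above the inputs, and `examples`
# becomes a clickable table of (image, prompt) pairs.
iface = gr.Interface(
    fn=predict,
    inputs=[gr.Image(type="pil", label="Input Image"), gr.Textbox(label="Prompt")],
    outputs="text",
    description=title_and_links_markdown,
    examples=[
        ["examples/warehouse_1.jpg", "Is the man wearing gray pants to the left of the pile of boxes on a pallet?"],
        ["examples/warehouse_2.jpg", "Is the forklift taller than the shelves of boxes?"],
    ],
)
iface.launch()
```

Keeping everything in the constructor also avoids the duplicated `image_input`/`text_input` definitions that this commit leaves in the new file (lines 56-57 and 72-73 of the + side).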