winglian committed on
Commit
dce6894
1 Parent(s): ec779d5

try to fix combining gr.interface with blocks, try to increase concurrency on larger gpus

Files changed (2)
  1. requirements.txt +1 -0
  2. tabbed.py +22 -8
requirements.txt CHANGED
@@ -1,6 +1,7 @@
 --extra-index-url https://pypi.ngc.nvidia.com
 nvidia-cuda-runtime
 nvidia-cublas
+torch
 llama-cpp-python @ https://github.com/OpenAccess-AI-Collective/ggml-webui/releases/download/v0.1.50-rc3/llama_cpp_python-gpu-0.1.50-cp38-cp38-linux_x86_64.whl
 pyyaml
 torch
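
The added torch entry sits ahead of the llama-cpp-python GPU wheel (the torch at the end of the file is now a duplicate, which pip tolerates), presumably so torch is available before the prebuilt wheel is installed. Note the wheel's cp38-cp38-linux_x86_64 tag: it only installs under Python 3.8 on x86-64 Linux, and a plain pip install -r requirements.txt consumes both the NGC extra index and the direct wheel URL.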
tabbed.py CHANGED
@@ -1,4 +1,7 @@
+import math
+
 import gradio as gr
+import torch
 import yaml
 from huggingface_hub import hf_hub_download
 from huggingface_hub.utils import LocalEntryNotFoundError
@@ -81,17 +84,19 @@ instruct_description = f"""
 - Contribute at [https://github.com/OpenAccess-AI-Collective/ggml-webui](https://github.com/OpenAccess-AI-Collective/ggml-webui)
 """
 
+instruct_interface = gr.Interface(
+    fn=generate_text_instruct,
+    inputs=gr.inputs.Textbox(lines=10, label="Enter your input text"),
+    outputs=gr.outputs.Textbox(label="Output text"),
+    title="GGML UI Chatbot Demo",
+    description=instruct_description,
+)
+
 with gr.Blocks() as demo:
     with gr.Tab("Instruct"):
         gr.Markdown("# GGML Spaces Instruct Demo")
+        instruct_interface.render()
 
-        gr.Interface(
-            fn=generate_text_instruct,
-            inputs=gr.inputs.Textbox(lines= 10, label="Enter your input text"),
-            outputs=gr.outputs.Textbox(label="Output text"),
-            title="GGML UI Chatbot Demo",
-            description=instruct_description,
-        )
     with gr.Tab("Chatbot"):
         gr.Markdown("# GGML Spaces Chatbot Demo")
         chatbot = gr.Chatbot()
@@ -143,4 +148,13 @@
         stop.click(fn=None, inputs=None, outputs=None, cancels=[submit_click_event, message_submit_event], queue=False)
 
 
-demo.queue(max_size=32, concurrency_count=1).launch(debug=True, server_name="0.0.0.0", server_port=7860)
+# figure out how much VRAM is available to see if we can increase concurrency
+concurrency_count = 1
+model_vram_size_in_gb = 11
+if torch.cuda.is_available():
+    device = torch.cuda.current_device()
+    total_memory = torch.cuda.get_device_properties(device).total_memory
+    total_memory_in_gb = total_memory / 1024**3
+    concurrency_count = max(1, int(math.floor(total_memory_in_gb / model_vram_size_in_gb)))
+
+demo.queue(max_size=16, concurrency_count=concurrency_count).launch(debug=True, server_name="0.0.0.0", server_port=7860)
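
For reference, the pattern behind the first fix: build the gr.Interface up front and call its render() method inside the Blocks tab instead of constructing it there. A minimal self-contained sketch under the same gradio 3.x-era API, with a hypothetical echo function standing in for generate_text_instruct:

import gradio as gr

# Hypothetical stand-in for tabbed.py's generate_text_instruct.
def echo(text):
    return text

# Build the Interface at module level, outside any Blocks context...
iface = gr.Interface(
    fn=echo,
    inputs=gr.inputs.Textbox(lines=10, label="Enter your input text"),
    outputs=gr.outputs.Textbox(label="Output text"),
)

with gr.Blocks() as demo:
    with gr.Tab("Instruct"):
        # ...then render() recreates its components inside this tab.
        iface.render()

demo.launch()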
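
And the sizing logic from the last hunk as a standalone helper: total VRAM divided by the roughly 11 GB footprint of one model instance, floored, never dropping below one worker. On a 24 GB card that gives floor(24 / 11) = 2 concurrent generations; on an 80 GB A100, 7. A sketch; the helper name is mine, not the commit's:

import math

import torch

MODEL_VRAM_SIZE_IN_GB = 11  # approximate footprint of one model instance (from the commit)

def pick_concurrency(model_vram_gb=MODEL_VRAM_SIZE_IN_GB):
    """Return how many model instances fit on the current GPU (1 on CPU)."""
    if not torch.cuda.is_available():
        return 1
    device = torch.cuda.current_device()
    total_gb = torch.cuda.get_device_properties(device).total_memory / 1024**3
    # Floor so VRAM is never over-committed; keep at least one worker.
    return max(1, math.floor(total_gb / model_vram_gb))

# Wired into the launch line above it would read:
# demo.queue(max_size=16, concurrency_count=pick_concurrency()).launch(...)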