Create app.py
app.py
ADDED
@@ -0,0 +1,44 @@
from cLLM.gradio import GradioUserInference
from cLLM.interactors import OpenChatInteract
from cLLM import LlamaCPParams, InferenceSession, LlamaCPPGenerationConfig
from huggingface_hub import hf_hub_download


def launch():
    interact = OpenChatInteract(
        user_name="User",
        assistant_name="cLLM-GPT"
    )

    params = LlamaCPParams(
        model_path=hf_hub_download(
            "TheBloke/phi-2-GGUF",
            "phi-2.Q4_K_S.gguf"
        ),
        num_threads=8,
        verbose=False,
        num_batch=512,
        num_context=2048,
        offload_kqv=True,
    )

    inference = InferenceSession.create(
        llama_params=params,
        generation_config=LlamaCPPGenerationConfig(
            stream=True,
            stop=interact.get_stop_signs()
        )
    )

    interface = GradioUserInference(
        interactor=interact,
        inference_session=inference,
        llama_param=params,
        use_prefix_for_interactor=True
    )

    interface.build_chat_interface().launch()


if __name__ == "__main__":
    launch()