# cLLM Gradio chat demo — serves a phi-2 (GGUF) model via llama.cpp.
from huggingface_hub import hf_hub_download

from cLLM import InferenceSession, LlamaCPParams, LlamaCPPGenerationConfig
from cLLM.gradio import GradioUserInference
from cLLM.interactors import OpenChatInteract
def launch() -> None:
    """Download the phi-2 GGUF model and serve a Gradio chat interface.

    Blocks while the web UI is running; intended as a script entry point.
    The model file is fetched from the Hugging Face Hub on first run and
    cached locally by ``hf_hub_download``.
    """
    # Prompt formatter: renders the conversation in OpenChat style and
    # supplies the stop sequences used to cut off generation.
    interact = OpenChatInteract(
        user_name="User",
        assistant_name="cLLM-GPT",
    )
    # llama.cpp runtime parameters for the quantized phi-2 checkpoint.
    params = LlamaCPParams(
        model_path=hf_hub_download(
            "TheBloke/phi-2-GGUF",
            "phi-2.Q4_K_S.gguf",
        ),
        num_threads=8,
        verbose=False,
        num_batch=512,
        num_context=2048,
        offload_kqv=True,  # NOTE(review): presumably offloads the KV cache to the accelerator — confirm against cLLM docs
    )
    inference = InferenceSession.create(
        llama_params=params,
        generation_config=LlamaCPPGenerationConfig(
            stream=True,  # token-by-token streaming into the chat UI
            stop=interact.get_stop_signs(),
        ),
    )
    interface = GradioUserInference(
        interactor=interact,
        inference_session=inference,
        llama_param=params,
        use_prefix_for_interactor=True,
    )
    # Build the Gradio chat UI and start serving (blocking call).
    interface.build_chat_interface().launch()
# Run the demo only when executed as a script, not on import.
if __name__ == "__main__":
    launch()