import panel as pn
from langchain_community.llms import LlamaCpp
from langchain_core.callbacks import CallbackManager
from langchain_core.prompts import PromptTemplate

from setup import OLMO_MODEL

# OLMo's chat format: the trailing <|assistant|> token cues the model to reply.
template = """<|user|>
{contents}
<|assistant|>
"""
prompt = PromptTemplate.from_template(template)

pn.extension(design="bootstrap", sizing_mode="stretch_width")

model_path = OLMO_MODEL
model_name = "OLMo"
model_avatar = "🌳"


def callback(contents, user, instance):
    # Stream tokens from LangChain directly into the chat interface.
    callback_handler = pn.chat.langchain.PanelCallbackHandler(
        instance, user=model_name, avatar=model_avatar
    )
    # Suppress the handler's default on_llm_end, which would append the
    # full response a second time after the tokens have been streamed.
    callback_handler.on_llm_end = lambda response, *args, **kwargs: None

    # Callbacks support token-wise streaming
    callback_manager = CallbackManager([callback_handler])

    llm = LlamaCpp(
        model_path=str(model_path),
        callback_manager=callback_manager,
        temperature=0.8,
        max_tokens=512,
        verbose=False,
        echo=False,
    )

    llm_chain = prompt | llm
    llm_chain.invoke({"contents": contents})


chat_interface = pn.chat.ChatInterface(callback=callback)

# Create the dashboard
main = pn.WidgetBox(
    f"##### {model_avatar} Chat with {model_name} using the interface below!",
    pn.Column(chat_interface, height=500, scroll=True),
)

title = f"{model_name} Chat Demo"
pn.template.BootstrapTemplate(
    title=title,
    main=main,
    main_max_width="min(50%, 698px)",
).servable(title=title)
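
# To launch the app, serve this script with Panel (assuming it is saved as
# app.py and OLMO_MODEL in setup.py points at a local OLMo GGUF file):
#
#   panel serve app.py
#
# then open the localhost URL that Panel prints in a browser.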
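
# Performance note: the callback above reconstructs LlamaCpp (and re-reads the
# GGUF weights) on every message. A minimal sketch of loading the model once,
# assuming Panel's pn.cache decorator is available and passing the per-message
# handler via the runnable config instead of the constructor:
#
#   @pn.cache
#   def load_llm():
#       return LlamaCpp(
#           model_path=str(model_path),
#           temperature=0.8,
#           max_tokens=512,
#           verbose=False,
#           echo=False,
#       )
#
#   def cached_callback(contents, user, instance):
#       handler = pn.chat.langchain.PanelCallbackHandler(
#           instance, user=model_name, avatar=model_avatar
#       )
#       handler.on_llm_end = lambda response, *args, **kwargs: None
#       chain = prompt | load_llm()
#       chain.invoke({"contents": contents}, config={"callbacks": [handler]})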