|
import gradio as gr |
|
import transformers |
|
from transformers import AutoTokenizer , GemmaForCausalLM |
|
import torch |
|
import time |
|
|
|
# UI copy shown in the Gradio header; `description` is rendered below the title.
title = "Gemma Function Calling"

description = "Octopus-V2-2B, an advanced open-source language model with 2 billion parameters, represents Nexa AI's research breakthrough in the application of large language models (LLMs) for function calling, specifically tailored for Android APIs. Unlike Retrieval-Augmented Generation (RAG) methods, which require detailed descriptions of potential function arguments—sometimes needing up to tens of thousands of input tokens—Octopus-V2-2B introduces a unique functional token strategy for both its training and inference stages. This approach not only allows it to achieve performance levels comparable to GPT-4 but also significantly enhances its inference speed beyond that of RAG-based methods, making it especially beneficial for edge computing devices."
|
|
|
|
|
|
|
# Example prompt #1: two function signatures with docstrings followed by a user
# query, in the format Octopus-v2 expects. The literal is prompt text, not
# executable code.
# NOTE(review): the second function's docstring is never closed inside this
# example text — presumably intentional prompt formatting; confirm against the
# model card before "fixing" it, since changing the text changes model input.
example1 = '''def get_weather_data(coordinates):

    """

    Fetches weather data from the Open-Meteo API for the given latitude and longitude.

    

    Args:

    coordinates (tuple): The latitude of the location.

    

    Returns:

    float: The current temperature in the coordinates you've asked for

    """



def get_coordinates_from_city(city_name):

    """

    Fetches the latitude and longitude of a given city name using the Maps.co Geocoding API.

    

    Args:

    city_name (str): The name of the city.

    

    Returns:

    tuple: The latitude and longitude of the city.

    

What's the weather like in Seattle right now?

'''
|
|
|
# Example prompt #2: graph-manipulation functions plus a multi-step user query.
# The literal is model input — leave its wording (including grammar) untouched.
example2 = '''Function:

def add_edge(u, v):

    """

    Adds an edge between node u and node v in the graph. Make sure to create a graph first by calling create_new_graph!

    

    Args:

    u (str): Node name as string

    v (str): Node name as string

    """



Function:

def is_two_nodes_connected(u, v):

    """

    Answers if two nodes are connected.

    """



Emma is friends with Bob and Charlie, and Charlie is friends with Erik, and Erik is friends with Brian. Can you represent all of these relationship as a graph and answer if Emma is friends with Erik?

'''
|
|
|
# Pre-filled examples shown beneath the input box; Gradio expects one inner
# list per example row (here: a single input component, hence one item each).
EXAMPLES = [[example1], [example2]]
|
|
|
# Hugging Face Hub repository for the Octopus-v2 checkpoint.
model_id = "NexaAIDev/Octopus-v2"

# Loaded once at import time; both are reused by inference() below.
tokenizer = AutoTokenizer.from_pretrained(model_id)

# bfloat16 on CPU to keep the 2B-parameter model's memory footprint down.
model = GemmaForCausalLM.from_pretrained(
    model_id, torch_dtype=torch.bfloat16, device_map="cpu"
)
|
|
|
def inference(input_text):
    """Run greedy decoding on a fully formatted prompt and time the call.

    Args:
        input_text (str): The complete prompt (function definitions + query),
            already wrapped in the Octopus template by the caller.

    Returns:
        dict: ``{"output": <decoded completion>, "latency": "<seconds> seconds"}``.
    """
    start_time = time.time()
    inputs = tokenizer(input_text, return_tensors="pt").to(model.device)
    input_length = inputs["input_ids"].shape[1]
    # Inference only: skip autograd bookkeeping to save memory and time.
    with torch.no_grad():
        outputs = model.generate(
            input_ids=inputs["input_ids"],
            # Pass the mask explicitly so padding/attention is unambiguous.
            attention_mask=inputs.get("attention_mask"),
            # max_new_tokens bounds the *completion* length. The original
            # max_length=1024 capped prompt + completion together, so long
            # prompts (like the provided examples) left little or no room
            # for the model to answer.
            max_new_tokens=1024,
            do_sample=False,  # greedy decoding: deterministic output
        )
    # Drop the echoed prompt tokens; decode only the newly generated tail.
    generated_sequence = outputs[:, input_length:].tolist()
    res = tokenizer.decode(generated_sequence[0])
    end_time = time.time()
    return {"output": res, "latency": f"{end_time - start_time:.2f} seconds"}
|
|
|
def gradio_interface(input_text):
    """Wrap the raw user input in the Octopus prompt template and run it.

    Returns a (output, latency) pair matching the two output components of
    the Gradio interface.
    """
    prompt = (
        "Below is the query from the users, please call the correct function "
        f"and generate the parameters to call the function.\n\nQuery: {input_text} \n\nResponse:"
    )
    reply = inference(prompt)
    return reply["output"], reply["latency"]
|
|
|
# Wire the model up to a simple one-input, two-output Gradio interface.
# Fix: the placeholder text previously read "querry" (typo) — user-visible.
iface = gr.Interface(
    fn=gradio_interface,
    # gr.Code gives a syntax-highlighted editor for the function definitions
    # plus the natural-language query.
    inputs=gr.Code(lines=2, language="python", value="Add functions and your query here..."),
    outputs=[gr.Textbox(label="Output"), gr.Textbox(label="Latency")],
    title=title,
    description=description,
    examples=EXAMPLES,
)
|
|
|
# Launch the web UI only when run as a script (not when imported).
if __name__ == "__main__":
    iface.launch()