# Page header left over from the hosting site's file viewer: uploader "karmiq", commit 90dfdae ("Add application"), 2.1 kB.
from dataclasses import dataclass
from operator import add, sub
import gradio as gr
import numpy as np
from datasets import load_dataset
from sklearn.metrics.pairwise import cosine_similarity
from pyparsing import Word, alphas, Char, ParseException
# Grammar for word arithmetic such as "king - man + woman":
#   expression := term (operator term)*
# Parsing yields a flat token sequence that alternates words and operators.

# Maps each operator symbol to the function applied to (left, right) vectors.
operations = {"+": add, "-": sub}

# A term is a run of letters only (pyparsing's `alphas`); digits and
# punctuation are not accepted inside a word.
term = Word(alphas)

# Exactly one operator character. The character set is derived from
# `operations` so that every symbol the grammar accepts has an implementation;
# the previous literal "+ -" also listed a space, which has no entry in the
# table.
operator = Char("".join(operations))

# `[...]` repeats the (operator term) pair zero or more times.
expression = term + (operator + term)[...]
def parse_expression(input):
    """Parse a word-arithmetic expression into its tokens.

    Args:
        input: Expression text, e.g. "king - man + woman".

    Returns:
        The pyparsing result for the module-level ``expression`` grammar: a
        flat sequence alternating words and operator symbols.

    Raises:
        gr.Error: If the text does not match the grammar. The message carries
            the location and description reported by pyparsing.
    """
    try:
        # parseAll=True makes the whole input count. Without it, anything
        # after the last complete "operator term" pair (a dangling "+", a
        # number, ...) is silently ignored and a truncated expression is
        # evaluated instead of reporting the mistake.
        return expression.parseString(input, parseAll=True)
    except ParseException as pe:
        raise gr.Error(f"Syntax error at {pe.loc}: {pe.msg}") from pe
def evaluate_expression(input):
    """Reduce an alternating word/operator token sequence to one vector.

    Tokens at even positions are treated as words and tokens at odd positions
    as operator symbols. Evaluation runs strictly left to right: the vector of
    the first word is the starting value, and each following
    (operator, word) pair is applied to the running result.

    Args:
        input: Sliceable token sequence, e.g. ["king", "-", "man"].

    Returns:
        The result of applying every operator in order.
    """
    terms = input[::2]
    symbols = input[1::2]

    total = word_to_vectors(terms[0])
    for symbol, term in zip(symbols, terms[1:]):
        combine = operations[symbol]
        total = combine(total, word_to_vectors(term))
    return total
# Load the "train" split of the karmiq/glove dataset once, at import time,
# and keep it in memory as a pandas DataFrame.
dataset = load_dataset(path="karmiq/glove", split="train")
df = dataset.to_pandas()

# Both arrays are taken from the same DataFrame in row order, so
# all_vectors[i] is the embedding that belongs to all_words[i].
all_vectors = np.array(df["embeddings"].tolist())
all_words = df["word"].to_numpy()
def word_to_vectors(word):
    """Look up the embedding stored for ``word``.

    Args:
        word: The word to look up; compared for exact equality against the
            "word" column of the module-level DataFrame ``df``.

    Returns:
        The "embeddings" value of the first matching row.

    Raises:
        gr.Error: If no row matches. Previously this surfaced as a bare
            IndexError from indexing an empty array, which gave the user no
            hint about which word was the problem.
    """
    matches = df.loc[df["word"] == word].embeddings.to_numpy()
    if len(matches) == 0:
        raise gr.Error(f"Unknown word: {word}")
    return matches[0]
def expression_to_vectors(input):
    """Parse ``input`` and evaluate the resulting tokens to a single vector."""
    tokens = parse_expression(input)
    return evaluate_expression(tokens)
def get_results(expression):
    """Return the vocabulary words closest to the evaluated expression.

    Args:
        expression: Expression text, e.g. "king - man + woman".

    Returns:
        A dict mapping up to ten words to their cosine similarity with the
        expression's result vector, inserted from most to least similar.
        Tokens that appear in the expression itself are left out.

    Raises:
        gr.Error: Propagated from parsing when the expression is malformed.
    """
    tokens = parse_expression(expression)
    vectors = evaluate_expression(tokens)
    # One similarity score per row of all_vectors
    similarity_scores = cosine_similarity([vectors], all_vectors)[0]

    # Exclude the parsed tokens, not only the whitespace-separated pieces:
    # "king-man+woman" parses fine but splits into a single chunk, which used
    # to let the query words back into the results. The raw pieces stay
    # excluded as before. The set is built once so each check below is O(1)
    # instead of re-splitting the text for every vocabulary entry.
    excluded = set(tokens) | set(expression.split())

    results = {}
    # Walk from most to least similar and stop as soon as ten words are
    # collected, rather than filtering the whole vocabulary first.
    for i in np.argsort(similarity_scores)[::-1]:
        word = all_words[i]
        if word in excluded:
            continue
        # Plain float so the value does not depend on the NumPy scalar type
        results[word] = float(similarity_scores[i])
        if len(results) == 10:
            break
    return results
# Sample expressions offered in the UI; the first one pre-fills the textbox.
examples = [
    "king - man + woman",
    "berlin - germany + france",
]

# NOTE(review): the nesting below is reconstructed from a flattened copy of
# the file (two columns side by side: inputs on the left, results on the
# right) — confirm against the deployed layout.
with gr.Blocks() as app:
    with gr.Row():
        # Left column: expression input, run button, clickable examples
        with gr.Column():
            input = gr.Textbox(value=examples[0], label="Expression")
            with gr.Row():
                btn = gr.Button(value="Run")
            with gr.Row():
                gr.Examples(examples=examples, inputs=input)
        # Right column: label component showing the closest words
        with gr.Column():
            output = gr.Label(label="Closest words")

    # Clicking "Run" passes the textbox value to get_results and renders the
    # returned dict in the label component.
    btn.click(get_results, inputs=input, outputs=output)

app.launch()