karmiq committed on
Commit
90dfdae
1 Parent(s): 9bdcf0d

Add application

Browse files
Files changed (3) hide show
  1. .gitignore +4 -0
  2. app.py +86 -0
  3. requirements.txt +7 -0
.gitignore ADDED
@@ -0,0 +1,4 @@
 
 
 
 
 
1
+ __pycache__/
2
+ tmp/
3
+
4
+ gradio_cached_examples/
app.py ADDED
@@ -0,0 +1,86 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from dataclasses import dataclass
2
+ from operator import add, sub
3
+
4
+ import gradio as gr
5
+
6
+ import numpy as np
7
+ from datasets import load_dataset
8
+ from sklearn.metrics.pairwise import cosine_similarity
9
+
10
+ from pyparsing import Word, alphas, Char, ParseException
11
+
12
# Grammar for word-arithmetic expressions such as "king - man + woman".

# A term is a run of alphabetic characters (one vocabulary word).
term = Word(alphas)

# An operator is exactly one of "+" or "-". The character set deliberately
# contains no space: pyparsing skips whitespace before matching a token, so a
# space could never match here, and it has no entry in `operations` anyway.
operator = Char("+-")

# One term followed by zero or more (operator, term) pairs. The parsed tokens
# therefore alternate: word, operator, word, operator, word, ...
expression = term + (operator + term)[...]

# Maps each operator token to the binary function that implements it.
operations = {"+": add, "-": sub}
17
+
18
+
19
def parse_expression(input):
    """Parse a word-arithmetic expression into its tokens.

    Args:
        input: The expression text, e.g. "king - man + woman".

    Returns:
        The pyparsing result of matching `expression` against the text:
        alternating word and operator tokens.

    Raises:
        gr.Error: If the text does not match the grammar. The message carries
            the location and description reported by pyparsing.
    """
    try:
        # parseAll=True makes the whole input significant. Without it, parsing
        # stops at the first token that does not fit and the rest of the text
        # is silently dropped (e.g. "king - man woman" would be evaluated as
        # "king - man").
        return expression.parseString(input, parseAll=True)
    except ParseException as pe:
        raise gr.Error(f"Syntax error at {pe.loc}: {pe.msg}") from pe
24
+
25
+
26
def evaluate_expression(input):
    """Fold a parsed expression into a single vector, left to right.

    Args:
        input: Token sequence that alternates word, operator, word, ...
            (even positions are words, odd positions are operators).

    Returns:
        The result of starting from the first word's vector and applying each
        operator with the vector of the word that follows it.
    """
    # Odd positions hold the operators; even positions from index 2 onwards
    # hold the words those operators apply to.
    symbols = input[1::2]
    following_words = input[2::2]

    accumulated = word_to_vectors(input[0])

    for symbol, following_word in zip(symbols, following_words):
        apply = operations[symbol]
        accumulated = apply(accumulated, word_to_vectors(following_word))

    return accumulated
37
+
38
+
39
# Load the "train" split of the "karmiq/glove" dataset once, at import time,
# and keep it as a pandas DataFrame for per-word lookups.
dataset = load_dataset("karmiq/glove", split="train")
df = dataset.to_pandas()

# Parallel arrays built from the same rows in the same order:
# all_words[i] is the word whose embedding is all_vectors[i].
all_vectors = np.array(df["embeddings"].to_list())
all_words = df["word"].to_numpy()
44
+
45
+
46
def word_to_vectors(word):
    """Return the embedding stored for `word`.

    Args:
        word: The word to look up; compared for exact equality against the
            "word" column of `df`.

    Returns:
        The "embeddings" value of the first row whose "word" equals `word`.

    Raises:
        gr.Error: If no row matches, so the UI shows a readable message
            instead of an IndexError from indexing an empty result.
    """
    matches = df.loc[df["word"] == word, "embeddings"]
    if matches.empty:
        raise gr.Error(f"Unknown word: {word!r}")
    return matches.to_numpy()[0]
48
+
49
+
50
def expression_to_vectors(input):
    """Parse the expression text and evaluate it to a single vector.

    Args:
        input: The expression text, e.g. "king - man + woman".

    Returns:
        Whatever `evaluate_expression` produces for the parsed tokens.
    """
    tokens = parse_expression(input)
    return evaluate_expression(tokens)
52
+
53
+
54
def get_results(expression):
    """Find the vocabulary words closest to the result of an expression.

    Args:
        expression: The expression text, e.g. "king - man + woman".

    Returns:
        A dict mapping up to 10 words to their cosine similarity with the
        evaluated expression, inserted from most to least similar. Words that
        appear in the expression itself are left out.

    Raises:
        gr.Error: Propagated from `parse_expression` on a syntax error.
    """
    tokens = parse_expression(expression)
    vectors = evaluate_expression(tokens)

    # Take the query words from the parsed tokens (even positions) rather than
    # from expression.split(): the parser does not require spaces, so
    # "king-man+woman" must exclude the same words as "king - man + woman".
    # Building the set once also avoids re-splitting the text for every word
    # in the vocabulary.
    query_words = set(tokens[::2])

    similarity_scores = cosine_similarity([vectors], all_vectors)[0]
    # Indices of all words, ordered from most to least similar.
    ranked_indices = np.argsort(similarity_scores)[::-1]

    results = {}
    for i in ranked_indices:
        word = all_words[i]
        if word in query_words:
            continue
        results[word] = similarity_scores[i]
        if len(results) == 10:
            # Only the top 10 are shown; no need to walk the rest.
            break
    return results
65
+
66
+
67
# Example expressions offered in the UI; the first one pre-fills the textbox.
examples = ["king - man + woman", "berlin - germany + france"]

with gr.Blocks() as app:
    with gr.Row():
        # First column: expression input, run button and clickable examples.
        with gr.Column():
            input = gr.Textbox(label="Expression", value=examples[0])
            with gr.Row():
                btn = gr.Button("Run")
            with gr.Row():
                gr.Examples(examples=examples, inputs=input)

        # Second column: the closest words with their similarity scores.
        with gr.Column():
            output = gr.Label(label="Closest words")

    # Clicking "Run" evaluates the textbox content and fills the label.
    btn.click(fn=get_results, inputs=input, outputs=output)

app.launch()
requirements.txt ADDED
@@ -0,0 +1,7 @@
 
 
 
 
 
 
 
 
1
+ gradio
2
+
3
+ datasets
4
+ numpy
5
+ scikit-learn
6
+
7
+ pyparsing