BerserkerMother committed on
Commit
24c3ed3
1 Parent(s): 27c530a

Adds parser package

Browse files

This package is responsible for parsing the user prompts into jobs that
are carried out by the matcher to find the best matches.

elise/src/parser/__init__.py ADDED
@@ -0,0 +1 @@
 
 
1
+ from .parser import SentenceParser
elise/src/parser/parser.py ADDED
@@ -0,0 +1,60 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
+ from typing import List, Union
2
+
3
+ import transformers
4
+
5
+
6
class SentenceParser:
    """Turn user prompts into Matcher jobs via a token-classification pipeline.

    ``get_ner`` groups the words recognized by the wrapped pipeline by their
    entity group; ``get_jobs`` then translates those entity groups into job
    names using ``mappings``.
    """

    # Entity group emitted by the pipeline -> Matcher job name.
    # Several entity groups may share one job (both "Amenity" and "Cuisine"
    # feed the "Semantic" job), so ``get_jobs`` has to merge, not overwrite.
    mappings = {
        "Amenity": "Semantic",
        "Price": "Price",
        "Hours": "Hours",
        "Dish": "Menu",
        "Restaurant_Name": "Name",
        "Location": "Location",
        "Cuisine": "Semantic",
    }

    def __init__(self, pipe):
        """Store the callable used to tag sentences.

        Args:
            pipe: Callable that takes a list of sentences and returns, for
                each sentence, an iterable of dicts exposing the
                ``"entity_group"`` and ``"word"`` keys.
        """
        self.pipe = pipe

    @classmethod
    def from_huggingface(cls, model_path: str):
        """Build a parser around a ``transformers`` token-classification pipeline.

        Args:
            model_path: Value forwarded as ``model`` to
                ``transformers.pipeline``.

        Returns:
            A new instance wrapping the created pipeline.
        """
        token_classifier = transformers.pipeline(
            "token-classification",
            model=model_path,
            aggregation_strategy="simple",
        )
        return cls(token_classifier)

    def get_ner(self, sentences: Union[str, List[str]]) -> List[dict]:
        """Identify the user intents in one or more prompts.

        Args:
            sentences: A single prompt or a list of prompts. A lone string is
                wrapped in a list before being handed to the pipeline.

        Returns:
            One dict per pipeline result, mapping each entity group to the
            list of recognized words in the order they were produced.
        """
        if isinstance(sentences, str):
            sentences = [sentences]

        list_out = []
        for items in self.pipe(sentences):
            sentence_ner = {}
            for recognized_token in items:
                # setdefault creates the bucket on first sight of a group,
                # avoiding a key-list rebuild for every token.
                sentence_ner.setdefault(
                    recognized_token["entity_group"], []
                ).append(recognized_token["word"])
            list_out.append(sentence_ner)
        return list_out

    def get_jobs(self, ners) -> List[dict]:
        """Map recognized entity groups to Matcher jobs.

        Entity groups that are absent from ``mappings`` are dropped. When
        several entity groups map to the same job, their words are merged
        into a single list (in iteration order) instead of the later group
        replacing the earlier one.

        Args:
            ners: Iterable of ``{entity_group: [words]}`` dicts, as returned
                by ``get_ner``.

        Returns:
            One ``{job_name: [words]}`` dict per input dict. The word lists
            are fresh lists, so mutating them does not affect ``ners``.
        """
        list_out = []
        for item in ners:
            jobs = {}
            for ner, value in item.items():
                job_name = self.mappings.get(ner)
                if job_name is None:
                    # Entity group with no corresponding Matcher job.
                    continue
                jobs.setdefault(job_name, []).extend(value)
            list_out.append(jobs)
        return list_out