versae committed on
Commit
8c0b8ef
1 Parent(s): 3fef081

Update eval.py

Browse files
Files changed (1) hide show
  1. eval.py +4 -5
eval.py CHANGED
@@ -130,11 +130,10 @@ def normalize_text(original_text: str, dataset: str) -> str:
130
  text = re.sub('[úùüû]', 'u', text)
131
  text = re.sub('[«»]', '', text)
132
  text = re.sub('\s+', ' ', text)
133
- text = re.sub('<e+h?>', 'eee', text)
134
- text = re.sub('<m+>', 'mmm', text)
135
- text = re.sub('<q+>', 'qqq', text)
136
  text = re.sub('<inaudible>', 'xxx', text)
137
- text = re.sub('[<>]', '', text)
138
 
139
  # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
140
  # # note that order is important here!
@@ -143,7 +142,7 @@ def normalize_text(original_text: str, dataset: str) -> str:
143
  # for t in token_sequences_to_ignore:
144
  # text = " ".join(text.split(t))
145
 
146
- return text.strip()
147
 
148
 
149
  def main(args):
 
130
  text = re.sub('[úùüû]', 'u', text)
131
  text = re.sub('[«»]', '', text)
132
  text = re.sub('\s+', ' ', text)
133
+ text = re.sub('<ee>', 'eee', text)
134
+ text = re.sub('<qq>', 'qqq', text)
135
+ text = re.sub('<mm>', 'mmm', text)
136
  text = re.sub('<inaudible>', 'xxx', text)
 
137
 
138
  # # In addition, we can normalize the target text, e.g. removing new lines characters etc...
139
  # # note that order is important here!
 
142
  # for t in token_sequences_to_ignore:
143
  # text = " ".join(text.split(t))
144
 
145
+ return text
146
 
147
 
148
  def main(args):