Yurii Paniv committed on
Commit
cfb23ad
1 Parent(s): 9430ecf
Files changed (2) hide show
  1. app.py +8 -10
  2. requirements.txt +1 -1
app.py CHANGED
@@ -7,11 +7,11 @@ from pydub.audio_segment import AudioSegment
7
  import requests
8
  from os.path import exists
9
  from stt import Model
10
-
11
 
12
  MODEL_NAMES = [
13
- "With scorer",
14
- "No scorer"
15
  ]
16
 
17
  # download model
@@ -52,12 +52,11 @@ def download(url, file_name):
52
 
53
  def stt(audio: Tuple[int, np.array], model_name: str):
54
  sample_rate, audio = audio
 
55
  use_scorer = True if model_name == "With scorer" else False
56
 
57
- if sample_rate != 16000:
58
- raise ValueError("Incorrect sample rate.")
59
-
60
  recognized_result = client(audio, sample_rate, use_scorer)
 
61
 
62
  return recognized_result
63
 
@@ -67,14 +66,13 @@ def _convert_audio(audio_data: np.array, sample_rate: int):
67
  source_audio.write(audio_data)
68
  source_audio.seek(0)
69
  output_audio = BytesIO()
70
- wav_file = AudioSegment.from_raw(
71
  source_audio,
72
  channels=1,
73
- sample_width=2,
74
  frame_rate=sample_rate
75
  )
76
- wav_file = wav_file.set_frame_rate(16000).set_channels(1)
77
- wav_file.export(output_audio, "wav", codec="pcm_s16le")
78
  output_audio.seek(0)
79
  return output_audio
80
 
 
7
  import requests
8
  from os.path import exists
9
  from stt import Model
10
+ from datetime import datetime
11
 
12
  MODEL_NAMES = [
13
+ "No scorer",
14
+ "With scorer"
15
  ]
16
 
17
  # download model
 
52
 
53
  def stt(audio: Tuple[int, np.array], model_name: str):
54
  sample_rate, audio = audio
55
+ print(f"Input sample rate: {sample_rate}. Audio file length: {round(audio.shape[0]/sample_rate ,2)}")
56
  use_scorer = True if model_name == "With scorer" else False
57
 
 
 
 
58
  recognized_result = client(audio, sample_rate, use_scorer)
59
+ print(f"Time: {datetime.utcnow()}. Transcript: `{recognized_result}`. Scorer: {use_scorer}.")
60
 
61
  return recognized_result
62
 
 
66
  source_audio.write(audio_data)
67
  source_audio.seek(0)
68
  output_audio = BytesIO()
69
+ wav_file: AudioSegment = AudioSegment.from_raw(
70
  source_audio,
71
  channels=1,
72
+ sample_width=4,
73
  frame_rate=sample_rate
74
  )
75
+ wav_file.export(output_audio, "wav", codec="pcm_s16le", parameters=["-ar", "16k"])
 
76
  output_audio.seek(0)
77
  return output_audio
78
 
requirements.txt CHANGED
@@ -1,3 +1,3 @@
1
  gradio==2.4.5
2
- STT==1.0.0
3
  pydub==0.25.1
 
1
  gradio==2.4.5
2
+ STT==1.3.0
3
  pydub==0.25.1