litagin committed on
Commit
c04c46a
•
1 Parent(s): 5f871bf

Add FF and refactor a little

Browse files
Files changed (4) hide show
  1. app.py +19 -15
  2. requirements.txt +0 -1
  3. weights/FF/FF.index +3 -0
  4. weights/FF/FF_e300.pth +3 -0
app.py CHANGED
@@ -31,18 +31,15 @@ limitation = os.getenv("SYSTEM") == "spaces"
31
 
32
  config = Config()
33
 
 
34
  edge_output_filename = "edge_output.mp3"
35
  tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
36
  tts_voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
37
 
 
38
  model_root = "weights"
39
  models = [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
40
  models.sort()
41
- hubert_model = None
42
-
43
- print("Loading rmvpe model...")
44
- rmvpe_model = RMVPE("rmvpe.pt", config.is_half, config.device)
45
- print("rmvpe model loaded.")
46
 
47
 
48
  def model_data(model_name):
@@ -97,7 +94,7 @@ def model_data(model_name):
97
 
98
 
99
  def load_hubert():
100
- global hubert_model
101
  models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
102
  ["hubert_base.pt"],
103
  suffix="",
@@ -108,7 +105,7 @@ def load_hubert():
108
  hubert_model = hubert_model.half()
109
  else:
110
  hubert_model = hubert_model.float()
111
- hubert_model.eval()
112
 
113
 
114
  def tts(
@@ -128,7 +125,7 @@ def tts(
128
  print(datetime.datetime.now())
129
  print("tts_text:")
130
  print(tts_text)
131
- print(f"tts_voice: {tts_voice}")
132
  print(f"Model name: {model_name}")
133
  print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
134
  try:
@@ -139,7 +136,6 @@ def tts(
139
  None,
140
  None,
141
  )
142
- tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
143
  t0 = time.time()
144
  if speed >= 0:
145
  speed_str = f"+{speed}%"
@@ -162,11 +158,9 @@ def tts(
162
  edge_output_filename,
163
  None,
164
  )
165
-
166
  f0_up_key = int(f0_up_key)
167
 
168
- if not hubert_model:
169
- load_hubert()
170
  if f0_method == "rmvpe":
171
  vc.model_rmvpe = rmvpe_model
172
  times = [0, 0, 0]
@@ -201,9 +195,11 @@ def tts(
201
  (tgt_sr, audio_opt),
202
  )
203
  except EOFError:
204
- info = """
205
- It seems that edge-tts output is empty. This may occur when the input text and the speaker do not match.
206
- For example, maybe you entered Japanese (without alphabets) text but chose non-Japanese speaker?"""
 
 
207
  print(info)
208
  return info, None, None
209
  except:
@@ -212,6 +208,14 @@ For example, maybe you entered Japanese (without alphabets) text but chose non-J
212
  return info, None, None
213
 
214
 
 
 
 
 
 
 
 
 
215
  initial_md = """
216
  # RVC text-to-speech demo
217
 
 
31
 
32
  config = Config()
33
 
34
+ # Edge TTS
35
  edge_output_filename = "edge_output.mp3"
36
  tts_voice_list = asyncio.get_event_loop().run_until_complete(edge_tts.list_voices())
37
  tts_voices = [f"{v['ShortName']}-{v['Gender']}" for v in tts_voice_list]
38
 
39
+ # RVC models
40
  model_root = "weights"
41
  models = [d for d in os.listdir(model_root) if os.path.isdir(f"{model_root}/{d}")]
42
  models.sort()
 
 
 
 
 
43
 
44
 
45
  def model_data(model_name):
 
94
 
95
 
96
  def load_hubert():
97
+ # global hubert_model
98
  models, _, _ = checkpoint_utils.load_model_ensemble_and_task(
99
  ["hubert_base.pt"],
100
  suffix="",
 
105
  hubert_model = hubert_model.half()
106
  else:
107
  hubert_model = hubert_model.float()
108
+ return hubert_model.eval()
109
 
110
 
111
  def tts(
 
125
  print(datetime.datetime.now())
126
  print("tts_text:")
127
  print(tts_text)
128
+ print(f"tts_voice: {tts_voice}, speed: {speed}")
129
  print(f"Model name: {model_name}")
130
  print(f"F0: {f0_method}, Key: {f0_up_key}, Index: {index_rate}, Protect: {protect}")
131
  try:
 
136
  None,
137
  None,
138
  )
 
139
  t0 = time.time()
140
  if speed >= 0:
141
  speed_str = f"+{speed}%"
 
158
  edge_output_filename,
159
  None,
160
  )
 
161
  f0_up_key = int(f0_up_key)
162
 
163
+ tgt_sr, net_g, vc, version, index_file, if_f0 = model_data(model_name)
 
164
  if f0_method == "rmvpe":
165
  vc.model_rmvpe = rmvpe_model
166
  times = [0, 0, 0]
 
195
  (tgt_sr, audio_opt),
196
  )
197
  except EOFError:
198
+ info = (
199
+ "It seems that the edge-tts output is not valid. "
200
+ "This may occur when the input text and the speaker do not match. "
201
+ "For example, maybe you entered Japanese (without alphabets) text but chose non-Japanese speaker?"
202
+ )
203
  print(info)
204
  return info, None, None
205
  except:
 
208
  return info, None, None
209
 
210
 
211
+ print("Loading hubert model...")
212
+ hubert_model = load_hubert()
213
+ print("Hubert model loaded.")
214
+
215
+ print("Loading rmvpe model...")
216
+ rmvpe_model = RMVPE("rmvpe.pt", config.is_half, config.device)
217
+ print("rmvpe model loaded.")
218
+
219
  initial_md = """
220
  # RVC text-to-speech demo
221
 
requirements.txt CHANGED
@@ -1,4 +1,3 @@
1
- # Cython==0.29.34
2
  edge_tts==6.1.7
3
  fairseq==0.12.2
4
  faiss_cpu==1.7.4
 
 
1
  edge_tts==6.1.7
2
  fairseq==0.12.2
3
  faiss_cpu==1.7.4
weights/FF/FF.index ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:586dd540bc384163e2107df8b48c2a1d21cc1e89b5eef1c050d0dc12544ebd24
3
+ size 508489659
weights/FF/FF_e300.pth ADDED
@@ -0,0 +1,3 @@
 
 
 
 
1
+ version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3f1f037b3c249418806317a14dd12d5fcabef908a52bc2f1ba2c83ca34569d49
3
+ size 55232064