text-to-speech / db /create_db.py
Daryl Fung
added imgur bucket
932db78
raw
history blame
No virus
1.85 kB
import os
from pymilvus import (
utility,
FieldSchema,
CollectionSchema,
DataType,
Collection,
)
from db_connect import connect
# Call the project-local connection helper before issuing any pymilvus
# request (presumably it opens the Milvus connection -- see db_connect).
connect()

#region creating collections
### Create collections ###

# Layout of the "Response" collection: an auto-generated INT64 primary key,
# the text itself, and a 384-dimensional float vector stored alongside it.
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=384),
]
schema = CollectionSchema(
    fields=fields,
    description=(
        "Texts to generate audio for. "
        "This collection cache the texts needed to generate audio. "
        "We can then do offline generation for the audio file."
    ),
)

# Index settings for the vector field: IVF_FLAT with the cosine metric.
index_params = {
    "metric_type": "COSINE",
    "index_type": "IVF_FLAT",
    "params": {"nlist": 1024},
}

# WARNING: the collection is dropped and rebuilt from scratch, so running
# this script discards whatever "Response" previously contained.
utility.drop_collection("Response")
response_collection = Collection(name="Response", schema=schema)

response_collection.create_index(
    field_name="embeddings",
    index_params=index_params,
)
# Queries the index build progress; the returned value is not captured.
utility.index_building_progress("Response")
# Layout of the "AudioResponse" collection: an auto-generated INT64 primary
# key, the text, the filename stored with that text, and a 384-dimensional
# float vector.
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True, auto_id=True),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=65535),
    FieldSchema(name="filename", dtype=DataType.VARCHAR, max_length=65535),
    FieldSchema(name="embeddings", dtype=DataType.FLOAT_VECTOR, dim=384),
]
audio_schema = CollectionSchema(fields, "The text that corresponds to the audio file.")

# WARNING: the collection is dropped and rebuilt from scratch, so running
# this script discards whatever "AudioResponse" previously contained.
utility.drop_collection("AudioResponse")

# Constructing the Collection with a schema already yields a usable handle;
# the handle is not re-created by name afterwards, which would only repeat
# the lookup and throw away the object built here.
audio_response_collection = Collection("AudioResponse", audio_schema)

# Index settings for the vector field: IVF_FLAT with the cosine metric.
index_params = {
    "metric_type": "COSINE",
    "index_type": "IVF_FLAT",
    "params": {"nlist": 1024},
}
audio_response_collection.create_index(field_name='embeddings', index_params=index_params)
# Queries the index build progress; the returned value is not captured.
utility.index_building_progress("AudioResponse")
#endregion