davanstrien HF staff committed on
Commit
6a4b44c
1 Parent(s): cb13c5d

update embedding model

Browse files
Files changed (1) hide show
  1. load_viewer_data.py +4 -4
load_viewer_data.py CHANGED
@@ -17,10 +17,10 @@ from utils import get_chroma_client
17
  logger = logging.getLogger(__name__)
18
  logger.setLevel(logging.INFO)
19
 
20
- EMBEDDING_MODEL_NAME = "davanstrien/dataset-viewer-descriptions-processed-st"
21
- EMBEDDING_MODEL_REVISION = "d09abf1227ac41c6955eb9dd53c21771b0984ade"
22
  INFERENCE_MODEL_URL = (
23
- "https://bm143rfir2on1bkw.us-east-1.aws.endpoints.huggingface.cloud"
24
  )
25
 
26
 
@@ -61,7 +61,7 @@ def embed_and_upsert_datasets(
61
  dataset_rows_and_ids: list[dict[str, str]],
62
  collection: chromadb.Collection,
63
  inference_client: InferenceClient,
64
- batch_size: int = 10,
65
  ):
66
  logger.info(
67
  f"Embedding and upserting {len(dataset_rows_and_ids)} datasets for viewer data"
 
17
  logger = logging.getLogger(__name__)
18
  logger.setLevel(logging.INFO)
19
 
20
+ EMBEDDING_MODEL_NAME = "davanstrien/query-to-dataset-viewer-descriptions"
21
+ EMBEDDING_MODEL_REVISION = "07c71d97861a73695f0c53cd6b4b32980007d908"
22
  INFERENCE_MODEL_URL = (
23
+ "https://ecg0by60w2vo9j8h.us-east-1.aws.endpoints.huggingface.cloud"
24
  )
25
 
26
 
 
61
  dataset_rows_and_ids: list[dict[str, str]],
62
  collection: chromadb.Collection,
63
  inference_client: InferenceClient,
64
+ batch_size: int = 100,
65
  ):
66
  logger.info(
67
  f"Embedding and upserting {len(dataset_rows_and_ids)} datasets for viewer data"