MohammedNasser committed on
Commit
6ebe94a
1 Parent(s): 89913e6

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -16,6 +16,7 @@ import pytesseract
16
  from pdf2image import convert_from_path
17
  from huggingface_hub import Repository, login
18
  from huggingface_hub import hf_hub_download
 
19
 
20
 
21
 
@@ -58,9 +59,14 @@ def load_pdf(file_path):
58
 
59
  def prepare_vectorstore(data):
60
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator="\n")
61
- texts = text_splitter.split_documents(data)
62
- vectorstore = FAISS.from_texts(texts, embeddings)
63
 
 
 
 
 
 
64
  return vectorstore
65
 
66
  def create_chain(vectorstore):
 
16
  from pdf2image import convert_from_path
17
  from huggingface_hub import Repository, login
18
  from huggingface_hub import hf_hub_download
19
+ from langchain.schema import Document
20
 
21
 
22
 
 
59
 
60
  def prepare_vectorstore(data):
61
  text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator="\n")
62
+ # Create Document objects from the input data
63
+ documents = [Document(page_content=text) for text in data]
64
 
65
+ # Split the documents into chunks
66
+ chunks = text_splitter.split_documents(documents)
67
+
68
+ # Create the vector store
69
+ vectorstore = FAISS.from_documents(chunks, embeddings)
70
  return vectorstore
71
 
72
  def create_chain(vectorstore):