Spaces:

MohammedNasser
/

Arabic-PDF-Chat

Running

MohammedNasser committed 4 days ago

Commit

6ebe94a

•

1 Parent(s): 89913e6

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -16,6 +16,7 @@ import pytesseract
 from pdf2image import convert_from_path
 from huggingface_hub import Repository, login
 from huggingface_hub import hf_hub_download
@@ -58,9 +59,14 @@ def load_pdf(file_path):
 def prepare_vectorstore(data):
     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator="\n")
-    texts = text_splitter.split_documents(data)
-    vectorstore = FAISS.from_texts(texts, embeddings)
     return vectorstore
 def create_chain(vectorstore):

 from pdf2image import convert_from_path
 from huggingface_hub import Repository, login
 from huggingface_hub import hf_hub_download
+from langchain.schema import Document
 def prepare_vectorstore(data):
     text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=20, separator="\n")
+    # Create Document objects from the input data
+    documents = [Document(page_content=text) for text in data]
+    # Split the documents into chunks
+    chunks = text_splitter.split_documents(documents)
+    # Create the vector store
+    vectorstore = FAISS.from_documents(chunks, embeddings)
     return vectorstore
 def create_chain(vectorstore):