MohammedNasser committed on
Commit
7b54e65
1 Parent(s): 631b794

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +4 -40
app.py CHANGED
@@ -5,7 +5,7 @@ import fitz
5
  from dotenv import load_dotenv
6
  from langchain_community.document_loaders import UnstructuredPDFLoader
7
  from langchain_community.vectorstores import FAISS
8
- from langchain_community.embeddings import HuggingFaceEmbeddings
9
  from langchain_text_splitters import CharacterTextSplitter
10
  from langchain_groq import ChatGroq
11
  from langchain.memory import ConversationBufferMemory
@@ -15,52 +15,15 @@ import sys
15
  import pytesseract
16
  from pdf2image import convert_from_path
17
 
18
- def check_installation(command):
19
- try:
20
- result = subprocess.run([command, '--version'], capture_output=True, text=True)
21
- return result.returncode == 0, result.stdout
22
- except FileNotFoundError:
23
- return False, f"{command} not found"
24
-
25
- def check_dependencies():
26
- dependencies = {
27
- 'tesseract': '/usr/bin/tesseract',
28
- 'pdftoppm': '/usr/bin/pdftoppm', # Part of poppler-utils
29
- }
30
-
31
- status = {}
32
- for dep, path in dependencies.items():
33
- installed, version = check_installation(path)
34
- status[dep] = {
35
- 'installed': installed,
36
- 'path': path,
37
- 'version': version if installed else 'Not found'
38
- }
39
-
40
- return status
41
-
42
- def log_dependency_status(status):
43
- print("Dependency Status:")
44
- for dep, info in status.items():
45
- print(f"{dep}:")
46
- print(f" Installed: {info['installed']}")
47
- print(f" Path: {info['path']}")
48
- print(f" Version: {info['version']}")
49
- print("\nEnvironment Variables:")
50
- for key, value in os.environ.items():
51
- if 'PATH' in key or 'PYTHONPATH' in key:
52
- print(f"{key}: {value}")
53
-
54
- # Run dependency check
55
- dependency_status = check_dependencies()
56
- log_dependency_status(dependency_status)
57
 
58
  # Load environment variables
59
  load_dotenv()
60
  secret_key = os.getenv("GROQ_API_KEY")
61
 
62
  os.environ["GROQ_API_KEY"] = secret_key
 
63
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
 
64
  # Ensure the necessary folders exist
65
  UPLOAD_FOLDER = 'uploads/'
66
  AUDIO_FOLDER = 'audio/'
@@ -83,6 +46,7 @@ def prepare_vectorstore(data):
83
  texts = data
84
  vectorstore = FAISS.from_texts(texts, embeddings)
85
  vectorstore.save_local("faiss_index")
 
86
  return vectorstore
87
 
88
  def load_vectorstore():
 
5
  from dotenv import load_dotenv
6
  from langchain_community.document_loaders import UnstructuredPDFLoader
7
  from langchain_community.vectorstores import FAISS
8
+ from langchain_huggingface import HuggingFaceEmbeddings
9
  from langchain_text_splitters import CharacterTextSplitter
10
  from langchain_groq import ChatGroq
11
  from langchain.memory import ConversationBufferMemory
 
15
  import pytesseract
16
  from pdf2image import convert_from_path
17
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
18
 
19
  # Load environment variables
20
  load_dotenv()
21
  secret_key = os.getenv("GROQ_API_KEY")
22
 
23
  os.environ["GROQ_API_KEY"] = secret_key
24
+
25
  embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
26
+
27
  # Ensure the necessary folders exist
28
  UPLOAD_FOLDER = 'uploads/'
29
  AUDIO_FOLDER = 'audio/'
 
46
  texts = data
47
  vectorstore = FAISS.from_texts(texts, embeddings)
48
  vectorstore.save_local("faiss_index")
49
+
50
  return vectorstore
51
 
52
  def load_vectorstore():