Spaces:
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -5,7 +5,7 @@ import fitz
|
|
5 |
from dotenv import load_dotenv
|
6 |
from langchain_community.document_loaders import UnstructuredPDFLoader
|
7 |
from langchain_community.vectorstores import FAISS
|
8 |
-
from
|
9 |
from langchain_text_splitters import CharacterTextSplitter
|
10 |
from langchain_groq import ChatGroq
|
11 |
from langchain.memory import ConversationBufferMemory
|
@@ -15,52 +15,15 @@ import sys
|
|
15 |
import pytesseract
|
16 |
from pdf2image import convert_from_path
|
17 |
|
18 |
-
def check_installation(command):
|
19 |
-
try:
|
20 |
-
result = subprocess.run([command, '--version'], capture_output=True, text=True)
|
21 |
-
return result.returncode == 0, result.stdout
|
22 |
-
except FileNotFoundError:
|
23 |
-
return False, f"{command} not found"
|
24 |
-
|
25 |
-
def check_dependencies():
|
26 |
-
dependencies = {
|
27 |
-
'tesseract': '/usr/bin/tesseract',
|
28 |
-
'pdftoppm': '/usr/bin/pdftoppm', # Part of poppler-utils
|
29 |
-
}
|
30 |
-
|
31 |
-
status = {}
|
32 |
-
for dep, path in dependencies.items():
|
33 |
-
installed, version = check_installation(path)
|
34 |
-
status[dep] = {
|
35 |
-
'installed': installed,
|
36 |
-
'path': path,
|
37 |
-
'version': version if installed else 'Not found'
|
38 |
-
}
|
39 |
-
|
40 |
-
return status
|
41 |
-
|
42 |
-
def log_dependency_status(status):
|
43 |
-
print("Dependency Status:")
|
44 |
-
for dep, info in status.items():
|
45 |
-
print(f"{dep}:")
|
46 |
-
print(f" Installed: {info['installed']}")
|
47 |
-
print(f" Path: {info['path']}")
|
48 |
-
print(f" Version: {info['version']}")
|
49 |
-
print("\nEnvironment Variables:")
|
50 |
-
for key, value in os.environ.items():
|
51 |
-
if 'PATH' in key or 'PYTHONPATH' in key:
|
52 |
-
print(f"{key}: {value}")
|
53 |
-
|
54 |
-
# Run dependency check
|
55 |
-
dependency_status = check_dependencies()
|
56 |
-
log_dependency_status(dependency_status)
|
57 |
|
58 |
# Load environment variables
|
59 |
load_dotenv()
|
60 |
secret_key = os.getenv("GROQ_API_KEY")
|
61 |
|
62 |
os.environ["GROQ_API_KEY"] = secret_key
|
|
|
63 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
|
|
|
64 |
# Ensure the necessary folders exist
|
65 |
UPLOAD_FOLDER = 'uploads/'
|
66 |
AUDIO_FOLDER = 'audio/'
|
@@ -83,6 +46,7 @@ def prepare_vectorstore(data):
|
|
83 |
texts = data
|
84 |
vectorstore = FAISS.from_texts(texts, embeddings)
|
85 |
vectorstore.save_local("faiss_index")
|
|
|
86 |
return vectorstore
|
87 |
|
88 |
def load_vectorstore():
|
|
|
5 |
from dotenv import load_dotenv
|
6 |
from langchain_community.document_loaders import UnstructuredPDFLoader
|
7 |
from langchain_community.vectorstores import FAISS
|
8 |
+
from langchain_huggingface import HuggingFaceEmbeddings
|
9 |
from langchain_text_splitters import CharacterTextSplitter
|
10 |
from langchain_groq import ChatGroq
|
11 |
from langchain.memory import ConversationBufferMemory
|
|
|
15 |
import pytesseract
|
16 |
from pdf2image import convert_from_path
|
17 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
18 |
|
19 |
# Load environment variables
|
20 |
load_dotenv()
|
21 |
secret_key = os.getenv("GROQ_API_KEY")
|
22 |
|
23 |
os.environ["GROQ_API_KEY"] = secret_key
|
24 |
+
|
25 |
embeddings = HuggingFaceEmbeddings(model_name="sentence-transformers/paraphrase-multilingual-mpnet-base-v2")
|
26 |
+
|
27 |
# Ensure the necessary folders exist
|
28 |
UPLOAD_FOLDER = 'uploads/'
|
29 |
AUDIO_FOLDER = 'audio/'
|
|
|
46 |
texts = data
|
47 |
vectorstore = FAISS.from_texts(texts, embeddings)
|
48 |
vectorstore.save_local("faiss_index")
|
49 |
+
|
50 |
return vectorstore
|
51 |
|
52 |
def load_vectorstore():
|