Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -15,11 +15,26 @@ client = chromadb.PersistentClient(path="/data/chroma_db")
|
|
15 |
collection = client.get_or_create_collection(name="knowledge_base")
|
16 |
pdf_file="Sutures and Suturing techniques.pdf"
|
17 |
pptx_file="impalnt 1.pptx"
|
18 |
-
|
19 |
collections = client.list_collections()
|
20 |
|
21 |
print("Existing Collections:", [c.name for c in collections])
|
22 |
collection = client.get_collection(name="knowledge_base")
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
23 |
|
24 |
# Initialize models
|
25 |
text_model = SentenceTransformer('all-MiniLM-L6-v2')
|
@@ -30,7 +45,7 @@ clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
|
|
30 |
IMAGE_FOLDER = "/data/extracted_images"
|
31 |
os.makedirs(IMAGE_FOLDER, exist_ok=True)
|
32 |
|
33 |
-
|
34 |
@app.get("/")
|
35 |
def greet_json():
|
36 |
|
@@ -134,18 +149,3 @@ def store_data(texts, image_paths):
|
|
134 |
print("Data stored successfully!")
|
135 |
|
136 |
|
137 |
-
### Step 8: Process and Store from Files ###
def process_and_store(pdf_path=None, pptx_path=None):
    """Gather text and images from the given documents and store them.

    Args:
        pdf_path: Path to a PDF file; skipped when falsy.
        pptx_path: Path to a PPTX file; skipped when falsy.

    For each supplied file, the result of its text extractor is appended
    as one entry and the result of its image extractor is merged into the
    image list. Both lists are then passed to ``store_data``.
    """
    collected_texts = []
    collected_images = []

    if pdf_path:
        print(f"Processing PDF: {pdf_path}")
        pdf_text = extract_text_from_pdf(pdf_path)
        collected_texts.append(pdf_text)
        pdf_images = extract_images_from_pdf(pdf_path)
        collected_images.extend(pdf_images)

    if pptx_path:
        print(f"Processing PPTX: {pptx_path}")
        pptx_text = extract_text_from_pptx(pptx_path)
        collected_texts.append(pptx_text)
        pptx_images = extract_images_from_pptx(pptx_path)
        collected_images.extend(pptx_images)

    store_data(collected_texts, collected_images)
|
|
|
15 |
collection = client.get_or_create_collection(name="knowledge_base")
|
16 |
pdf_file="Sutures and Suturing techniques.pdf"
|
17 |
pptx_file="impalnt 1.pptx"
|
18 |
+
|
19 |
collections = client.list_collections()
|
20 |
|
21 |
print("Existing Collections:", [c.name for c in collections])
|
22 |
collection = client.get_collection(name="knowledge_base")
|
23 |
+
### Step 8: Process and Store from Files ###
def process_and_store(pdf_path=None, pptx_path=None):
    """Extract text and images from the given files and store them.

    Args:
        pdf_path: Path to a PDF file, or a falsy value to skip PDF processing.
        pptx_path: Path to a PPTX file, or a falsy value to skip PPTX
            processing.

    Returns:
        None. Extracted content is handed to ``store_data``.

    Each supplied file contributes one entry to the text list (whatever its
    text extractor returns) and extends the image list with whatever its
    image extractor returns.
    """
    if not pdf_path and not pptx_path:
        # Nothing to ingest: do not call store_data() with two empty lists,
        # which would either fail downstream or report a misleading success.
        print("No input files provided; nothing to store.")
        return

    texts, images = [], []

    if pdf_path:
        print(f"Processing PDF: {pdf_path}")
        texts.append(extract_text_from_pdf(pdf_path))
        images.extend(extract_images_from_pdf(pdf_path))

    if pptx_path:
        print(f"Processing PPTX: {pptx_path}")
        texts.append(extract_text_from_pptx(pptx_path))
        images.extend(extract_images_from_pptx(pptx_path))

    store_data(texts, images)
|
38 |
|
39 |
# Initialize models
|
40 |
text_model = SentenceTransformer('all-MiniLM-L6-v2')
|
|
|
45 |
IMAGE_FOLDER = "/data/extracted_images"
|
46 |
os.makedirs(IMAGE_FOLDER, exist_ok=True)
|
47 |
|
48 |
+
# Ingest the configured PDF and PPTX once, at import time.
# NOTE(review): this runs while the module is still being loaded. The diff
# context suggests `store_data` (and presumably the extract_* helpers) are
# defined further down the file; if so this call raises NameError at startup
# and should move below those definitions — confirm against the full file.
process_and_store(pdf_path=pdf_file, pptx_path=pptx_file)
|
49 |
@app.get("/")
|
50 |
def greet_json():
|
51 |
|
|
|
149 |
print("Data stored successfully!")
|
150 |
|
151 |
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|