Redmind committed on
Commit
6e9858c
·
verified ·
1 Parent(s): 16af574

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +17 -17
app.py CHANGED
@@ -15,11 +15,26 @@ client = chromadb.PersistentClient(path="/data/chroma_db")
15
  collection = client.get_or_create_collection(name="knowledge_base")
16
  pdf_file="Sutures and Suturing techniques.pdf"
17
  pptx_file="impalnt 1.pptx"
18
- process_and_store(pdf_path=pdf_file, pptx_path=pptx_file)
19
  collections = client.list_collections()
20
 
21
  print("Existing Collections:", [c.name for c in collections])
22
  collection = client.get_collection(name="knowledge_base")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
23
 
24
  # Initialize models
25
  text_model = SentenceTransformer('all-MiniLM-L6-v2')
@@ -30,7 +45,7 @@ clip_processor = CLIPProcessor.from_pretrained("openai/clip-vit-base-patch32")
30
  IMAGE_FOLDER = "/data/extracted_images"
31
  os.makedirs(IMAGE_FOLDER, exist_ok=True)
32
 
33
-
34
  @app.get("/")
35
  def greet_json():
36
 
@@ -134,18 +149,3 @@ def store_data(texts, image_paths):
134
  print("Data stored successfully!")
135
 
136
 
137
- ### Step 8: Process and Store from Files ###
138
- def process_and_store(pdf_path=None, pptx_path=None):
139
- texts, images = [], []
140
-
141
- if pdf_path:
142
- print(f"Processing PDF: {pdf_path}")
143
- texts.append(extract_text_from_pdf(pdf_path))
144
- images.extend(extract_images_from_pdf(pdf_path))
145
-
146
- if pptx_path:
147
- print(f"Processing PPTX: {pptx_path}")
148
- texts.append(extract_text_from_pptx(pptx_path))
149
- images.extend(extract_images_from_pptx(pptx_path))
150
-
151
- store_data(texts, images)
 
15
  collection = client.get_or_create_collection(name="knowledge_base")
16
  pdf_file="Sutures and Suturing techniques.pdf"
17
  pptx_file="impalnt 1.pptx"
18
+
19
  collections = client.list_collections()
20
 
21
  print("Existing Collections:", [c.name for c in collections])
22
  collection = client.get_collection(name="knowledge_base")
23
+ ### Step 8: Process and Store from Files ###
24
+ def process_and_store(pdf_path=None, pptx_path=None):
25
+ texts, images = [], []
26
+
27
+ if pdf_path:
28
+ print(f"Processing PDF: {pdf_path}")
29
+ texts.append(extract_text_from_pdf(pdf_path))
30
+ images.extend(extract_images_from_pdf(pdf_path))
31
+
32
+ if pptx_path:
33
+ print(f"Processing PPTX: {pptx_path}")
34
+ texts.append(extract_text_from_pptx(pptx_path))
35
+ images.extend(extract_images_from_pptx(pptx_path))
36
+
37
+ store_data(texts, images)
38
 
39
  # Initialize models
40
  text_model = SentenceTransformer('all-MiniLM-L6-v2')
 
45
  IMAGE_FOLDER = "/data/extracted_images"
46
  os.makedirs(IMAGE_FOLDER, exist_ok=True)
47
 
48
+ process_and_store(pdf_path=pdf_file, pptx_path=pptx_file)
49
  @app.get("/")
50
  def greet_json():
51
 
 
149
  print("Data stored successfully!")
150
 
151