Update app.py
app.py CHANGED
@@ -330,6 +330,18 @@ def get_model_files(model_type="causal_lm"):
 def get_gallery_files(file_types=["png"]):
     return sorted([f for ext in file_types for f in glob.glob(f"*.{ext}")])
 
+def download_pdf(url, output_path):
+    try:
+        response = requests.get(url, stream=True, timeout=10)
+        if response.status_code == 200:
+            with open(output_path, "wb") as f:
+                for chunk in response.iter_content(chunk_size=8192):
+                    f.write(chunk)
+            return True
+    except requests.RequestException as e:
+        logger.error(f"Failed to download {url}: {e}")
+        return False
+
 # Mock Search Tool for RAG
 def mock_search(query: str) -> str:
     if "superhero" in query.lower():
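The hunk above adds a download_pdf helper that streams a remote file to disk in 8 KB chunks and returns True on success, logging the error and returning False on request failures. Below is a minimal usage sketch, not part of the commit: the URL and output filename are made up, and it assumes download_pdf, requests, and a module-level logger are already in scope in app.py.

# Usage sketch for the new download_pdf helper (URL and filename are hypothetical).
import logging
import requests  # download_pdf relies on requests being imported in app.py

logging.basicConfig(level=logging.INFO)
logger = logging.getLogger(__name__)  # app.py is assumed to configure its own logger

# assumes download_pdf from the hunk above is defined in this module
if download_pdf("https://example.com/paper.pdf", "paper.pdf"):
    print("Saved paper.pdf")
else:
    # a non-200 status falls through without returning True, so it also lands here
    print("Download failed; see the log for details")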
@@ -651,7 +663,7 @@ with tab4:
     csv_path = f"sft_dataset_{int(time.time())}.csv"
     with open(csv_path, "w", newline="") as f:
         writer = csv.writer(f)
-        writer.writerow(["image", "text
+        writer.writerow(["image", "text"])
         for _, row in edited_data.iterrows():
             writer.writerow([row["image"], row["text"]])
     st.markdown(get_download_link(csv_path, "text/csv", "Download SFT Dataset CSV"), unsafe_allow_html=True)
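The second hunk completes the previously truncated header line, so every exported SFT dataset CSV now begins with an "image,text" header row. As a small illustration (the file name and rows below are invented, not taken from app.py), that header is what lets the file be read back by column name:

# Round-trip sketch for the SFT CSV format; file name and rows are illustrative only.
import csv

rows = [{"image": "hero_001.png", "text": "A superhero flying over a city"}]

with open("sft_dataset_example.csv", "w", newline="") as f:
    writer = csv.DictWriter(f, fieldnames=["image", "text"])
    writer.writeheader()      # same "image,text" header the app now writes
    writer.writerows(rows)

with open("sft_dataset_example.csv", newline="") as f:
    for record in csv.DictReader(f):   # header row enables access by column name
        print(record["image"], "->", record["text"])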