Update app.py
Browse files
app.py
CHANGED
@@ -1,3 +1,8 @@
|
|
|
|
|
|
|
|
|
|
|
|
1 |
import os
|
2 |
import subprocess
|
3 |
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
|
@@ -8,7 +13,6 @@ import re
|
|
8 |
from pathlib import Path
|
9 |
from io import BytesIO
|
10 |
import random
|
11 |
-
import streamlit as st
|
12 |
from bs4 import BeautifulSoup
|
13 |
from PyPDF2 import PdfReader
|
14 |
import zipfile
|
@@ -21,33 +25,31 @@ import spacy
|
|
21 |
import spacy.cli
|
22 |
from spacy.language import Language
|
23 |
|
24 |
-
# Register a dummy factory under the exact key that the transformer model expects.
|
25 |
@Language.factory("spacy-curated-transformers_RobertaTransformer_v1")
def dummy_roberta_transformer(nlp, name):
    """Build a no-op pipeline component for the RoBERTa transformer factory key.

    The factory is registered under the exact key that the transformer
    model expects. Neither ``nlp`` nor ``name`` is used by the factory.

    Returns:
        A callable that takes a Doc and returns it unchanged.
    """

    def pass_through(doc):
        # The placeholder component hands the Doc back untouched.
        return doc

    return pass_through
|
31 |
|
32 |
-
# Try to load the transformer-based model.
|
33 |
@st.cache_resource
|
34 |
def load_nlp_model():
|
35 |
try:
|
36 |
-
nlp_model = spacy.load("en_core_web_trf")
|
37 |
-
except OSError:
|
38 |
-
st.write("Model en_core_web_trf not found. Downloading it now...")
|
39 |
-
spacy.cli.download("en_core_web_trf")
|
40 |
try:
|
41 |
-
nlp_model = spacy.load("
|
42 |
-
except
|
43 |
-
st.
|
44 |
-
st.write("Falling back to en_core_web_sm...")
|
45 |
spacy.cli.download("en_core_web_sm")
|
46 |
nlp_model = spacy.load("en_core_web_sm")
|
|
|
|
|
|
|
47 |
return nlp_model
|
48 |
|
|
|
49 |
nlp_model = load_nlp_model()
|
50 |
|
|
|
|
|
51 |
# Also load SentenceTransformer for semantic re-ranking.
|
52 |
from sentence_transformers import SentenceTransformer, util
|
53 |
@st.cache_resource
|
|
|
1 |
+
import streamlit as st
|
2 |
+
# Set page config as the very first Streamlit command
|
3 |
+
st.set_page_config(page_title="Advanced File Downloader", layout="wide")
|
4 |
+
|
5 |
+
# Now import all other libraries
|
6 |
import os
|
7 |
import subprocess
|
8 |
from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
|
|
|
13 |
from pathlib import Path
|
14 |
from io import BytesIO
|
15 |
import random
|
|
|
16 |
from bs4 import BeautifulSoup
|
17 |
from PyPDF2 import PdfReader
|
18 |
import zipfile
|
|
|
25 |
import spacy.cli
|
26 |
from spacy.language import Language
|
27 |
|
|
|
28 |
@Language.factory("spacy-curated-transformers_RobertaTransformer_v1")
def dummy_roberta_transformer(nlp, name):
    """Build a no-op pipeline component for the RoBERTa transformer factory key.

    The factory is registered with spaCy under the name
    "spacy-curated-transformers_RobertaTransformer_v1". Neither ``nlp`` nor
    ``name`` is used by the factory.

    Returns:
        A callable that takes a Doc and returns it unchanged.
    """

    def pass_through(doc):
        # The placeholder component hands the Doc back untouched.
        return doc

    return pass_through
|
33 |
|
|
|
34 |
@st.cache_resource
def load_nlp_model():
    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it if needed.

    The first load attempt is retried after a download when it raises
    ``OSError`` (treated here as "model not found"). Any other failure,
    including one raised during the download or the second load, is shown
    with ``st.error``.

    Returns:
        The loaded spaCy pipeline, or ``None`` when loading failed.
    """
    try:
        try:
            return spacy.load("en_core_web_sm")
        except OSError:
            # Model package is missing: tell the user, fetch it, then retry.
            st.write("Model en_core_web_sm not found. Downloading it now...")
            spacy.cli.download("en_core_web_sm")
            return spacy.load("en_core_web_sm")
    except Exception as load_error:
        # Top-level boundary: report the problem in the UI instead of crashing.
        st.error(f"Error loading model: {load_error}")
        return None
|
47 |
|
48 |
+
# Load models after page config
|
49 |
nlp_model = load_nlp_model()
|
50 |
|
51 |
+
|
52 |
+
|
53 |
# Also load SentenceTransformer for semantic re-ranking.
|
54 |
from sentence_transformers import SentenceTransformer, util
|
55 |
@st.cache_resource
|