euler314 committed on
Commit
940f220
·
verified ·
1 Parent(s): f8f469c

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +14 -12
app.py CHANGED
@@ -1,3 +1,8 @@
 
 
 
 
 
1
  import os
2
  import subprocess
3
  from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
@@ -8,7 +13,6 @@ import re
8
  from pathlib import Path
9
  from io import BytesIO
10
  import random
11
- import streamlit as st
12
  from bs4 import BeautifulSoup
13
  from PyPDF2 import PdfReader
14
  import zipfile
@@ -21,33 +25,31 @@ import spacy
21
  import spacy.cli
22
  from spacy.language import Language
23
 
24
- # Register a dummy factory under the exact key that the transformer model expects.
25
  @Language.factory("spacy-curated-transformers_RobertaTransformer_v1")
26
  def dummy_roberta_transformer(nlp, name):
27
- # This dummy component simply passes the Doc through.
28
  def dummy(doc):
29
  return doc
30
  return dummy
31
 
32
- # Try to load the transformer-based model.
33
  @st.cache_resource
34
  def load_nlp_model():
35
  try:
36
- nlp_model = spacy.load("en_core_web_trf")
37
- except OSError:
38
- st.write("Model en_core_web_trf not found. Downloading it now...")
39
- spacy.cli.download("en_core_web_trf")
40
  try:
41
- nlp_model = spacy.load("en_core_web_trf")
42
- except Exception as e:
43
- st.error(f"Error loading model after download: {e}")
44
- st.write("Falling back to en_core_web_sm...")
45
  spacy.cli.download("en_core_web_sm")
46
  nlp_model = spacy.load("en_core_web_sm")
 
 
 
47
  return nlp_model
48
 
 
49
  nlp_model = load_nlp_model()
50
 
 
 
51
  # Also load SentenceTransformer for semantic re-ranking.
52
  from sentence_transformers import SentenceTransformer, util
53
  @st.cache_resource
 
1
+ import streamlit as st
2
+ # Set page config as the very first Streamlit command
3
+ st.set_page_config(page_title="Advanced File Downloader", layout="wide")
4
+
5
+ # Now import all other libraries
6
  import os
7
  import subprocess
8
  from playwright.async_api import async_playwright, TimeoutError as PlaywrightTimeoutError
 
13
  from pathlib import Path
14
  from io import BytesIO
15
  import random
 
16
  from bs4 import BeautifulSoup
17
  from PyPDF2 import PdfReader
18
  import zipfile
 
25
  import spacy.cli
26
  from spacy.language import Language
27
 
 
@Language.factory("spacy-curated-transformers_RobertaTransformer_v1")
def dummy_roberta_transformer(nlp, name):
    """Build a no-op pipeline component for the registered factory name.

    The factory is registered with spaCy under the key
    ``spacy-curated-transformers_RobertaTransformer_v1``. The component it
    produces does no processing at all.

    Args:
        nlp: The pipeline object passed in by spaCy (unused here).
        name: The component instance name passed in by spaCy (unused here).

    Returns:
        A callable that takes a Doc and returns that same Doc unchanged.
    """
    def _passthrough(doc):
        # Identity component: hand the Doc back untouched.
        return doc

    return _passthrough
33
 
 
@st.cache_resource
def load_nlp_model():
    """Load the ``en_core_web_sm`` spaCy pipeline, downloading it if missing.

    The result is cached through ``st.cache_resource``.

    Returns:
        The loaded spaCy pipeline, or ``None`` when loading fails; in that
        case the error is reported to the page with ``st.error``.
    """
    model_id = "en_core_web_sm"
    try:
        try:
            pipeline = spacy.load(model_id)
        except OSError:
            # spacy.load raised OSError: tell the user, download the
            # package, then try loading once more.
            st.write("Model en_core_web_sm not found. Downloading it now...")
            spacy.cli.download(model_id)
            pipeline = spacy.load(model_id)
    except Exception as e:
        # Anything that escapes the load/download path is surfaced in the
        # UI instead of propagating to the caller.
        st.error(f"Error loading model: {e}")
        return None
    else:
        return pipeline
47
 
48
+ # Load models after page config
49
  nlp_model = load_nlp_model()
50
 
51
+
52
+
53
  # Also load SentenceTransformer for semantic re-ranking.
54
  from sentence_transformers import SentenceTransformer, util
55
  @st.cache_resource