CR7CAD committed on
Commit
cf98c48
·
verified ·
1 Parent(s): 6200bd8

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +18 -6
app.py CHANGED
@@ -30,7 +30,7 @@ def load_models():
30
  with st.spinner("Loading AI models... This may take a minute on first run."):
31
  models = {}
32
  # Load summarization model
33
- models['summarizer'] = pipeline("summarization", model="sshleifer/distilbart-xsum-12-6")
34
 
35
  # Load feature extraction model for similarity
36
  models['feature_extractor'] = pipeline("feature-extraction", model="bert-base-uncased")
@@ -58,13 +58,25 @@ def extract_text_from_file(file_obj):
58
  text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
59
  except Exception as e:
60
  text = f"Error processing DOCX file: {e}"
61
- elif ext == ".txt":
62
  try:
63
- text = file_obj.getvalue().decode("utf-8")
64
- except Exception as e:
65
- text = f"Error processing TXT file: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
66
  else:
67
- text = "Unsupported file type. Please upload a .docx or .txt file."
68
  return text
69
 
70
  #####################################
 
30
  with st.spinner("Loading AI models... This may take a minute on first run."):
31
  models = {}
32
  # Load summarization model
33
+ models['summarizer'] = pipeline("summarization", model="microsoft/Phi-3.5-mini-instruct")
34
 
35
  # Load feature extraction model for similarity
36
  models['feature_extractor'] = pipeline("feature-extraction", model="bert-base-uncased")
 
58
  text = "\n".join(para.text for para in document.paragraphs if para.text.strip())
59
  except Exception as e:
60
  text = f"Error processing DOCX file: {e}"
61
+ elif ext == ".doc":
62
  try:
63
+ # For .doc files, we need to save to a temp file
64
+ with tempfile.NamedTemporaryFile(delete=False, suffix='.doc') as temp_file:
65
+ temp_file.write(file_obj.getvalue())
66
+ temp_path = temp_file.name
67
+
68
+ # Use docx2txt which is generally faster
69
+ try:
70
+ text = docx2txt.process(temp_path)
71
+ except Exception:
72
+ text = "Could not process .doc file. Please convert to .docx format."
73
+
74
+ # Clean up temp file
75
+ os.unlink(temp_path)
76
+ except Exception as e:
77
+ text = f"Error processing DOC file: {e}"
78
  else:
79
+ text = "Unsupported file type. Please upload a .docx or .doc file."
80
  return text
81
 
82
  #####################################