Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -5,29 +5,37 @@ from scipy.stats import boxcox
|
|
5 |
import numpy as np
|
6 |
import plotly.express as px
|
7 |
import plotly.graph_objects as go
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
8 |
from sklearn.impute import SimpleImputer
|
9 |
-
from sklearn.model_selection import GridSearchCV
|
10 |
from sklearn.linear_model import LogisticRegression
|
11 |
from sklearn.svm import SVC
|
12 |
-
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
|
13 |
from sklearn.neural_network import MLPRegressor, MLPClassifier
|
14 |
-
from sklearn.metrics import confusion_matrix, classification_report, r2_score
|
15 |
-
from sklearn.model_selection import cross_val_score
|
16 |
-
import scipy.stats as stats
|
17 |
-
import matplotlib.pyplot as plt #For SHAP charts
|
18 |
-
from scipy.stats import pearsonr, spearmanr
|
19 |
from sklearn.inspection import permutation_importance
|
20 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
21 |
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
22 |
-
|
23 |
-
|
24 |
-
|
25 |
-
|
26 |
-
from
|
27 |
-
|
28 |
-
|
29 |
-
# Advanced
|
30 |
-
from transformers import TFBertForSequenceClassification
|
31 |
import tensorflow as tf
|
32 |
from tensorflow.keras.models import Sequential
|
33 |
from tensorflow.keras.layers import Dense, Conv2D, LSTM, Embedding, Dropout, Flatten, MaxPooling2D, BatchNormalization
|
@@ -36,14 +44,30 @@ from tensorflow.keras.utils import plot_model
|
|
36 |
from tensorflow.keras.callbacks import Callback
|
37 |
import tf2onnx
|
38 |
import onnx
|
39 |
-
|
40 |
-
|
41 |
-
from
|
|
|
42 |
|
43 |
|
44 |
# --------------------------
|
45 |
# Helper Functions
|
46 |
# --------------------------
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
47 |
def enhance_section_title(title, icon="✨"):
    """Render a section heading: an underlined <h2> showing ``icon`` followed by ``title``."""
    # Raw HTML is used for the bottom-border styling, hence unsafe_allow_html below.
    heading_html = f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{icon} {title}</h2>"
    st.markdown(heading_html, unsafe_allow_html=True)
|
@@ -1632,11 +1656,9 @@ elif app_mode == "Predictions":
|
|
1632 |
st.error(f"Prediction failed: {str(e)}")
|
1633 |
|
1634 |
|
1635 |
-
|
1636 |
elif app_mode == "PDF Analysis":
|
1637 |
st.title("📄 Advanced PDF Analyzer")
|
1638 |
-
|
1639 |
-
# PDF Upload with drag & drop zone
|
1640 |
with st.container(border=True):
|
1641 |
uploaded_pdfs = st.file_uploader("Drag & Drop PDF Files",
|
1642 |
type="pdf",
|
|
|
5 |
import numpy as np
|
6 |
import plotly.express as px
|
7 |
import plotly.graph_objects as go
|
8 |
+
import joblib # For saving and loading models
|
9 |
+
import os # For file directory
|
10 |
+
from datetime import datetime
|
11 |
+
from stqdm import stqdm
|
12 |
+
from ydata_profiling import ProfileReport
|
13 |
+
from streamlit_pandas_profiling import st_profile_report
|
14 |
+
import shap
|
15 |
+
|
16 |
+
# PDF and OCR Processing
|
17 |
+
import pdfplumber
|
18 |
+
import pytesseract
|
19 |
+
from pdf2image import convert_from_path
|
20 |
+
|
21 |
+
# Machine Learning
|
22 |
from sklearn.impute import SimpleImputer
|
23 |
+
from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
|
24 |
from sklearn.linear_model import LogisticRegression
|
25 |
from sklearn.svm import SVC
|
26 |
+
from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier, RandomForestClassifier, RandomForestRegressor
|
27 |
from sklearn.neural_network import MLPRegressor, MLPClassifier
|
28 |
+
from sklearn.metrics import confusion_matrix, classification_report, r2_score, accuracy_score, mean_squared_error
|
|
|
|
|
|
|
|
|
29 |
from sklearn.inspection import permutation_importance
|
30 |
from sklearn.feature_extraction.text import TfidfVectorizer
|
31 |
from sklearn.preprocessing import StandardScaler, LabelEncoder
|
32 |
+
|
33 |
+
# Visualization
|
34 |
+
import matplotlib.pyplot as plt # For SHAP charts
|
35 |
+
import scipy.stats as stats
|
36 |
+
from scipy.stats import pearsonr, spearmanr
|
37 |
+
|
38 |
+
# Advanced and Neural Network Models
|
|
|
|
|
39 |
import tensorflow as tf
|
40 |
from tensorflow.keras.models import Sequential
|
41 |
from tensorflow.keras.layers import Dense, Conv2D, LSTM, Embedding, Dropout, Flatten, MaxPooling2D, BatchNormalization
|
|
|
44 |
from tensorflow.keras.callbacks import Callback
|
45 |
import tf2onnx
|
46 |
import onnx
|
47 |
+
|
48 |
+
# Transformers and BERT
|
49 |
+
from transformers import TFBertForSequenceClassification
|
50 |
+
|
51 |
|
52 |
|
53 |
# --------------------------
|
54 |
# Helper Functions
|
55 |
# --------------------------
|
56 |
+
# Point pytesseract at the Tesseract executable it should invoke for OCR.
# NOTE(review): the hard-coded path presumably matches the deployment image — confirm.
pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract' # Update this path if Tesseract is installed elsewhere
|
57 |
+
|
58 |
+
def extract_text_from_pdf(pdf_path, ocr_enabled=False):
    """Return the text of every page of a PDF, concatenated in page order.

    Args:
        pdf_path: Location of the PDF; handed to ``convert_from_path`` in OCR
            mode and to ``pdfplumber.open`` otherwise.
        ocr_enabled: When true, each page is rendered to an image and read
            with ``pytesseract.image_to_string``; when false, pdfplumber's
            ``extract_text`` is used on each page.

    Returns:
        A single string with the per-page text joined without any separator
        (an empty string when no page produced text).
    """
    if ocr_enabled:
        page_images = convert_from_path(pdf_path)
        return "".join(pytesseract.image_to_string(image) for image in page_images)

    with pdfplumber.open(pdf_path) as pdf:
        # Depending on the pdfplumber version, extract_text() can give None
        # for a page without a text layer (e.g. a scanned page); treat that
        # as empty text instead of failing on str + None.
        return "".join(page.extract_text() or "" for page in pdf.pages)
|
69 |
+
|
70 |
+
|
71 |
def enhance_section_title(title, icon="✨"):
    """Render a section heading: an underlined <h2> showing ``icon`` followed by ``title``."""
    # Raw HTML is used for the bottom-border styling, hence unsafe_allow_html below.
    heading_html = f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{icon} {title}</h2>"
    st.markdown(heading_html, unsafe_allow_html=True)
|
|
|
1656 |
st.error(f"Prediction failed: {str(e)}")
|
1657 |
|
1658 |
|
|
|
1659 |
elif app_mode == "PDF Analysis":
|
1660 |
st.title("📄 Advanced PDF Analyzer")
|
1661 |
+
# PDF Upload with drag & drop zone
|
|
|
1662 |
with st.container(border=True):
|
1663 |
uploaded_pdfs = st.file_uploader("Drag & Drop PDF Files",
|
1664 |
type="pdf",
|