CosmickVisions committed on
Commit
c21d97a
·
verified ·
1 Parent(s): 1170bf0

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +44 -22
app.py CHANGED
@@ -5,29 +5,37 @@ from scipy.stats import boxcox
5
  import numpy as np
6
  import plotly.express as px
7
  import plotly.graph_objects as go
 
 
 
 
 
 
 
 
 
 
 
 
 
 
8
  from sklearn.impute import SimpleImputer
9
- from sklearn.model_selection import GridSearchCV
10
  from sklearn.linear_model import LogisticRegression
11
  from sklearn.svm import SVC
12
- from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
13
  from sklearn.neural_network import MLPRegressor, MLPClassifier
14
- from sklearn.metrics import confusion_matrix, classification_report, r2_score
15
- from sklearn.model_selection import cross_val_score
16
- import scipy.stats as stats
17
- import matplotlib.pyplot as plt #For SHAP charts
18
- from scipy.stats import pearsonr, spearmanr
19
  from sklearn.inspection import permutation_importance
20
  from sklearn.feature_extraction.text import TfidfVectorizer
21
  from sklearn.preprocessing import StandardScaler, LabelEncoder
22
- from sklearn.model_selection import train_test_split
23
- from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
24
- from sklearn.metrics import accuracy_score, mean_squared_error
25
- from ydata_profiling import ProfileReport
26
- from streamlit_pandas_profiling import st_profile_report
27
- import joblib # For saving and loading models
28
- import os # For file directory
29
- # Advanced
30
- from transformers import TFBertForSequenceClassification
31
  import tensorflow as tf
32
  from tensorflow.keras.models import Sequential
33
  from tensorflow.keras.layers import Dense, Conv2D, LSTM, Embedding, Dropout, Flatten, MaxPooling2D, BatchNormalization
@@ -36,14 +44,30 @@ from tensorflow.keras.utils import plot_model
36
  from tensorflow.keras.callbacks import Callback
37
  import tf2onnx
38
  import onnx
39
- import shap
40
- from datetime import datetime
41
- from stqdm import stqdm
 
42
 
43
 
44
  # --------------------------
45
  # Helper Functions
46
  # --------------------------
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
47
  def enhance_section_title(title, icon="✨"):
48
  """Helper function to create a styled section title with an icon."""
49
  st.markdown(f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{icon} {title}</h2>", unsafe_allow_html=True)
@@ -1632,11 +1656,9 @@ elif app_mode == "Predictions":
1632
  st.error(f"Prediction failed: {str(e)}")
1633
 
1634
 
1635
-
1636
  elif app_mode == "PDF Analysis":
1637
  st.title("📄 Advanced PDF Analyzer")
1638
-
1639
- # PDF Upload with drag & drop zone
1640
  with st.container(border=True):
1641
  uploaded_pdfs = st.file_uploader("Drag & Drop PDF Files",
1642
  type="pdf",
 
5
  import numpy as np
6
  import plotly.express as px
7
  import plotly.graph_objects as go
8
+ import joblib # For saving and loading models
9
+ import os # For file directory
10
+ from datetime import datetime
11
+ from stqdm import stqdm
12
+ from ydata_profiling import ProfileReport
13
+ from streamlit_pandas_profiling import st_profile_report
14
+ import shap
15
+
16
+ # PDF and OCR Processing
17
+ import pdfplumber
18
+ import pytesseract
19
+ from pdf2image import convert_from_path
20
+
21
+ # Machine Learning
22
  from sklearn.impute import SimpleImputer
23
+ from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
24
  from sklearn.linear_model import LogisticRegression
25
  from sklearn.svm import SVC
26
+ from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier, RandomForestClassifier, RandomForestRegressor
27
  from sklearn.neural_network import MLPRegressor, MLPClassifier
28
+ from sklearn.metrics import confusion_matrix, classification_report, r2_score, accuracy_score, mean_squared_error
 
 
 
 
29
  from sklearn.inspection import permutation_importance
30
  from sklearn.feature_extraction.text import TfidfVectorizer
31
  from sklearn.preprocessing import StandardScaler, LabelEncoder
32
+
33
+ # Visualization
34
+ import matplotlib.pyplot as plt # For SHAP charts
35
+ import scipy.stats as stats
36
+ from scipy.stats import pearsonr, spearmanr
37
+
38
+ # Advanced and Neural Network Models
 
 
39
  import tensorflow as tf
40
  from tensorflow.keras.models import Sequential
41
  from tensorflow.keras.layers import Dense, Conv2D, LSTM, Embedding, Dropout, Flatten, MaxPooling2D, BatchNormalization
 
44
  from tensorflow.keras.callbacks import Callback
45
  import tf2onnx
46
  import onnx
47
+
48
+ # Transformers and BERT
49
+ from transformers import TFBertForSequenceClassification
50
+
51
 
52
 
53
  # --------------------------
54
  # Helper Functions
55
  # --------------------------
56
+ pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract' # Update this path if Tesseract is installed elsewhere
57
+
58
def extract_text_from_pdf(pdf_path, ocr_enabled=False):
    """Return the text of every page of a PDF as one string.

    Args:
        pdf_path: Path of the PDF file to read.
        ocr_enabled: When true, each page is rendered to an image with
            ``convert_from_path`` and read with
            ``pytesseract.image_to_string``. Otherwise the text is read
            page by page with ``pdfplumber``.

    Returns:
        The per-page text concatenated in page order, with no separator
        inserted between pages. Pages that yield no text contribute an
        empty string.
    """
    if ocr_enabled:
        page_images = convert_from_path(pdf_path)
        return "".join(
            pytesseract.image_to_string(image) for image in page_images
        )

    with pdfplumber.open(pdf_path) as pdf:
        # extract_text() can yield None for a page with no extractable text
        # (e.g. a scanned page, depending on the pdfplumber version);
        # concatenating None to a str would raise TypeError, so treat it
        # as an empty page instead.
        return "".join(page.extract_text() or "" for page in pdf.pages)
69
+
70
+
71
  def enhance_section_title(title, icon="✨"):
72
  """Helper function to create a styled section title with an icon."""
73
  st.markdown(f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{icon} {title}</h2>", unsafe_allow_html=True)
 
1656
  st.error(f"Prediction failed: {str(e)}")
1657
 
1658
 
 
1659
  elif app_mode == "PDF Analysis":
1660
  st.title("📄 Advanced PDF Analyzer")
1661
+ # PDF Upload with drag & drop zone
 
1662
  with st.container(border=True):
1663
  uploaded_pdfs = st.file_uploader("Drag & Drop PDF Files",
1664
  type="pdf",