Spaces:

CosmickVisions
/

Data-Vision

Running

App Files Files Community

CosmickVisions committed on Mar 3

Commit

c21d97a

verified ·

1 Parent(s): 1170bf0

Update app.py

Browse files

Files changed (1) hide show

app.py +44 -22

app.py CHANGED Viewed

@@ -5,29 +5,37 @@ from scipy.stats import boxcox
 import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
 from sklearn.impute import SimpleImputer
-from sklearn.model_selection import GridSearchCV
 from sklearn.linear_model import LogisticRegression
 from sklearn.svm import SVC
-from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier
 from sklearn.neural_network import MLPRegressor, MLPClassifier
-from sklearn.metrics import confusion_matrix, classification_report, r2_score
-from sklearn.model_selection import cross_val_score
-import scipy.stats as stats
-import matplotlib.pyplot as plt #For SHAP charts
-from scipy.stats import pearsonr, spearmanr
 from sklearn.inspection import permutation_importance
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.preprocessing import StandardScaler, LabelEncoder
-from sklearn.model_selection import train_test_split
-from sklearn.ensemble import RandomForestClassifier, RandomForestRegressor
-from sklearn.metrics import accuracy_score, mean_squared_error
-from ydata_profiling import ProfileReport
-from streamlit_pandas_profiling import st_profile_report
-import joblib  # For saving and loading models
-import os  # For file directory
-# Advanced
-from transformers import TFBertForSequenceClassification
 import tensorflow as tf
 from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense, Conv2D, LSTM, Embedding, Dropout, Flatten, MaxPooling2D, BatchNormalization
@@ -36,14 +44,30 @@ from tensorflow.keras.utils import plot_model
 from tensorflow.keras.callbacks import Callback
 import tf2onnx
 import onnx
-import shap
-from datetime import datetime
-from stqdm import stqdm
 # --------------------------
 # Helper Functions
 # --------------------------
 def enhance_section_title(title, icon="✨"):
     """Render *title* as a level-2 heading with a bottom border, prefixed by *icon*."""
     heading_html = f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{icon} {title}</h2>"
     # The heading is raw HTML, so Streamlit has to be told not to escape it.
     st.markdown(heading_html, unsafe_allow_html=True)
@@ -1632,11 +1656,9 @@ elif app_mode == "Predictions":
             st.error(f"Prediction failed: {str(e)}")
 elif app_mode == "PDF Analysis":
     st.title("📄 Advanced PDF Analyzer")
-    # PDF Upload with drag & drop zone
     with st.container(border=True):
         uploaded_pdfs = st.file_uploader("Drag & Drop PDF Files",
                                        type="pdf",

 import numpy as np
 import plotly.express as px
 import plotly.graph_objects as go
+import joblib  # For saving and loading models
+import os  # For file directory
+from datetime import datetime
+from stqdm import stqdm
+from ydata_profiling import ProfileReport
+from streamlit_pandas_profiling import st_profile_report
+import shap
+# PDF and OCR Processing
+import pdfplumber
+import pytesseract
+from pdf2image import convert_from_path
+# Machine Learning
 from sklearn.impute import SimpleImputer
+from sklearn.model_selection import GridSearchCV, cross_val_score, train_test_split
 from sklearn.linear_model import LogisticRegression
 from sklearn.svm import SVC
+from sklearn.ensemble import GradientBoostingRegressor, GradientBoostingClassifier, RandomForestClassifier, RandomForestRegressor
 from sklearn.neural_network import MLPRegressor, MLPClassifier
+from sklearn.metrics import confusion_matrix, classification_report, r2_score, accuracy_score, mean_squared_error
 from sklearn.inspection import permutation_importance
 from sklearn.feature_extraction.text import TfidfVectorizer
 from sklearn.preprocessing import StandardScaler, LabelEncoder
+# Visualization
+import matplotlib.pyplot as plt # For SHAP charts
+import scipy.stats as stats
+from scipy.stats import pearsonr, spearmanr
+# Advanced and Neural Network Models
 import tensorflow as tf
 from tensorflow.keras.models import Sequential
 from tensorflow.keras.layers import Dense, Conv2D, LSTM, Embedding, Dropout, Flatten, MaxPooling2D, BatchNormalization
 from tensorflow.keras.callbacks import Callback
 import tf2onnx
 import onnx
+# Transformers and BERT
+from transformers import TFBertForSequenceClassification
 # --------------------------
 # Helper Functions
 # --------------------------
+pytesseract.pytesseract.tesseract_cmd = r'/usr/bin/tesseract'  # Path to the Tesseract executable used by pytesseract for OCR; update it if Tesseract is installed elsewhere
+def extract_text_from_pdf(pdf_path, ocr_enabled=False):
+    text = ""
+    if ocr_enabled:
+        images = convert_from_path(pdf_path)
+        for image in images:
+            text += pytesseract.image_to_string(image)
+    else:
+        with pdfplumber.open(pdf_path) as pdf:
+            for page in pdf.pages:
+                text += page.extract_text()
+    return text
 def enhance_section_title(title, icon="✨"):
     """Render *title* as a level-2 heading with a bottom border, prefixed by *icon*."""
     heading_html = f"<h2 style='border-bottom: 2px solid #ccc; padding-bottom: 5px;'>{icon} {title}</h2>"
     # The heading is raw HTML, so Streamlit has to be told not to escape it.
     st.markdown(heading_html, unsafe_allow_html=True)
             st.error(f"Prediction failed: {str(e)}")
 elif app_mode == "PDF Analysis":
     st.title("📄 Advanced PDF Analyzer")
+# PDF Upload with drag & drop zone
     with st.container(border=True):
         uploaded_pdfs = st.file_uploader("Drag & Drop PDF Files",
                                        type="pdf",