import os
import pickle
import subprocess

import numpy as np
import requests
import streamlit as st

from keras.models import load_model
from keras.preprocessing.sequence import pad_sequences
from keras.preprocessing.text import Tokenizer
from sklearn.preprocessing import LabelEncoder
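# Browser-style User-Agent header; presumably set so the file host accepts
# these scripted downloads instead of rejecting the default requests agent.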
headers = {
    'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3',
}

st.write(f"Initial Current Working Directory: {os.getcwd()}")
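# Download and unzip the saved authorship model on first run.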
zip_file_path = "my_authorship_model_zip.zip"
if not os.path.exists('my_authorship_model'):
    try:
        model_url = 'https://jaifar.net/ADS/my_authorship_model_zip.zip'
        r = requests.get(model_url, headers=headers)
        r.raise_for_status()

        st.write(f"Downloaded model size: {len(r.content)} bytes")

        with open(zip_file_path, "wb") as f:
            f.write(r.content)

        if os.path.exists(zip_file_path):
            st.write("Zip file exists")

            # Show the archive contents, then extract into my_authorship_model/.
            subprocess.run(['unzip', '-l', zip_file_path])
            unzip_result = subprocess.run(['unzip', '-o', zip_file_path, '-d', 'my_authorship_model'])

            if unzip_result.returncode == 0:
                st.write("Model folder successfully extracted using unzip")
                st.write("Listing directory contents:")
                st.write(os.listdir('.'))
            else:
                st.write("Model folder was not extracted successfully using unzip")
                st.stop()  # end this Streamlit script run cleanly
        else:
            st.write("Zip file does not exist")
            st.stop()
    except Exception as e:
        st.write(f"Failed to download or extract the model: {e}")
        st.stop()
else:
    st.write("Model folder exists")

st.write(f"Current Working Directory After Extraction: {os.getcwd()}")
try:
    model_files = os.listdir('my_authorship_model')
    st.write(f"Files in model folder: {model_files}")
except Exception as e:
    st.write(f"Could not list files in model folder: {e}")
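# Fetch the pickled tokenizer and label encoder that accompany the model.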
file_urls = {
    'tokenizer.pkl': 'https://jaifar.net/ADS/tokenizer.pkl',
    'label_encoder.pkl': 'https://jaifar.net/ADS/label_encoder.pkl'
}

for filename, url in file_urls.items():
    try:
        r = requests.get(url, headers=headers)
        r.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(r.content)
    except Exception as e:
        st.write(f"Failed to download {filename}: {e}")
        st.stop()
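# Load the trained model plus the tokenizer and label encoder it was trained with.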
loaded_model = load_model("my_authorship_model")

with open('tokenizer.pkl', 'rb') as handle:
    tokenizer = pickle.load(handle)

with open('label_encoder.pkl', 'rb') as handle:
    label_encoder = pickle.load(handle)

# Maximum sequence length; inputs are padded or truncated to this many tokens.
max_length = 300
def predict_author(new_text, model, tokenizer, label_encoder):
    # Convert the text to a padded integer sequence and run the classifier.
    sequence = tokenizer.texts_to_sequences([new_text])
    padded_sequence = pad_sequences(sequence, maxlen=max_length, padding='post', truncating='post')
    prediction = model.predict(padded_sequence)

    # Map the highest-scoring class index back to an author name.
    predicted_label = label_encoder.inverse_transform([prediction.argmax()])[0]

    # Build an {author: probability} mapping covering every class.
    probabilities = prediction[0]
    author_probabilities = {}
    for idx, prob in enumerate(probabilities):
        author = label_encoder.inverse_transform([idx])[0]
        author_probabilities[author] = prob

    return predicted_label, author_probabilities
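# Streamlit UI: collect input text and display the prediction.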
st.markdown("CNN : version: 1.2")
new_text = st.text_area("Input your text here")

# Only predict once the user has entered some text; an empty input would
# otherwise be classified on every rerun.
if new_text.strip():
    predicted_author, author_probabilities = predict_author(new_text, loaded_model, tokenizer, label_encoder)
    sorted_probabilities = sorted(author_probabilities.items(), key=lambda x: x[1], reverse=True)

    st.write(f"The text is most likely written by: {predicted_author}")
    st.write("Probabilities for each author are (sorted):")
    for author, prob in sorted_probabilities:
        st.write(f"{author}: {prob * 100:.2f}%")