# FastAPI service: AI-generated-text detection, synonym-based rephrasing,
# and spell correction. (Deployed as a Hugging Face Space.)
from fastapi import FastAPI, HTTPException
from pydantic import BaseModel
from transformers import pipeline
import spacy
import subprocess
import sys
import nltk
from nltk.corpus import wordnet
from spellchecker import SpellChecker

# Initialize FastAPI app
app = FastAPI()

# English text-classification pipeline used for AI-generated-text detection
pipeline_en = pipeline(task="text-classification", model="Hello-SimpleAI/chatgpt-detector-roberta")

# Spell checker used for word-level corrections
spell = SpellChecker()

# Ensure necessary NLTK data is downloaded (WordNet + multilingual wordnet)
nltk.download('wordnet')
nltk.download('omw-1.4')

# Ensure the spaCy English model is installed; download it on first run.
try:
    nlp = spacy.load("en_core_web_sm")
except OSError:
    # Use the running interpreter, not whatever "python" is on PATH, and
    # fail loudly (check=True) instead of crashing later on spacy.load.
    subprocess.run([sys.executable, "-m", "spacy", "download", "en_core_web_sm"], check=True)
    nlp = spacy.load("en_core_web_sm")
# Request body models
class TextRequest(BaseModel):
    """Request body carrying the raw text to process."""
    # text: the input string to detect / rephrase / spell-correct
    text: str
class TextResponse(BaseModel):
    """Response body carrying the processed text."""
    # result: the transformed output string
    result: str
# Function to predict the label and score for English text (AI detection)
def predict_en(text: str) -> dict:
    """Classify *text* with the AI-detector model.

    Returns a dict with the model's predicted 'label' and its confidence
    'score'. Only the top (first) prediction from the pipeline is used.
    """
    res = pipeline_en(text)[0]
    return {"label": res['label'], "score": res['score']}
# Function to get synonyms using NLTK WordNet
def get_synonyms_nltk(word: str, pos: str) -> list:
    """Return the lemma names of the first WordNet synset for *word*.

    *pos* is a spaCy-style coarse tag (``VERB``/``NOUN``/``ADJ``/``ADV``);
    any other value searches across all parts of speech (``pos=None``).
    Returns an empty list when WordNet has no synset for the word.
    """
    # Map coarse POS tags to WordNet constants; unknown tags fall back to None.
    pos_map = {
        "VERB": wordnet.VERB,
        "NOUN": wordnet.NOUN,
        "ADJ": wordnet.ADJ,
        "ADV": wordnet.ADV,
    }
    synsets = wordnet.synsets(word, pos=pos_map.get(pos))
    if synsets:
        # Only the first (most common) sense is considered.
        return [lemma.name() for lemma in synsets[0].lemmas()]
    return []
# Function to correct spelling errors
def correct_spelling(text: str) -> str:
    """Return *text* with each whitespace-separated word spell-corrected.

    ``spell.correction`` returns ``None`` when it has no suggestion for a
    word; keep the original word in that case (the previous code would
    crash with ``TypeError`` when joining ``None``).
    """
    corrected_words = []
    for word in text.split():
        corrected_words.append(spell.correction(word) or word)
    return ' '.join(corrected_words)
# Rephrase text by swapping content words for their first WordNet synonym,
# re-applying simple inflection so the sentence shape is roughly preserved.
def _inflect(synonym: str, tag: str) -> str:
    """Naively re-apply a Penn Treebank verb *tag* to a base-form synonym.

    NOTE(review): plain suffixing is wrong for many verbs (e.g. "run" ->
    "runing"); a real inflection library would be needed for accuracy.
    This mirrors the original behavior.
    """
    if tag == "VBG":  # present participle (e.g., running)
        return synonym + 'ing'
    if tag in ("VBD", "VBN"):  # past tense or past participle
        return synonym + 'ed'
    if tag == "VBZ":  # third-person singular present
        return synonym + 's'
    return synonym


def rephrase_with_synonyms(text: str) -> str:
    """Replace content words in *text* with their first WordNet synonym.

    Only NOUN/VERB/ADJ/ADV tokens are replaced; all other tokens pass
    through unchanged. Tokens are re-joined with single spaces.
    """
    doc = nlp(text)
    rephrased_text = []
    for token in doc:
        # Only content words are candidates for substitution.
        pos_tag = token.pos_ if token.pos_ in ("NOUN", "VERB", "ADJ", "ADV") else None
        if not pos_tag:
            rephrased_text.append(token.text)
            continue
        synonyms = get_synonyms_nltk(token.text, pos_tag)
        if not synonyms:
            rephrased_text.append(token.text)
            continue
        # First synonym only; WordNet joins multi-word lemmas with '_'
        # (e.g. "look_into"), so restore spaces before emitting.
        synonym = synonyms[0].replace('_', ' ')
        if token.pos_ == "VERB":
            synonym = _inflect(synonym, token.tag_)
        elif token.pos_ == "NOUN" and token.tag_ == "NNS":  # plural nouns
            if not synonym.endswith('s'):
                synonym += 's'
        rephrased_text.append(synonym)
    return ' '.join(rephrased_text)
# FastAPI endpoints
@app.post("/predict")
def predict(text_request: TextRequest):
    """Detect whether the submitted text is AI-generated.

    Returns the detector's label and confidence score. The original code
    defined this handler without a route decorator, so it was never
    actually exposed by the app.
    """
    if not text_request.text.strip():
        raise HTTPException(status_code=400, detail="Text cannot be empty")
    return predict_en(text_request.text)
@app.post("/rephrase", response_model=TextResponse)
def rephrase(text_request: TextRequest):
    """Rephrase the submitted text by substituting WordNet synonyms.

    The route decorator was missing in the original, leaving the handler
    unreachable; ``response_model`` wires up the previously unused
    ``TextResponse`` schema.
    """
    return {"result": rephrase_with_synonyms(text_request.text)}
@app.post("/correct_spell", response_model=TextResponse)
def correct_spell(text_request: TextRequest):
    """Spell-correct the submitted text word by word.

    The route decorator was missing in the original, leaving the handler
    unreachable; ``response_model`` wires up the previously unused
    ``TextResponse`` schema.
    """
    return {"result": correct_spelling(text_request.text)}
# Additional endpoints for other functionalities can be added similarly
# Run a local development server when executed directly.
if __name__ == "__main__":
    import uvicorn
    uvicorn.run(app, host="127.0.0.1", port=8000)