Spaces:

polygraf-ai
/

article_writer

Runtime error

File size: 38,248 Bytes

2d6909b
 
db77dd7
2d6909b
34b1950
20dc449
b72ef7f
 
439d01d
e2a79fa
b72ef7f
 
132b0ec
5534eb0
e2a79fa
 
132b0ec
f716a54
5534eb0
708f094
e2a79fa
f716a54
132b0ec
 
 
 
a32fa53
89644d7
 
a32fa53
89644d7
 
132b0ec
 
a32fa53
 
132b0ec
 
a54c1ef
 
 
89644d7
132b0ec
 
 
e3277bc
89644d7
132b0ec
 
118507a
89644d7
118507a
 
 
 
d994b45
89644d7
20dc449
cf245ed
20dc449
 
cf245ed
 
20dc449
cf245ed
 
89644d7
43d4e83
 
 
 
 
34b1950
 
 
 
 
 
20dc449
34b1950
43d4e83
 
 
34b1950
 
 
 
 
43d4e83
 
 
34b1950
 
 
 
 
 
43d4e83
 
 
 
 
 
 
da88846
43d4e83
 
89644d7
43d4e83
34b1950
43d4e83
34b1950
 
 
 
 
 
 
6402181
89644d7
291ffbc
 
 
ef88cd6
132b0ec
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89644d7
132b0ec
 
89644d7
132b0ec
 
89644d7
34b1950
89644d7
132b0ec
 
 
89644d7
132b0ec
89644d7
 
132b0ec
89644d7
 
 
132b0ec
89644d7
 
132b0ec
 
89644d7
 
 
 
 
 
 
 
 
 
 
7c7ccca
89644d7
7c7ccca
 
89644d7
 
 
 
 
 
 
 
132b0ec
 
34b1950
89644d7
132b0ec
 
a32fa53
 
132b0ec
 
89644d7
5534eb0
 
 
 
 
132b0ec
 
5534eb0
 
 
 
 
 
 
 
 
 
 
 
 
132b0ec
 
5534eb0
 
 
 
 
 
 
 
 
 
 
 
 
 
132b0ec
5534eb0
 
 
 
 
 
89644d7
5534eb0
132b0ec
89644d7
a32fa53
da88846
34b1950
5534eb0
da88846
5534eb0
132b0ec
89644d7
132b0ec
 
 
 
 
 
 
20dc449
 
f14cff1
9177c6e
708f094
 
20dc449
 
 
 
 
 
 
 
 
 
 
 
 
 
34b1950
20dc449
 
 
43d4e83
 
20dc449
 
 
 
 
 
 
89644d7
7454788
 
f14cff1
7454788
 
708f094
 
7454788
708f094
34b1950
132b0ec
118507a
7454788
708f094
 
7454788
 
 
 
 
 
89644d7
20dc449
f14cff1
20dc449
708f094
20dc449
 
 
 
 
 
 
 
 
 
 
 
f14cff1
708f094
bf91121
708f094
7454788
 
20dc449
 
f14cff1
20dc449
708f094
cf245ed
20dc449
 
 
 
 
 
cf245ed
 
20dc449
cf245ed
f14cff1
7454788
 
20dc449
cf245ed
7454788
 
 
 
cf245ed
34b1950
 
bf91121
 
 
 
 
 
 
 
 
34b1950
cf245ed
20dc449
86218e7
89644d7
b96ba8b
 
439d01d
 
b96ba8b
 
 
439d01d
 
46f0706
cf245ed
 
 
20dc449
 
cf245ed
b96ba8b
20dc449
43d4e83
34b1950
e2a79fa
34b1950
20dc449
 
 
 
 
46f0706
43d4e83
439d01d
 
 
 
b96ba8b
d994b45
89644d7
20dc449
cf245ed
cc2969a
 
 
 
89644d7
d09cdf3
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
e2a79fa
 
 
 
 
 
 
c412123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20dc449
f14cff1
cf245ed
708f094
cf245ed
 
 
 
 
 
 
 
 
 
 
f14cff1
70d74f0
 
 
 
 
 
f14cff1
aaa4e80
 
03fd59b
b96ba8b
bf91121
 
7454788
 
20dc449
f14cff1
10aedaa
f14cff1
34b1950
 
 
 
 
 
 
 
 
 
 
ef88cd6
f14cff1
43d4e83
f14cff1
34b1950
 
 
2a53cb7
20dc449
f14cff1
2a53cb7
708f094
cf245ed
 
 
 
 
 
 
 
 
 
 
 
f14cff1
708f094
bf91121
03fd59b
7454788
 
20dc449
34b1950
 
 
 
439d01d
 
 
c412123
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
b96ba8b
20dc449
89644d7
20dc449
cf245ed
 
 
 
7454788
 
 
cf245ed
b96ba8b
70d74f0
 
 
 
20dc449
cf245ed
20dc449
 
 
 
f14cff1
cf245ed
 
 
 
 
708f094
 
 
 
 
cf245ed
 
 
 
 
 
20dc449
 
cf245ed
 
 
 
 
 
 
 
d09cdf3
3dae562
 
 
 
 
cf245ed
 
20dc449
cf245ed
20dc449
9177c6e
 
 
 
 
c85110b
9177c6e
89644d7
9177c6e
cf245ed
20dc449
 
cf245ed
 
 
 
 
 
 
 
 
20dc449
 
cf245ed
20dc449
 
 
 
 
cf245ed
20dc449
cf245ed
20dc449
cf245ed
 
 
 
 
 
 
 
20dc449
 
cf245ed
f8ec92b
20dc449
cf245ed
 
 
 
 
 
20dc449
 
cf245ed
e9640b0
20dc449
 
 
 
 
cf245ed
d09cdf3
20dc449
 
 
cf245ed
d09cdf3
20dc449
 
cf245ed
 
 
 
 
 
 
 
20dc449
 
cf245ed
20dc449
 
 
 
 
cf245ed
20dc449
 
 
c85110b
20dc449
cf245ed
20dc449
c85110b
43d4e83
03fd59b
43d4e83
59fbf6a
43d4e83
 
3dae562
 
 
 
 
 
 
 
 
 
 
 
f716a54
 
 
 
 
 
 
70d74f0
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
43d4e83
 
439d01d
59fbf6a
439d01d
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
20dc449
cf245ed
20dc449
439d01d
 
 
 
 
 
 
 
 
 
 
 
 
5534eb0
439d01d
e2a79fa
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
439d01d
2a53cb7
439d01d
 
 
2a53cb7
b96ba8b
 
2a53cb7
 
439d01d
 
2a53cb7
cf245ed
03fd59b
7454788
 
 
 
 
e3277bc
 
 
 
 
 
03fd59b
 
 
 
 
 
 
e3277bc
03fd59b
 
89644d7
7454788
e2a79fa
 
d09cdf3
e2a79fa
d09cdf3
20dc449
 
d994b45
f14cff1
cf245ed
708f094
20dc449
5c509dc
20dc449
 
 
 
 
 
 
 
 
708f094
f716a54
f14cff1
70d74f0
 
 
 
 
 
f14cff1
aaa4e80
 
03fd59b
b96ba8b
d994b45
b96ba8b
d994b45
e3277bc
7454788
 
 
f14cff1
7454788
708f094
7454788
 
 
 
 
 
 
 
 
 
 
708f094
f716a54
f14cff1
70d74f0
 
 
 
 
 
f14cff1
03fd59b
b96ba8b
7454788
aaa4e80
 
7454788
 
b96ba8b
7454788
d994b45
20dc449
 
 
5534eb0
20dc449
d994b45
20dc449
46f0706
afad1bb
20dc449
afad1bb
 
 
 
 
b96ba8b
afad1bb
b96ba8b
20dc449
d994b45
b96ba8b
 
 
2a53cb7
20dc449
d994b45
cf245ed
afad1bb
20dc449
8bd7fd1
f801525
 
8bd7fd1
ef88cd6

"""
nohup python3 app.py &
export GOOGLE_APPLICATION_CREDENTIALS="gcp_creds.json"
"""

import re
from typing import Dict
from collections import defaultdict
from datetime import date, datetime

import gradio as gr
import nltk
import torch
import numpy as np
from scipy.special import softmax
import language_tool_python
from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline

from utils import remove_special_characters, split_text_allow_complete_sentences_nltk
from google_search import google_search, months, domain_list, build_date
from humanize import humanize_text, device
from ai_generate import generate

# Everything below runs at import time: the chosen device is reported, then both
# binary-classification detectors and their tokenizers are loaded.
print(f"Using device: {device}")

# Detector models keyed by the display name that callers pass as ``model``.
# Each model is moved to ``device`` as soon as it is loaded.
models = {
    "Polygraf AI (Base Model)": AutoModelForSequenceClassification.from_pretrained(
        "polygraf-ai/bc-roberta-openai-2sent"
    ).to(device),
    "Polygraf AI (Advanced Model)": AutoModelForSequenceClassification.from_pretrained(
        "polygraf-ai/bc_combined_3sent"
    ).to(device),
}
# Tokenizers use the same keys and the same checkpoints as ``models`` so that
# ``models[name]`` and ``tokenizers[name]`` can be looked up together.
tokenizers = {
    "Polygraf AI (Base Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc-roberta-openai-2sent"),
    "Polygraf AI (Advanced Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc_combined_3sent"),
}

# grammar correction tool (US English), used by format_and_correct_language_check
tool = language_tool_python.LanguageTool("en-US")


def to_device(model):
    """Move ``model`` to the module-wide ``device`` and return what ``model.to`` returns."""
    moved_model = model.to(device)
    return moved_model


def copy_to_input(text):
    """Return ``text`` unchanged.

    Presumably wired as a UI callback that copies one component's value into
    another; the function itself performs no transformation.
    """
    passthrough = text
    return passthrough


def remove_bracketed_numbers(text):
    """Strip a single ``[N]`` marker from the very start of ``text``.

    Only a marker anchored at the beginning of the string is removed; markers
    elsewhere in the text, and any whitespace following the marker, are kept.
    """
    return re.sub(r"^\[\d+\]", "", text)


def clean_text(text: str) -> str:
    """Normalise whitespace and sentence capitalisation, paragraph by paragraph.

    The text is split on blank lines (``"\\n\\n"``). Within each paragraph every
    run of whitespace is collapsed to one space, the ends are trimmed, and a
    lowercase letter that follows ``". "`` is upper-cased. The cleaned
    paragraphs are joined with a single newline.

    Args:
        text: Raw text to clean.

    Returns:
        The cleaned text with one paragraph per line.
    """
    cleaned_paragraphs = []
    for paragraph in text.split("\n\n"):
        collapsed = re.sub(r"\s+", " ", paragraph).strip()
        # The space sits inside the lookbehind so that only the letter is
        # replaced; matching the space as well would delete it and glue the
        # two sentences together ("Hello. world" -> "Hello.World").
        capitalized = re.sub(r"(?<=\. )[a-z]", lambda m: m.group(0).upper(), collapsed)
        cleaned_paragraphs.append(capitalized)
    return "\n".join(cleaned_paragraphs)


def format_references(text: str) -> str:
    """Return ``text`` with its body and reference list re-assembled.

    The text is passed through ``split_text_from_refs`` and the two returned
    strings (body, formatted references) are concatenated in that order.
    """
    return "".join(split_text_from_refs(text))


def split_text_from_refs(text: str, sep="\n"):
    """Separate an article body from its trailing reference list.

    Lines are scanned in order. Empty lines are skipped. Once a line that is
    exactly "references" / "references:" (case-insensitive, after stripping), a
    line that starts with "references:", or a line containing "References:" /
    "references:" is seen, every following piece of text is treated as
    reference material. Reference lines are split on ``[N]`` markers and each
    non-empty, non-numeric piece becomes one reference entry.

    Args:
        text: Full article text, possibly ending in a reference section.
        sep: Separator used when re-joining body lines and reference entries.

    Returns:
        A ``(body, formatted_refs)`` tuple. ``body`` is the stripped body lines
        joined by ``sep`` twice. ``formatted_refs`` is an empty string when no
        reference entry was found, otherwise a "References:" heading followed
        by the entries renumbered from 1.
    """
    citation_marker = re.compile(r"\[(\d+)\]")
    body_lines = []
    ref_entries = []
    seen_heading = False

    for raw_line in text.split("\n"):
        if raw_line == "":
            continue

        normalized = raw_line.strip().lower()
        # A line that is nothing but the heading only flips the state.
        if normalized in ("references", "references:"):
            seen_heading = True
            continue
        if normalized.startswith("references:"):
            seen_heading = True

        # A heading embedded in a line: keep only the text after it.
        heading = re.search(r"[Rr]eferences:", raw_line, re.DOTALL)
        if heading:
            seen_heading = True
            raw_line = raw_line[heading.end() :]

        if not seen_heading:
            body_lines.append(raw_line.strip())
            continue

        # Splitting on the capturing pattern also yields the bare numbers,
        # which the isdigit() filter discards again.
        for piece in citation_marker.split(raw_line):
            trimmed = piece.strip()
            if trimmed and not piece.isdigit() and not trimmed.lower().startswith("references:"):
                ref_entries.append(trimmed)

    if len(ref_entries) > 0:
        numbered = [
            f"[{number}] {remove_bracketed_numbers(entry)}{sep}" for number, entry in enumerate(ref_entries, 1)
        ]
        formatted_refs = f"{sep}{sep}References:{sep}{sep}" + f"{sep}".join(numbered)
    else:
        formatted_refs = ""

    return f"{sep}{sep}".join(body_lines), formatted_refs


def ends_with_references(text):
    """Report whether ``text`` ends with a bare "References:" heading.

    Surrounding whitespace is ignored and the match is case-insensitive. The
    pattern is deliberately not compiled with ``re.MULTILINE``: with that flag
    ``$`` matches at the end of every line, so any text that merely contained a
    "References:" line (with entries listed after it) was reported as ending
    with the heading.

    Args:
        text: Article text to inspect.

    Returns:
        True only when the last non-whitespace content of ``text`` is
        "references:" in any letter case.
    """
    pattern = re.compile(r"\breferences:\s*$", re.IGNORECASE)
    return bool(pattern.search(text.strip()))


def format_and_correct_language_check(text: str) -> str:
    """Run ``text`` through the module-level LanguageTool instance and return its correction."""
    corrected = tool.correct(text)
    return corrected


def predict(model, tokenizer, text):
    """Classify ``text`` with a two-class model and return both probabilities.

    The text is first passed through ``remove_special_characters``, then
    tokenized (padded and truncated to 256 tokens), run through ``model``
    without gradient tracking, and the logits are softmax-normalised.

    Returns:
        A dict ``{"HUMAN": p0, "AI": p1}`` where ``p0`` and ``p1`` are the
        softmax outputs at index 0 and 1 of the first (only) row.
    """
    max_tokens = 256
    cleaned = remove_special_characters(text)
    with torch.no_grad():
        model.eval()
        encoded = tokenizer(
            cleaned,
            padding="max_length",
            truncation=True,
            max_length=max_tokens,
            return_tensors="pt",
        ).to(device)
        logits = model(**encoded).logits
        probabilities = softmax(logits.detach().cpu().numpy(), 1)[0]
        return {"HUMAN": probabilities[0], "AI": probabilities[1]}


def ai_generated_test(text, model="Polygraf AI (Base Model)"):
    """Score ``text`` with the detector registered under ``model``.

    Args:
        text: Text to classify.
        model: Key into the module-level ``models`` / ``tokenizers`` dicts.
            The default names the base model; the previous default
            ("BC Original") was not a registered key and always raised
            ``KeyError``.

    Returns:
        The ``{"HUMAN": ..., "AI": ...}`` dict produced by ``predict``.

    Raises:
        KeyError: If ``model`` is not a registered model name.
    """
    return predict(models[model], tokenizers[model], text)


def detection_polygraf(text, model="Polygraf AI (Base Model)"):
    """Score ``text`` sentence by sentence and build highlighted HTML.

    Every sentence is scored as part of each sliding window of up to three
    consecutive sentences that contains it; its final score is the mean "AI"
    probability of those windows. Sentences scoring >= 0.70 are wrapped in a
    red span, those >= 0.55 in a GoldenRod span, the rest are left as-is.

    Args:
        text: Article body to analyse; paragraphs are separated by newlines.
        model: Key into the module-level ``models`` / ``tokenizers`` dicts.
            The previous default ("BC Original") was not a registered key.

    Returns:
        A tuple ``(overall_score, html)`` where ``overall_score`` is
        ``{"HUMAN": 1 - mean, "AI": mean}`` over all sentence scores and
        ``html`` is the paragraphs joined by ``<br><br>``. For text without any
        sentence the score is ``{"HUMAN": 1.0, "AI": 0.0}`` (nothing flagged)
        instead of a ``ZeroDivisionError``.
    """
    # Tokenize once, per paragraph, and score exactly that sentence list.
    # Tokenizing the whole text for scoring and each paragraph again for
    # rendering can yield different sentence counts (for example a heading
    # without final punctuation), which shifted the scores and silently
    # dropped the trailing sentences from the output.
    paragraphs = [p for p in text.split("\n") if p.strip()]
    sentences_by_paragraph = [nltk.sent_tokenize(p) for p in paragraphs]
    sentences = [s for group in sentences_by_paragraph for s in group]
    num_sentences = len(sentences)

    # scores[k] collects the score of every window that contains sentence k.
    scores = defaultdict(list)
    for i in range(num_sentences):
        chunk = " ".join(sentences[i : i + 3])
        if chunk:
            score = ai_generated_test(chunk, model)["AI"]
            for j in range(i, min(i + 3, num_sentences)):
                scores[j].append(score)

    overall_scores = []
    colored_paragraphs = []
    index = 0
    for paragraph_sentences in sentences_by_paragraph:
        colored_sentences = []
        for sentence in paragraph_sentences:
            sentence_scores = scores.get(index)
            index += 1
            if not sentence_scores:
                # Never drop text from the output, even if it got no score.
                colored_sentences.append(sentence)
                continue
            avg_score = sum(sentence_scores) / len(sentence_scores)
            if avg_score >= 0.70:
                colored_sentence = f"<span style='background-color:red;'>{sentence}</span>"
            elif avg_score >= 0.55:
                colored_sentence = f"<span style='background-color:GoldenRod;'>{sentence}</span>"
            else:
                colored_sentence = sentence
            colored_sentences.append(colored_sentence)
            overall_scores.append(avg_score)
        colored_paragraphs.append(" ".join(colored_sentences))

    html = "<br><br>".join(colored_paragraphs)
    if not overall_scores:
        return {"HUMAN": 1.0, "AI": 0.0}, html

    overall_score = sum(overall_scores) / len(overall_scores)
    return {"HUMAN": 1 - overall_score, "AI": overall_score}, html


# Display names of the available detectors; they match the keys of ``models``
# and ``tokenizers``.
ai_check_options = [
    "Polygraf AI (Base Model)",
    "Polygraf AI (Advanced Model)",
]


# Settings for the multi-class ("mc") source model.
# Maximum token length passed to the tokenizer in predict_mc.
MC_TOKEN_SIZE = 256
TEXT_MC_MODEL_PATH = "polygraf-ai/mc-model"
# Labels paired positionally with the averaged model outputs in
# predict_mc_scores (presumably in the model's output-class order; verify
# against the checkpoint's config).
MC_LABEL_MAP = ["OpenAI GPT", "Mistral", "CLAUDE", "Gemini", "Grammar Enhancer"]
# Tokenizer and model are loaded at import time; the model is moved to ``device``.
text_mc_tokenizer = AutoTokenizer.from_pretrained(TEXT_MC_MODEL_PATH)
text_mc_model = AutoModelForSequenceClassification.from_pretrained(TEXT_MC_MODEL_PATH).to(device)


def predict_mc(text):
    """Run the multi-class source model on ``text`` and return its softmax scores.

    The text is tokenized (padded and truncated to ``MC_TOKEN_SIZE`` tokens) and
    evaluated without gradient tracking.

    Returns:
        The first (only) row of the softmax over the model logits, as a NumPy
        array with one entry per output class.
    """
    with torch.no_grad():
        text_mc_model.eval()
        encoded = text_mc_tokenizer(
            text,
            padding="max_length",
            truncation=True,
            return_tensors="pt",
            max_length=MC_TOKEN_SIZE,
        ).to(device)
        logits = text_mc_model(**encoded).logits
        class_probabilities = softmax(logits.detach().cpu().numpy(), 1)[0]
        return class_probabilities


def predict_mc_scores(input, bc_score):
    """Estimate per-source scores for ``input``, scaled by the non-human share.

    The text is split into segments with
    ``split_text_allow_complete_sentences_nltk``; each segment is cleaned with
    ``remove_special_characters`` and scored with ``predict_mc``. The segment
    scores are averaged per class, paired with ``MC_LABEL_MAP`` (upper-cased)
    and multiplied by ``1 - bc_score["HUMAN"]``.

    Args:
        input: Text to analyse. (The name shadows the builtin but is kept for
            callers that pass it by keyword.)
        bc_score: Binary-classifier result containing a ``"HUMAN"`` entry.

    Returns:
        A dict mapping upper-cased source labels to scaled scores, or an empty
        dict when the text yields no segments or the non-human share is below
        0.01.
    """
    # Segment once; the previous code ran the same split a second time only to
    # obtain the segment count.
    segments_mc = split_text_allow_complete_sentences_nltk(input, type_det="mc", tokenizer=text_mc_tokenizer)
    if len(segments_mc) == 0:
        # np.mean over an empty array yields a float, which cannot be zipped
        # with the label list; there is nothing to attribute anyway.
        return {}

    mc_scores = [predict_mc(remove_special_characters(segment)) for segment in segments_mc]
    average_mc_scores = np.mean(np.array(mc_scores), axis=0).tolist()

    sum_prob = 1 - bc_score["HUMAN"]
    mc_score = {label.upper(): score * sum_prob for score, label in zip(average_mc_scores, MC_LABEL_MAP)}
    print("MC Score:", mc_score)
    if sum_prob < 0.01:
        mc_score = {}

    return mc_score


def highlighter_polygraf(text, model="Polygraf AI (Base Model)"):
    """Run both detectors on the article body and re-attach the references.

    The reference section is split off first so that only the body is scored.
    The formatted references are appended to the highlighted HTML afterwards,
    with newlines converted to ``<br>``.

    Returns:
        A tuple ``(score, html, mc_score)``: the binary score dict, the
        highlighted HTML including references, and the per-source scores.
    """
    article_body, reference_block = split_text_from_refs(text)
    bc_score, highlighted_html = detection_polygraf(text=article_body, model=model)
    source_scores = predict_mc_scores(article_body, bc_score)  # mc score
    highlighted_html = highlighted_html + reference_block.replace("\n", "<br>")
    return bc_score, highlighted_html, source_scores


def ai_check(text: str, option: str):
    """Run the AI-content check on ``text`` with the detector named by ``option``.

    Both branches of the former ``option.startswith("Polygraf AI")`` test made
    the identical call, so the conditional was dead weight and has been removed.

    Args:
        text: Article text, optionally ending in a reference section.
        option: Detector name, forwarded as the ``model`` argument.

    Returns:
        The ``(score, html, mc_score)`` tuple from ``highlighter_polygraf``.
    """
    return highlighter_polygraf(text, option)


def generate_prompt(settings: Dict[str, str]) -> str:
    """Build the prompt for writing a new article from ``settings``.

    ``settings`` must provide every key read below. The ``structure``,
    ``keywords`` and ``references`` entries are iterables of strings and are
    rendered comma-separated; all other entries are interpolated as-is.

    Returns:
        The multi-line prompt text.
    """
    # Values are read in the same order in which the template uses them.
    role = settings["role"]
    length = settings["article_length"]
    text_format = settings["format"]
    topic = settings["topic"]
    context = settings["context"]
    writing_style = settings["writing_style"]
    tone = settings["tone"]
    audience = settings["user_category"]
    depth = settings["depth_of_content"]
    structure = ", ".join(settings["structure"])
    keywords = ", ".join(settings["keywords"])
    num_examples = settings["num_examples"]
    references = ", ".join(settings["references"])
    conclusion = settings["conclusion_type"]

    return f"""
    I am a {role}
    Write a {length} words (around) {text_format} on {topic}.
    Context:
    - {context}
    
    Style and Tone:
    - Writing style: {writing_style}
    - Tone: {tone}
    - Target audience: {audience}
    
    Content:
    - Depth: {depth}
    - Structure: {structure}
    
    Keywords to incorporate:
    {keywords}
    
    Additional requirements:
    - Don't start with "Here is a...", start with the requested text directly
    - Include {num_examples} relevant examples or case studies
    - Incorporate data or statistics from {references}
    - End with a {conclusion} conclusion
    - Add a "References" section in the format "References:" on a new line at the end with at least 3 credible detailed sources, formatted as [1], [2], etc. with each source on their own line
    - Do not repeat sources
    - Do not make any headline, title bold.
    
    Ensure proper paragraph breaks for better readability.
    Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
    """


def regenerate_prompt(settings: Dict[str, str]) -> str:
    """Build the prompt for revising an existing article from ``settings``.

    Reads the ``role``, ``generated_article``, ``user_comments`` and ``context``
    entries and asks for minor edits to the quoted article based on the user's
    comments.

    Returns:
        The multi-line prompt text.
    """
    # Values are read in the same order in which the template uses them.
    role = settings["role"]
    previous_article = settings["generated_article"]
    comments = settings["user_comments"]
    context = settings["context"]

    return f"""
    I am a {role}
    "{previous_article}"
    Edit the given text based on user comments.
    User Comments:
    - {comments}
    
    Requirements:
    - Don't start with "Here is a...", start with the requested text directly
    - The original content should not be changed. Make minor modifications based on user comments above.
    - Keep the references the same as the given text in the same format.
    - Do not make any headline, title bold.
    Context:
    - {context}
    
    Ensure proper paragraph breaks for better readability.
    Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
    """


def generate_article(
    input_role: str,
    topic: str,
    context: str,
    keywords: str,
    article_length: str,
    format: str,
    writing_style: str,
    tone: str,
    user_category: str,
    depth_of_content: str,
    structure: str,
    references: str,
    num_examples: str,
    conclusion_type: str,
    ai_model: str,
    content_string: str,
    url_content: str = None,
    api_key: str = None,
    pdf_file_input: list[str] = None,
    generated_article: str = None,
    user_comments: str = None,
) -> str:
    """Generate (or revise) an article and return the cleaned text.

    The arguments are collected into a settings dict; ``keywords``,
    ``structure`` and ``references`` are comma-separated strings that are split
    into stripped items. If ``generated_article`` is truthy the revision prompt
    is used, otherwise the prompt for a new article. The prompt is printed,
    sent to ``generate`` together with the model name, URL content, PDF path(s)
    and API key, and the result is passed through ``clean_text``.

    Returns:
        The generated article after ``clean_text``.
    """

    def split_csv(value: str) -> list:
        return [item.strip() for item in value.split(",")]

    settings = dict(
        role=input_role,
        topic=topic,
        context=context,
        keywords=split_csv(keywords),
        article_length=article_length,
        format=format,
        writing_style=writing_style,
        tone=tone,
        user_category=user_category,
        depth_of_content=depth_of_content,
        structure=split_csv(structure),
        references=split_csv(references),
        num_examples=num_examples,
        conclusion_type=conclusion_type,
        sources=content_string,
        generated_article=generated_article,
        user_comments=user_comments,
    )

    # An existing article switches the request from "write" to "revise".
    build_prompt = regenerate_prompt if generated_article else generate_prompt
    prompt = build_prompt(settings)
    print("Generated Prompt...\n", prompt)

    raw_article = generate(
        prompt=prompt,
        topic=topic,
        model=ai_model,
        url_content=url_content,
        path=pdf_file_input,
        temperature=1,
        max_length=2048,
        api_key=api_key,
        sys_message="",
    )
    return clean_text(raw_article)


def get_history(history):
    """Return the ``history`` object that was passed in, unchanged."""
    current_history = history
    return current_history


def clear_history():
    """Return two fresh empty lists: one for the history state, one for its display."""
    cleared_state = []
    cleared_display = []
    return cleared_state, cleared_display


def humanize(
    text: str,
    model: str,
    temperature: float = 1.2,
    repetition_penalty: float = 1,
    top_k: int = 50,
    length_penalty: float = 1,
    history=None,
) -> tuple[str, list]:
    """Humanize the article body, re-attach its references and grammar-correct it.

    The reference section is split off so that only the body is rewritten by
    ``humanize_text``. The formatted references are appended again and the
    whole text is passed through the grammar-correction tool. A timestamped
    ``(label, text)`` entry is appended to ``history``.

    Args:
        text: Article text, optionally ending in a reference section.
        model: Humanizer model name, forwarded as ``model_name``.
        temperature: Forwarded to ``humanize_text``.
        repetition_penalty: Forwarded to ``humanize_text``.
        top_k: Forwarded to ``humanize_text``.
        length_penalty: Forwarded to ``humanize_text``.
        history: List of ``(label, text)`` entries to append to. A new list is
            started when omitted; previously the ``None`` default crashed on
            ``history.append``.

    Returns:
        A tuple ``(corrected_text, history)``.
    """
    if history is None:
        history = []

    print("Humanizing text...")
    body, references = split_text_from_refs(text)
    result = humanize_text(
        text=body,
        model_name=model,
        temperature=temperature,
        repetition_penalty=repetition_penalty,
        top_k=top_k,
        length_penalty=length_penalty,
    )
    result = result + references
    corrected_text = format_and_correct_language_check(result)

    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    history.append((f"Humanized Text | {timestamp}\nInput: {model}", corrected_text))
    return corrected_text, history


def update_visibility_api(model: str):
    """Return a Gradio update whose ``visible`` flag is True only for the two listed OpenAI models."""
    show_component = model in ("OpenAI GPT 3.5", "OpenAI GPT 4")
    return gr.update(visible=show_component)


def update_structure(format_choice):
    """Pick the default structure value for the selected output format.

    Short-form formats (social posts, video content, email) default to
    "Plain Text"; every other format defaults to
    "Introduction, Body, Conclusion". The returned Gradio update keeps the
    component interactive in both cases.
    """
    plain_text_formats = (
        "TikTok Video Content",
        "Instagram Video Content",
        "LinkedIn post",
        "X (Twitter) post",
        "Facebook post",
        "Email",
    )
    default_structure = (
        "Plain Text" if format_choice in plain_text_formats else "Introduction, Body, Conclusion"
    )
    return gr.update(value=default_structure, interactive=True)


def update_temperature(model_dropdown):
    """Return a Gradio update with the default temperature for the chosen humanizer model.

    "Standard Model" maps to 1.2 and "Advanced Model (Beta)" to 1.0. Any other
    value yields ``None``.
    """
    default_temperatures = (
        ("Standard Model", 1.2),
        ("Advanced Model (Beta)", 1.0),
    )
    for model_name, temperature in default_temperatures:
        if model_dropdown == model_name:
            return gr.update(value=temperature, interactive=True)
    return None


import uuid
import json
from datetime import datetime
from google.cloud import storage

# Initialize Google Cloud Storage client
# NOTE: this runs at import time. The module docstring exports
# GOOGLE_APPLICATION_CREDENTIALS before launch, so the client presumably picks
# its credentials up from that variable; confirm for other deployments.
client = storage.Client()
# Name of the bucket that save_to_cloud_storage writes to.
bucket_name = "ai-source-detection"
bucket = client.bucket(bucket_name)


def save_to_cloud_storage(
    article,
    input_role,
    topic_context,
    context,
    keywords,
    article_length,
    format,
    writing_style,
    tone,
    user_category,
    depth_of_content,
    structure,
    references,
    num_examples,
    conclusion_type,
    ai_model,
    content_string,
    url_content,
    generated_article,
    user_comments,
    timestamp,
):
    """Upload the article and its generation settings to the module-level GCS bucket as JSON.

    The object is stored under ``ai-writer/`` and named after ``timestamp``
    (spaces replaced by ``_``, colons by ``-``) followed by a random UUID. The
    JSON document has an ``"article"`` entry and a ``"metadata"`` entry holding
    all remaining arguments.

    Returns:
        A confirmation message containing the object name.
    """
    # Random suffix keeps object names unique.
    unique_suffix = str(uuid.uuid4())
    safe_timestamp = timestamp.replace(" ", "_").replace(":", "-")
    object_name = f"ai-writer/{safe_timestamp}_{unique_suffix}.json"

    metadata = dict(
        input_role=input_role,
        topic_context=topic_context,
        context=context,
        keywords=keywords,
        article_length=article_length,
        format=format,
        writing_style=writing_style,
        tone=tone,
        user_category=user_category,
        depth_of_content=depth_of_content,
        structure=structure,
        references=references,
        num_examples=num_examples,
        conclusion_type=conclusion_type,
        ai_model=ai_model,
        content_string=content_string,
        url_content=url_content,
        generated_article=generated_article,
        user_comments=user_comments,
        timestamp=timestamp,
    )
    payload = json.dumps({"article": article, "metadata": metadata})

    # Upload the serialized payload as a JSON object.
    bucket.blob(object_name).upload_from_string(payload, content_type="application/json")

    return f"Data saved as {object_name} in GCS."


def generate_and_format(
    input_role,
    topic,
    context,
    keywords,
    article_length,
    format,
    writing_style,
    tone,
    user_category,
    depth_of_content,
    structure,
    references,
    num_examples,
    conclusion_type,
    google_search_check,
    year_from,
    month_from,
    day_from,
    year_to,
    month_to,
    day_to,
    domains_to_include,
    include_sites,
    exclude_sites,
    pdf_file_input,
    history=None,
    ai_model="OpenAI GPT 4o",
    api_key=None,
    generated_article: str = None,
    user_comments: str = None,
):
    """Generate an article, optionally backed by a web search, and format its references.

    When ``google_search_check`` is truthy, a query is built from ``topic`` plus
    ``site:`` / ``-site:`` filters taken from the comma-separated
    ``include_sites`` / ``exclude_sites``, restricted to the given date range,
    and sent to ``google_search``. The article is then produced by
    ``generate_article``. If it ends with a bare "References:" heading and
    search results exist, the result URLs are appended as references. The text
    is passed through ``format_references``, a timestamped entry is added to
    ``history``, and the run is saved to cloud storage unless a PDF was
    supplied.

    Args:
        history: List of ``(label, text)`` entries to append to. A new list is
            started when omitted; previously the ``None`` default crashed on
            ``history.append``.
        All other arguments are forwarded to ``build_date``, ``google_search``,
        ``generate_article`` and ``save_to_cloud_storage`` as shown below.

    Returns:
        A tuple ``(reference_formatted, history)``.
    """
    if history is None:
        history = []

    content_string = ""
    url_content = None
    if google_search_check:
        date_from = build_date(year_from, month_from, day_from)
        date_to = build_date(year_to, month_to, day_to)
        sorted_date = f"date:r:{date_from}:{date_to}"
        final_query = topic
        if include_sites:
            site_queries = [f"site:{site.strip()}" for site in include_sites.split(",")]
            final_query += " " + " OR ".join(site_queries)
        if exclude_sites:
            exclude_queries = [f"-site:{site.strip()}" for site in exclude_sites.split(",")]
            final_query += " " + " ".join(exclude_queries)
        print(f"Google Search Query: {final_query}")
        url_content = google_search(final_query, sorted_date, domains_to_include)
        # Each page contributes at most its first 2500 characters.
        content_string = "\n".join(
            f"{url.strip()}: \n{content.strip()[:2500]}" for url, content in url_content.items()
        )
        content_string = (
            "Use the trusted information here from the URLs and add them as References:\n" + content_string
        )
    topic_context = topic + ", " + context
    article = generate_article(
        input_role,
        topic_context,
        context,
        keywords,
        article_length,
        format,
        writing_style,
        tone,
        user_category,
        depth_of_content,
        structure,
        references,
        num_examples,
        conclusion_type,
        ai_model,
        content_string,
        url_content,
        api_key,
        pdf_file_input,
        generated_article,
        user_comments,
    )
    # The model left the reference list empty: fill it with the searched URLs.
    if ends_with_references(article) and url_content is not None:
        article += "".join(f"\n{url}" for url in url_content)

    reference_formatted = format_references(article)
    timestamp = datetime.now().strftime("%Y-%m-%d %H:%M:%S")
    history.append((f"Generated Text | {timestamp}\nInput: {topic}", reference_formatted))

    # Save the article and metadata to Cloud Storage
    # We dont save if there is PDF input for privacy reasons
    if pdf_file_input is None:
        save_message = save_to_cloud_storage(
            article,
            input_role,
            topic_context,
            context,
            keywords,
            article_length,
            format,
            writing_style,
            tone,
            user_category,
            depth_of_content,
            structure,
            references,
            num_examples,
            conclusion_type,
            ai_model,
            content_string,
            url_content,
            generated_article,
            user_comments,
            timestamp,
        )
        print(save_message)

    return reference_formatted, history


def create_interface():
    """Build the Gradio Blocks UI for the Polygraf AI content writer.

    The layout has two columns: the left one collects the article
    configuration, the optional internet-search filters and optional PDF
    uploads; the right one shows the generated text, the AI-detection and
    humanizer controls, and a "History" tab.

    Event wiring notes:
        * The generate / regenerate / humanize handlers all write the
          per-session ``history`` state.  The History chatbot is refreshed
          with ``.then(get_history, ...)`` chained on each of those events so
          that it always reads the state *after* the handler has finished.
          (Registering ``get_history`` as a second, independent ``click``
          listener lets it run concurrently with the handler and read the
          stale state.)

    Returns:
        The assembled ``gr.Blocks`` app; the caller is responsible for
        launching it.
    """
    with gr.Blocks(
        theme=gr.themes.Default(
            primary_hue=gr.themes.colors.pink, secondary_hue=gr.themes.colors.yellow, neutral_hue=gr.themes.colors.gray
        ),
        css="""
            .input-highlight-pink block_label {background-color: #008080}
            """,
    ) as demo:
        # Per-session list of history entries shown in the History tab.
        history = gr.State([])
        # Today's date as (day, full month name, 4-digit year); used as the
        # default upper bound of the search date range.
        day_today, month_today, year_today = date.today().strftime("%d/%B/%Y").split("/")
        gr.Markdown("# Polygraf AI Content Writer", elem_classes="text-center text-3xl mb-6")

        with gr.Row():
            with gr.Column(scale=2):
                with gr.Group():
                    gr.Markdown("## Article Configuration", elem_classes="text-xl mb-4")
                    input_role = gr.Textbox(label="I am a", placeholder="Enter your role", value="Student")
                    input_topic = gr.Textbox(
                        label="Topic",
                        placeholder="Enter the main topic of your article",
                        elem_classes="input-highlight-pink",
                    )
                    input_context = gr.Textbox(
                        label="Context",
                        placeholder="Provide some context for your topic",
                        elem_classes="input-highlight-pink",
                    )
                    input_keywords = gr.Textbox(
                        label="Keywords",
                        placeholder="Enter comma-separated keywords",
                        elem_classes="input-highlight-yellow",
                    )

                    with gr.Row():
                        input_format = gr.Dropdown(
                            choices=[
                                "Article",
                                "Essay",
                                "Blog post",
                                "Report",
                                "Research paper",
                                "News article",
                                "White paper",
                                "Email",
                                "LinkedIn post",
                                "X (Twitter) post",
                                "Instagram Video Content",
                                "TikTok Video Content",
                                "Facebook post",
                            ],
                            value="Article",
                            label="Format",
                            elem_classes="input-highlight-turquoise",
                        )

                    input_length = gr.Slider(
                        minimum=50,
                        maximum=5000,
                        step=50,
                        value=300,
                        label="Article Length",
                        elem_classes="input-highlight-pink",
                    )

                    with gr.Row():
                        input_writing_style = gr.Dropdown(
                            choices=[
                                "Formal",
                                "Informal",
                                "Technical",
                                "Conversational",
                                "Journalistic",
                                "Academic",
                                "Creative",
                            ],
                            value="Formal",
                            label="Writing Style",
                            elem_classes="input-highlight-yellow",
                        )
                        input_tone = gr.Dropdown(
                            choices=["Friendly", "Professional", "Neutral", "Enthusiastic", "Skeptical", "Humorous"],
                            value="Professional",
                            label="Tone",
                            elem_classes="input-highlight-turquoise",
                        )

                    input_user_category = gr.Dropdown(
                        choices=[
                            "Students",
                            "Professionals",
                            "Researchers",
                            "General Public",
                            "Policymakers",
                            "Entrepreneurs",
                        ],
                        value="General Public",
                        label="Target Audience",
                        elem_classes="input-highlight-pink",
                    )
                    input_depth = gr.Dropdown(
                        choices=[
                            "Surface-level overview",
                            "Moderate analysis",
                            "In-depth research",
                            "Comprehensive study",
                        ],
                        value="Moderate analysis",
                        label="Depth of Content",
                        elem_classes="input-highlight-yellow",
                    )
                    input_structure = gr.Dropdown(
                        choices=[
                            "Introduction, Body, Conclusion",
                            "Abstract, Introduction, Methods, Results, Discussion, Conclusion",
                            "Executive Summary, Problem Statement, Analysis, Recommendations, Conclusion",
                            "Introduction, Literature Review, Methodology, Findings, Analysis, Conclusion",
                            "Plain Text",
                        ],
                        value="Introduction, Body, Conclusion",
                        label="Structure",
                        elem_classes="input-highlight-turquoise",
                        interactive=True,
                    )
                    input_references = gr.Dropdown(
                        choices=[
                            "Academic journals",
                            "Industry reports",
                            "Government publications",
                            "News outlets",
                            "Expert interviews",
                            "Case studies",
                        ],
                        value="News outlets",
                        label="References",
                        elem_classes="input-highlight-pink",
                    )
                    input_num_examples = gr.Dropdown(
                        choices=["1-2", "3-4", "5+"],
                        value="1-2",
                        label="Number of Examples/Case Studies",
                        elem_classes="input-highlight-yellow",
                    )
                    input_conclusion = gr.Dropdown(
                        choices=["Summary", "Call to Action", "Future Outlook", "Thought-provoking Question"],
                        value="Call to Action",
                        label="Conclusion Type",
                        elem_classes="input-highlight-turquoise",
                    )
                    gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
                    # The search checkbox and the visibility of its option
                    # group share one default so they start out in sync.
                    google_default = False
                    with gr.Row():
                        google_search_check = gr.Checkbox(
                            label="Enable Internet Search For Recent Sources", value=google_default
                        )
                    with gr.Group(visible=google_default) as search_options:
                        with gr.Row():
                            include_sites = gr.Textbox(
                                label="Include Specific Websites",
                                placeholder="Enter comma-separated keywords",
                                elem_classes="input-highlight-yellow",
                            )
                        with gr.Row():
                            exclude_sites = gr.Textbox(
                                label="Exclude Specific Websites",
                                placeholder="Enter comma-separated keywords",
                                elem_classes="input-highlight-yellow",
                            )
                        with gr.Row():
                            domains_to_include = gr.Dropdown(
                                domain_list,
                                value=domain_list,
                                multiselect=True,
                                label="Domains To Include",
                            )
                        with gr.Row():
                            month_from = gr.Dropdown(
                                choices=months,
                                label="From Month",
                                value="January",
                                interactive=True,
                            )
                            day_from = gr.Textbox(label="From Day", value="01")
                            year_from = gr.Textbox(label="From Year", value="2000")

                        with gr.Row():
                            month_to = gr.Dropdown(
                                choices=months,
                                label="To Month",
                                value=month_today,
                                interactive=True,
                            )
                            day_to = gr.Textbox(label="To Day", value=day_today)
                            year_to = gr.Textbox(label="To Year", value=year_today)

                    gr.Markdown("# Add Optional PDF Files with Information", elem_classes="text-center text-3xl mb-6")
                    pdf_file_input = gr.File(label="Upload PDF(s)", file_count="multiple", file_types=[".pdf"])
                """
                # NOTE: HIDE AI MODEL SELECTION
                with gr.Group():
                    gr.Markdown("## AI Model Configuration", elem_classes="text-xl mb-4")
                    ai_generator = gr.Dropdown(
                        choices=[
                            "OpenAI GPT 4",
                            "OpenAI GPT 4o",
                            "OpenAI GPT 4o Mini",
                            "Claude Sonnet 3.5",
                            "Gemini 1.5 Pro",
                            "LLaMA 3",
                        ],
                        value="OpenAI GPT 4o Mini",
                        label="AI Model",
                        elem_classes="input-highlight-pink",
                    )
                input_api = gr.Textbox(label="API Key", visible=False)
                ai_generator.change(update_visibility_api, ai_generator, input_api)
                """
                generate_btn = gr.Button("Generate Article", variant="primary")

            with gr.Column(scale=3):
                with gr.Tab("Text Generator"):
                    output_article = gr.Textbox(label="Generated Article", lines=20)
                    # Hidden until there is generated text to comment on.
                    ai_comments = gr.Textbox(
                        label="Add comments to help edit generated text", interactive=True, visible=False
                    )
                    regenerate_btn = gr.Button("Regenerate Article", variant="primary", visible=False)
                    ai_detector_dropdown = gr.Radio(
                        choices=ai_check_options, label="Select AI Detector", value="Polygraf AI"
                    )
                    ai_check_btn = gr.Button("AI Check")

                    with gr.Accordion("AI Detection Results", open=True):
                        ai_check_result = gr.Label(label="AI Check Result")
                        mc_check_result = gr.Label(label="Creator Check Result")
                        highlighted_text = gr.HTML(label="Sentence Breakdown", visible=False)

                    with gr.Accordion("Advanced Humanizer Settings", open=False):
                        with gr.Row():
                            model_dropdown = gr.Radio(
                                choices=["Standard Model", "Advanced Model (Beta)"],
                                value="Advanced Model (Beta)",
                                label="Humanizer Model Version",
                            )
                        with gr.Row():
                            temperature_slider = gr.Slider(
                                minimum=0.5, maximum=2.0, step=0.1, value=1.0, label="Temperature"
                            )
                            top_k_slider = gr.Slider(minimum=0, maximum=300, step=25, value=40, label="Top k")
                        with gr.Row():
                            repetition_penalty_slider = gr.Slider(
                                minimum=1.0, maximum=2.0, step=0.1, value=1, label="Repetition Penalty"
                            )
                            length_penalty_slider = gr.Slider(
                                minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Length Penalty"
                            )

                    humanize_btn = gr.Button("Humanize")
                    # humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
                    # copy_to_input_btn = gr.Button("Copy to Input for AI Check")

                with gr.Tab("History"):
                    history_chat = gr.Chatbot(label="Generation History", height=1000)
                    clear_history_btn = gr.Button("Clear History")
                    clear_history_btn.click(clear_history, outputs=[history, history_chat])
                    """
                    # NOTE: REMOVED REFRESH BUTTON
                    refresh_button = gr.Button("Refresh History")
                    refresh_button.click(get_history, outputs=history_chat)
                    """

        def regenerate_visible(text):
            """Show the target component only when ``text`` is non-empty."""
            return gr.update(visible=bool(text))

        def highlight_visible(text):
            """Show the sentence breakdown only for detectors named "Polygraf...".

            An empty/absent selection hides the component instead of raising.
            """
            return gr.update(visible=bool(text) and text.startswith("Polygraf"))

        def search_visible(toggle):
            """Show the search option group only while the checkbox is ticked."""
            return gr.update(visible=bool(toggle))

        google_search_check.change(search_visible, inputs=google_search_check, outputs=search_options)
        ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
        output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
        # The regenerate button appears once the user edits the comment box,
        # provided there is generated text to regenerate from.
        ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
        ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)

        # Update the default structure based on the selected format
        # e.g. "Plain Text" for certain formats
        input_format.change(fn=update_structure, inputs=input_format, outputs=input_structure)
        model_dropdown.change(fn=update_temperature, inputs=model_dropdown, outputs=temperature_slider)

        # Leading positional inputs shared by "Generate" and "Regenerate".
        # The AI model selector and API key box are hidden (see the disabled
        # block above), so they are intentionally not passed here.
        shared_inputs = [
            input_role,
            input_topic,
            input_context,
            input_keywords,
            input_length,
            input_format,
            input_writing_style,
            input_tone,
            input_user_category,
            input_depth,
            input_structure,
            input_references,
            input_num_examples,
            input_conclusion,
            # ai_generator,
            # input_api,
            google_search_check,
            year_from,
            month_from,
            day_from,
            year_to,
            month_to,
            day_to,
            domains_to_include,
        ]

        # Refresh the History tab only after the handler has produced the new
        # `history` state; hence `.then(...)` rather than a second `.click`.
        generate_btn.click(
            fn=generate_and_format,
            inputs=shared_inputs + [include_sites, exclude_sites, pdf_file_input, history],
            outputs=[output_article, history],
        ).then(get_history, inputs=[history], outputs=[history_chat])

        # NOTE(review): the trailing inputs below are passed in a different
        # positional order than for `generate_btn` (pdf_file_input/history
        # come before include_sites/exclude_sites). Gradio passes inputs
        # positionally, so one of the two orders presumably mismatches the
        # signature of `generate_and_format` - confirm against its definition.
        # The original order is preserved here.
        regenerate_btn.click(
            fn=generate_and_format,
            inputs=shared_inputs
            + [
                pdf_file_input,
                history,
                output_article,
                include_sites,
                exclude_sites,
                ai_comments,
            ],
            outputs=[output_article, history],
        ).then(get_history, inputs=[history], outputs=[history_chat])

        ai_check_btn.click(
            fn=ai_check,
            inputs=[output_article, ai_detector_dropdown],
            outputs=[ai_check_result, highlighted_text, mc_check_result],
        )

        humanize_btn.click(
            fn=humanize,
            inputs=[
                output_article,
                model_dropdown,
                temperature_slider,
                repetition_penalty_slider,
                top_k_slider,
                length_penalty_slider,
                history,
            ],
            outputs=[output_article, history],
        ).then(get_history, inputs=[history], outputs=[history_chat])

    return demo


if __name__ == "__main__":
    # Build the UI and serve it when this file is executed as a script.
    demo = create_interface()
    # Alternative launch kept for reference: bounded request queue, public
    # share link and a fixed port.
    # demo.queue(
    #     max_size=2,
    #     default_concurrency_limit=2,
    # ).launch(server_name="0.0.0.0", share=True, server_port=7890)
    # server_name="0.0.0.0" makes the server listen on all network interfaces
    # instead of only on localhost.
    demo.launch(server_name="0.0.0.0")