minko186 committed on
Commit 24a982b
2 Parent(s): 4b92a71 ca46543

Merge branch 'main' into staging + fixed url_content error

Files changed (3):
  1. ai_generate.py +22 -18
  2. app.py +10 -10
  3. humanize.py +859 -103
ai_generate.py CHANGED
@@ -42,27 +42,35 @@ vertexai.init(project="proprietary-info-detection", location="us-central1")
 gemini_client = GenerativeModel("gemini-1.5-pro-001")
 claude_client = anthropic.Anthropic(api_key=os.environ.get("ANTHROPIC_API_KEY"))
 
+# For GPT-4 1 word is about 1.3 tokens.
+temperature = 1.0
+max_tokens = 2048
 
 rag_llms = {
     "LLaMA 3": ChatGroq(
-        temperature=0,
+        temperature=temperature,
+        max_tokens=max_tokens,
         model_name="llama3-70b-8192",
     ),
     "OpenAI GPT 4o Mini": ChatOpenAI(
-        temperature=0,
+        temperature=temperature,
+        max_tokens=max_tokens,
         model_name="gpt-4o-mini",
     ),
     "OpenAI GPT 4o": ChatOpenAI(
-        temperature=0,
+        temperature=temperature,
+        max_tokens=max_tokens,
         model_name="gpt-4o",
     ),
     "OpenAI GPT 4": ChatOpenAI(
-        temperature=0,
+        temperature=temperature,
+        max_tokens=max_tokens,
        model_name="gpt-4-turbo",
     ),
-    "Gemini 1.5 Pro": ChatGoogleGenerativeAI(temperature=0, model="gemini-1.5-pro"),
+    "Gemini 1.5 Pro": ChatGoogleGenerativeAI(temperature=temperature, max_tokens=max_tokens, model="gemini-1.5-pro"),
     "Claude Sonnet 3.5": ChatAnthropic(
-        temperature=0,
+        temperature=temperature,
+        max_tokens=max_tokens,
         model_name="claude-3-5-sonnet-20240620",
     ),
 }
@@ -107,8 +115,8 @@ def generate_groq(text, model):
                 "content": "Please follow the instruction and write about the given topic in approximately the given number of words",
             },
         ],
-        temperature=1,
-        max_tokens=1024,
+        temperature=temperature,
+        max_tokens=max_tokens,
         stream=True,
         stop=None,
     )
@@ -124,8 +132,8 @@ def generate_openai(text, model, openai_client):
     response = openai_client.chat.completions.create(
         model=model,
         messages=message,
-        temperature=1,
-        max_tokens=1024,
+        temperature=temperature,
+        max_tokens=max_tokens,
     )
     return response.choices[0].message.content
 
@@ -138,9 +146,8 @@ def generate_gemini(text, model, gemini_client):
         generative_models.HarmCategory.HARM_CATEGORY_HARASSMENT: generative_models.HarmBlockThreshold.BLOCK_MEDIUM_AND_ABOVE,
     }
     generation_config = {
-        "max_output_tokens": 1024,
-        "temperature": 1.0,
-        "top_p": 1.0,
+        "max_output_tokens": max_tokens,
+        "temperature": temperature,
     }
     response = gemini_client.generate_content(
         [text],
@@ -154,8 +161,8 @@ def generate_gemini(text, model, gemini_client):
 def generate_claude(text, model, claude_client):
     response = claude_client.messages.create(
         model=model,
-        max_tokens=1024,
-        temperature=1.0,
+        max_tokens=max_tokens,
+        temperature=temperature,
         system="You are helpful assistant.",
         messages=[{"role": "user", "content": [{"type": "text", "text": text}]}],
     )
@@ -165,9 +172,6 @@ def generate_claude(text, model, claude_client):
 def generate(text, model, path, api=None):
     if path:
         result = generate_rag(text, model, path)
-        if "references" not in result.lower():
-            result += "\n\n" + "References:"
-            result += "\n\n" + f"{path}"
         return result
     else:
         print(f"Generating text for {model}...")
app.py CHANGED
@@ -2,20 +2,18 @@
 nohup python3 app.py &
 """
 
-import openai
-import gradio as gr
-from typing import Dict, List
 import re
 import requests
+from typing import Dict
+from collections import defaultdict
+from datetime import date
+import gradio as gr
+from scipy.special import softmax
 import language_tool_python
+import nltk
 import torch
-from gradio_client import Client
 from transformers import GPT2LMHeadModel, GPT2TokenizerFast
 from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
-from scipy.special import softmax
-from collections import defaultdict
-import nltk
-from datetime import date
 
 from utils import remove_special_characters
 from plagiarism import google_search, months, domain_list, build_date
@@ -37,6 +35,9 @@ tokenizers = {
     "Polygraf AI (Advanced Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc_combined_3sent"),
 }
 
+# grammar correction tool
+tool = language_tool_python.LanguageTool("en-US")
+
 
 # Function to move model to the appropriate device
 def to_device(model):
@@ -101,7 +102,6 @@ def ends_with_references(text):
 
 
 def format_and_correct_language_check(text: str) -> str:
-    tool = language_tool_python.LanguageTool("en-US")
     return tool.correct(text)
 
 
@@ -627,7 +627,7 @@ def create_interface():
                     )
                 gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
                 with gr.Row():
-                    google_search_check = gr.Checkbox(label="Enable Google Search For Recent Sources", value=True)
+                    google_search_check = gr.Checkbox(label="Enable Google Search For Recent Sources", value=False)
                 with gr.Group(visible=True) as search_options:
                     with gr.Row():
                         include_sites = gr.Textbox(
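
Besides the import reordering and the Google-search checkbox now defaulting to off, the key app.py change hoists the language_tool_python.LanguageTool("en-US") instance out of format_and_correct_language_check to module scope. Constructing a LanguageTool object typically starts a local LanguageTool server, so building it once at import time and reusing it avoids paying that startup cost on every correction call. A minimal sketch of the before/after pattern:

import language_tool_python

# Before: a fresh LanguageTool instance (and server startup) on every call.
def correct_per_call(text: str) -> str:
    tool = language_tool_python.LanguageTool("en-US")
    return tool.correct(text)

# After (as in this commit): construct once at module scope, reuse everywhere.
tool = language_tool_python.LanguageTool("en-US")

def correct_shared(text: str) -> str:
    return tool.correct(text)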
humanize.py CHANGED
@@ -1,108 +1,864 @@
-import torch
-from nltk import sent_tokenize
-import nltk
-from tqdm import tqdm
+"""
+nohup python3 app.py &
+"""
+
+import re
+import requests
+from typing import Dict
+from collections import defaultdict
+from datetime import date
 import gradio as gr
-from transformers import T5ForConditionalGeneration, T5Tokenizer
-
-nltk.download("punkt")
-# autodetect the available device
-GPU_IDX = 1  # which GPU to use
-if torch.cuda.is_available():
-    num_gpus = torch.cuda.device_count()
-    print(f"Number of available GPUs: {num_gpus}")
-    assert GPU_IDX < num_gpus, f"GPU index {GPU_IDX} not available."
-    device = torch.device(f"cuda:{GPU_IDX}")
-    print(f"Using GPU: {GPU_IDX}")
-else:
-    print("CUDA is not available. Using CPU instead.")
-    device = torch.device("cpu")
-
-
-# Configuration for models and their adapters
-model_config = {
-    "Base Model": "polygraf-ai/poly-humanizer-base",
-    "Large Model": "polygraf-ai/poly-humanizer-large",
-    "XL Model": {
-        "path": "google/flan-t5-xl",
-        "adapters": {
-            "XL Model Adapter": "polygraf-ai/poly-humanizer-XL-adapter",
-            # "XL Law Model Adapter": "polygraf-ai/poly-humanizer-XL-law-adapter",
-            # "XL Marketing Model Adapter": "polygraf-ai/marketing-cleaned-13K-grad-acum-4-full",
-            # "XL Child Style Model Adapter": "polygraf-ai/poly-humanizer-XL-children-adapter-checkpoint-4000",
-        },
-    },
+from scipy.special import softmax
+import language_tool_python
+import nltk
+import torch
+from transformers import GPT2LMHeadModel, GPT2TokenizerFast
+from transformers import AutoTokenizer, AutoModelForSequenceClassification, pipeline
+
+from utils import remove_special_characters
+from plagiarism import google_search, months, domain_list, build_date
+from humanize import paraphrase_text, device
+from ai_generate import generate
+
+print(f"Using device: {device}")
+
+models = {
+    "Polygraf AI (Base Model)": AutoModelForSequenceClassification.from_pretrained(
+        "polygraf-ai/bc-roberta-openai-2sent"
+    ).to(device),
+    "Polygraf AI (Advanced Model)": AutoModelForSequenceClassification.from_pretrained(
+        "polygraf-ai/bc_combined_3sent"
+    ).to(device),
+}
+tokenizers = {
+    "Polygraf AI (Base Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc-roberta-openai-2sent"),
+    "Polygraf AI (Advanced Model)": AutoTokenizer.from_pretrained("polygraf-ai/bc_combined_3sent"),
 }
 
-# cache the base models, tokenizers, and adapters
-models, tokenizers = {}, {}
-for name, config in model_config.items():
-    path = config if isinstance(config, str) else config["path"]
-    # initialize model and tokenizer
-    model = T5ForConditionalGeneration.from_pretrained(path, torch_dtype=torch.bfloat16).to(device)
-    models[name] = model
-    tokenizers[name] = T5Tokenizer.from_pretrained(path)
-    # load all avalable adapters, each being additional roughly 150M parameters
-    if isinstance(config, dict) and "adapters" in config:
-        for adapter_name, adapter_path in config["adapters"].items():
-            model.load_adapter(adapter_path, adapter_name=adapter_name)
-            print(f"Loaded adapter: {adapter_name}, Num. params: {model.num_parameters()}")
-
-
-def paraphrase_text(
-    text,
-    progress=gr.Progress(),
-    model_name="Base Model",
-    temperature=1.2,
-    repetition_penalty=1.0,
-    top_k=50,
-    length_penalty=1.0,
-):
-    progress(0, desc="Starting to Humanize")
-    progress(0.05)
-    # select the model, tokenizer and adapter
-    if "XL" in model_name:  # dynamic adapter load/unload for XL models
-        # all adapter models use the XL model as the base
-        tokenizer, model = tokenizers["XL Model"], models["XL Model"]
-        # set the adapter if it's not already set
-        if model.active_adapters() != [f"{model_name} Adapter"]:
-            model.set_adapter(f"{model_name} Adapter")
-            print(f"Using adapter: {model_name} Adapter")
-    else:
-        tokenizer = tokenizers[model_name]
-        model = models[model_name]
+# grammar correction tool
+tool = language_tool_python.LanguageTool("en-US")
+
+
+# Function to move model to the appropriate device
+def to_device(model):
+    return model.to(device)
+
+
+def copy_to_input(text):
+    return text
+
+
+def remove_bracketed_numbers(text):
+    pattern = r"^\[\d+\]"
+    cleaned_text = re.sub(pattern, "", text)
+    return cleaned_text
+
+
+def clean_text(text: str) -> str:
+    paragraphs = text.split("\n\n")
+    cleaned_paragraphs = []
+    for paragraph in paragraphs:
+        cleaned = re.sub(r"\s+", " ", paragraph).strip()
+        cleaned = re.sub(r"(?<=\.) ([a-z])", lambda x: x.group(1).upper(), cleaned)
+        cleaned_paragraphs.append(cleaned)
+    return "\n".join(cleaned_paragraphs)
+
+
+def split_text_from_refs(text: str, sep="\n"):
+    lines = text.split("\n")
+    references = []
+    article_text = []
+    index_pattern = re.compile(r"\[(\d+)\]")
+    in_references = False
+
+    for line in lines:
+        if line.strip().lower() == "references" or line.strip().lower() == "references:":
+            in_references = True
+            continue
+        if line.strip().lower().startswith("references:"):
+            in_references = True
+        if in_references:
+            matches = index_pattern.split(line)
+            for match in matches:
+                if match.strip() and not match.isdigit() and not match.strip().lower().startswith("references:"):
+                    references.append(match.strip())
+        else:
+            article_text.append(line)
+
+    formatted_refs = []
+    for i, ref in enumerate(references, 1):
+        ref = remove_bracketed_numbers(ref)
+        formatted_refs.append(f"[{i}] {ref}{sep}")
+
+    return "\n\n".join(article_text), f"{sep}{sep}References:{sep}" + f"{sep}".join(formatted_refs)
+
+
+def ends_with_references(text):
+    # Define a regular expression pattern for variations of "References:"
+    pattern = re.compile(r"\b[Rr]eferences:\s*$", re.IGNORECASE | re.MULTILINE)
+
+    # Check if the text ends with any form of "References:"
+    return bool(pattern.search(text.strip()))
 
-    # Split the text into paragraphs
+
+def format_and_correct_language_check(text: str) -> str:
+    return tool.correct(text)
+
+
+def predict(model, tokenizer, text):
+    text = remove_special_characters(text)
+    bc_token_size = 256
+    with torch.no_grad():
+        model.eval()
+        tokens = tokenizer(
+            text,
+            padding="max_length",
+            truncation=True,
+            max_length=bc_token_size,
+            return_tensors="pt",
+        ).to(device)
+        output = model(**tokens)
+        output_norm = softmax(output.logits.detach().cpu().numpy(), 1)[0]
+        output_norm = {"HUMAN": output_norm[0], "AI": output_norm[1]}
+        return output_norm
+
+
+def ai_generated_test(text, model="BC Original"):
+    return predict(models[model], tokenizers[model], text)
+
+
+def detection_polygraf(text, model="BC Original"):
+    # sentences = split_into_sentences(text)
+    sentences = nltk.sent_tokenize(text)
+    num_sentences = len(sentences)
+    scores = defaultdict(list)
+
+    overall_scores = []
+
+    # Process each chunk of 3 sentences and store the score for each sentence in the chunk
+    for i in range(num_sentences):
+        chunk = " ".join(sentences[i : i + 3])
+        if chunk:
+            # result = classifier(chunk)
+            result = ai_generated_test(chunk, model)
+            score = result["AI"]
+            for j in range(i, min(i + 3, num_sentences)):
+                scores[j].append(score)
+
+    # Calculate the average score for each sentence and apply color coding
     paragraphs = text.split("\n")
-    humanized_paragraphs = []
-
-    for paragraph in progress.tqdm(paragraphs, desc="Humanizing"):
-        # paraphrase each chunk of text
-        sentences = sent_tokenize(paragraph)
-        paraphrases = []
-        for sentence in sentences:
-            sentence = sentence.strip()
-            if len(sentence) == 0:
-                continue
-            inputs = tokenizer(
-                "Please paraphrase this sentence: " + sentence,
-                return_tensors="pt",
-            ).to(device)
-            outputs = model.generate(
-                **inputs,
-                do_sample=True,
-                temperature=temperature,
-                repetition_penalty=repetition_penalty,
-                max_length=128,
-                top_k=top_k,
-                length_penalty=length_penalty,
-            )
-            paraphrased_sentence = tokenizer.decode(outputs[0], skip_special_tokens=True)
-            paraphrases.append(paraphrased_sentence)
-            print(f"\nOriginal: {sentence}")
-            print(f"Paraphrased: {paraphrased_sentence}")
-        combined_paraphrase = " ".join(paraphrases)
-        humanized_paragraphs.append(combined_paraphrase)
-
-    humanized_text = "\n".join(humanized_paragraphs)
-    return humanized_text
+    paragraphs = [s for s in paragraphs if s.strip()]
+    colored_paragraphs = []
+    i = 0
+    for paragraph in paragraphs:
+        temp_sentences = nltk.sent_tokenize(paragraph)
+        colored_sentences = []
+        for sentence in temp_sentences:
+            if scores[i]:
+                avg_score = sum(scores[i]) / len(scores[i])
+                if avg_score >= 0.65:
+                    colored_sentence = f"<span style='background-color:red;'>{sentence}</span>"
+                else:
+                    colored_sentence = sentence
+                colored_sentences.append(colored_sentence)
+                overall_scores.append(avg_score)
+            i = i + 1
+        combined_sentences = " ".join(colored_sentences)
+        colored_paragraphs.append(combined_sentences)
+
+    overall_score = sum(overall_scores) / len(overall_scores)
+    overall_score = {"HUMAN": 1 - overall_score, "AI": overall_score}
+    return overall_score, "<br><br>".join(colored_paragraphs)
+
+
+ai_check_options = [
+    "Polygraf AI (Base Model)",
+    "Polygraf AI (Advanced Model)",
+]
+
+
+def ai_generated_test_sapling(text: str) -> Dict:
+    response = requests.post(
+        "https://api.sapling.ai/api/v1/aidetect", json={"key": "60L9BPSVPIIOEZM0CD1DQWRBPJIUR7SB", "text": f"{text}"}
+    )
+    return {"AI": response.json()["score"], "HUMAN": 1 - response.json()["score"]}
+
+
+class GPT2PPL:
+    def __init__(self):
+        self.device = device
+        self.model = to_device(GPT2LMHeadModel.from_pretrained("gpt2"))
+        self.tokenizer = GPT2TokenizerFast.from_pretrained("gpt2")
+
+    def __call__(self, text):
+        encodings = self.tokenizer(text, return_tensors="pt")
+        encodings = {k: v.to(self.device) for k, v in encodings.items()}
+        max_length = self.model.config.n_positions
+        stride = 512
+        seq_len = encodings.input_ids.size(1)
+
+        nlls = []
+        for i in range(0, seq_len, stride):
+            begin_loc = max(i + stride - max_length, 0)
+            end_loc = min(i + stride, seq_len)
+            trg_len = end_loc - i
+            input_ids = encodings.input_ids[:, begin_loc:end_loc].to(self.device)
+            target_ids = input_ids.clone()
+            target_ids[:, :-trg_len] = -100
+
+            with torch.no_grad():
+                outputs = self.model(input_ids, labels=target_ids)
+                neg_log_likelihood = outputs.loss * trg_len
+
+            nlls.append(neg_log_likelihood)
+
+        ppl = torch.exp(torch.stack(nlls).sum() / end_loc)
+        return {"AI": float(ppl), "HUMAN": 1 - float(ppl)}
+
+
+def ai_generated_test_gptzero(text):
+    gptzero_model = GPT2PPL()
+    result = gptzero_model(text)
+    return result, None
+
+
+def highlighter_polygraf(text, model="Polygraf AI (Base Model)"):
+    body, references = split_text_from_refs(text, "<br>")
+    score, text = detection_polygraf(text=body, model=model)
+    text = text + "<br>" + references
+    return score, text
+
+
+def ai_check(text: str, option: str):
+    if option.startswith("Polygraf AI"):
+        return highlighter_polygraf(text, option)
+    elif option == "Sapling AI":
+        return ai_generated_test_sapling(text)
+    elif option == "GPTZero":
+        return ai_generated_test_gptzero(text)
+    else:
+        return highlighter_polygraf(text, option)
+
+
+def generate_prompt(settings: Dict[str, str]) -> str:
+    prompt = f"""
+    I am a {settings['role']}
+    Write a {settings['article_length']} words (around) {settings['format']} on {settings['topic']}.
+
+    Style and Tone:
+    - Writing style: {settings['writing_style']}
+    - Tone: {settings['tone']}
+    - Target audience: {settings['user_category']}
+
+    Content:
+    - Depth: {settings['depth_of_content']}
+    - Structure: {', '.join(settings['structure'])}
+
+    Keywords to incorporate:
+    {', '.join(settings['keywords'])}
+
+    Additional requirements:
+    - Don't start with "Here is a...", start with the requested text directly
+    - Include {settings['num_examples']} relevant examples or case studies
+    - Incorporate data or statistics from {', '.join(settings['references'])}
+    - End with a {settings['conclusion_type']} conclusion
+    - Add a "References" section in the format "References:\n" at the end with at least 3 credible sources, formatted as [1], [2], etc. with each source on their own line
+    - Do not make any headline, title bold.
+    {settings['sources']}
+
+    Ensure proper paragraph breaks for better readability.
+    Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
+    """
+    return prompt
+
+
+def regenerate_prompt(settings: Dict[str, str]) -> str:
+    prompt = f"""
+    I am a {settings['role']}
+    "{settings['generated_article']}"
+    Edit the given text based on user comments.
+
+    Comments:
+    - Don't start with "Here is a...", start with the requested text directly
+    - {settings['user_comments']}
+    - The original content should not be changed. Make minor modifications based on user comments above.
+    - Keep the references the same as the given text in the same format.
+    - Do not make any headline, title bold.
+    {settings['sources']}
+
+    Ensure proper paragraph breaks for better readability.
+    Avoid any references to artificial intelligence, language models, or the fact that this is generated by an AI, and do not mention something like here is the article etc.
+    """
+    return prompt
+
+
+def generate_article(
+    input_role: str,
+    topic: str,
+    keywords: str,
+    article_length: str,
+    format: str,
+    writing_style: str,
+    tone: str,
+    user_category: str,
+    depth_of_content: str,
+    structure: str,
+    references: str,
+    num_examples: str,
+    conclusion_type: str,
+    ai_model: str,
+    content_string: str,
+    # api_key: str = None,
+    pdf_file_input=None,
+    generated_article: str = None,
+    user_comments: str = None,
+) -> str:
+    settings = {
+        "role": input_role,
+        "topic": topic,
+        "keywords": [k.strip() for k in keywords.split(",")],
+        "article_length": article_length,
+        "format": format,
+        "writing_style": writing_style,
+        "tone": tone,
+        "user_category": user_category,
+        "depth_of_content": depth_of_content,
+        "structure": [s.strip() for s in structure.split(",")],
+        "references": [r.strip() for r in references.split(",")],
+        "num_examples": num_examples,
+        "conclusion_type": conclusion_type,
+        "sources": content_string,
+        "generated_article": generated_article,
+        "user_comments": user_comments,
+    }
+
+    if generated_article:
+        prompt = regenerate_prompt(settings)
+    else:
+        prompt = generate_prompt(settings)
+
+    print("Generated Prompt...\n", prompt)
+    article = generate(
+        prompt,
+        ai_model,
+        pdf_file_input,  # api_key
+    )
+
+    return clean_text(article)
+
+
+def humanize(
+    text: str,
+    model: str,
+    temperature: float = 1.2,
+    repetition_penalty: float = 1,
+    top_k: int = 50,
+    length_penalty: float = 1,
+) -> str:
+    body, references = split_text_from_refs(text)
+    result = paraphrase_text(
+        text=body,
+        model_name=model,
+        temperature=temperature,
+        repetition_penalty=repetition_penalty,
+        top_k=top_k,
+        length_penalty=length_penalty,
+    )
+    result = result + "\n\n" + references
+    return format_and_correct_language_check(result)
+
+
+def update_visibility_api(model: str):
+    if model in ["OpenAI GPT 3.5", "OpenAI GPT 4"]:
+        return gr.update(visible=True)
+    else:
+        return gr.update(visible=False)
+
+
+def format_references(text: str) -> str:
+    lines = text.split("\n")
+    references = []
+    article_text = []
+    index_pattern = re.compile(r"\[(\d+)\]")
+    in_references = False
+
+    for line in lines:
+        if line.strip().lower() == "references" or line.strip().lower() == "references:":
+            in_references = True
+            continue
+        if line.strip().lower().startswith("references:"):
+            in_references = True
+        if in_references:
+            matches = index_pattern.split(line)
+            for match in matches:
+                if match.strip() and not match.isdigit() and not match.strip().lower().startswith("references:"):
+                    references.append(match.strip())
+        else:
+            article_text.append(line)
+
+    formatted_refs = []
+    for i, ref in enumerate(references, 1):
+        ref = remove_bracketed_numbers(ref)
+        formatted_refs.append(f"[{i}] {ref}\n")
+
+    return "\n\n".join(article_text) + "\n\nReferences:\n" + "\n".join(formatted_refs)
+
+
+def generate_and_format(
+    input_role,
+    topic,
+    keywords,
+    article_length,
+    format,
+    writing_style,
+    tone,
+    user_category,
+    depth_of_content,
+    structure,
+    references,
+    num_examples,
+    conclusion_type,
+    ai_model,
+    # api_key,
+    google_search_check,
+    year_from,
+    month_from,
+    day_from,
+    year_to,
+    month_to,
+    day_to,
+    domains_to_include,
+    include_sites,
+    exclude_sites,
+    pdf_file_input,
+    generated_article: str = None,
+    user_comments: str = None,
+):
+    content_string = ""
+    url_content = None
+    if google_search_check:
+        date_from = build_date(year_from, month_from, day_from)
+        date_to = build_date(year_to, month_to, day_to)
+        sorted_date = f"date:r:{date_from}:{date_to}"
+        final_query = topic
+        if include_sites:
+            site_queries = [f"site:{site.strip()}" for site in include_sites.split(",")]
+            final_query += " " + " OR ".join(site_queries)
+        if exclude_sites:
+            exclude_queries = [f"-site:{site.strip()}" for site in exclude_sites.split(",")]
+            final_query += " " + " ".join(exclude_queries)
+        print(f"Google Search Query: {final_query}")
+        url_content = google_search(final_query, sorted_date, domains_to_include)
+        content_string = "\n".join(
+            f"{url.strip()}: \n{content.strip()[:2000]}" for url, content in url_content.items()
+        )
+        content_string = (
+            "Use the trusted information here from the URLs and add them as References:\n" + content_string
+        )
+    article = generate_article(
+        input_role,
+        topic,
+        keywords,
+        article_length,
+        format,
+        writing_style,
+        tone,
+        user_category,
+        depth_of_content,
+        structure,
+        references,
+        num_examples,
+        conclusion_type,
+        ai_model,
+        content_string,
+        # api_key,
+        pdf_file_input,
+        generated_article,
+        user_comments,
+    )
+    if ends_with_references(article) and url_content is not None:
+        for url in url_content.keys():
+            article += f"\n{url}"
+
+    return format_references(article)
+
+
+def create_interface():
+    with gr.Blocks(
+        theme=gr.themes.Default(
+            primary_hue=gr.themes.colors.pink, secondary_hue=gr.themes.colors.yellow, neutral_hue=gr.themes.colors.gray
+        ),
+        css="""
+        .input-highlight-pink block_label {background-color: #008080}
+        """,
+    ) as demo:
+        today = date.today()
+        # dd/mm/YY
+        d1 = today.strftime("%d/%B/%Y")
+        d1 = d1.split("/")
+        gr.Markdown("# Polygraf AI Content Writer", elem_classes="text-center text-3xl mb-6")
+
+        with gr.Row():
+            with gr.Column(scale=2):
+                with gr.Group():
+                    gr.Markdown("## Article Configuration", elem_classes="text-xl mb-4")
+                    input_role = gr.Textbox(label="I am a", placeholder="Enter your role", value="Student")
+                    input_topic = gr.Textbox(
+                        label="Topic",
+                        placeholder="Enter the main topic of your article",
+                        elem_classes="input-highlight-pink",
+                    )
+                    input_keywords = gr.Textbox(
+                        label="Keywords",
+                        placeholder="Enter comma-separated keywords",
+                        elem_classes="input-highlight-yellow",
+                    )
+
+                    with gr.Row():
+                        input_format = gr.Dropdown(
+                            choices=[
+                                "Article",
+                                "Essay",
+                                "Blog post",
+                                "Report",
+                                "Research paper",
+                                "News article",
+                                "White paper",
+                                "LinkedIn post",
+                                "X (Twitter) post",
+                                "Instagram Video Content",
+                                "TikTok Video Content",
+                                "Facebook post",
+                            ],
+                            value="Article",
+                            label="Format",
+                            elem_classes="input-highlight-turquoise",
+                        )
+
+                        input_length = gr.Slider(
+                            minimum=50,
+                            maximum=5000,
+                            step=50,
+                            value=300,
+                            label="Article Length",
+                            elem_classes="input-highlight-pink",
+                        )
+
+                    with gr.Row():
+                        input_writing_style = gr.Dropdown(
+                            choices=[
+                                "Formal",
+                                "Informal",
+                                "Technical",
+                                "Conversational",
+                                "Journalistic",
+                                "Academic",
+                                "Creative",
+                            ],
+                            value="Formal",
+                            label="Writing Style",
+                            elem_classes="input-highlight-yellow",
+                        )
+                        input_tone = gr.Dropdown(
+                            choices=["Friendly", "Professional", "Neutral", "Enthusiastic", "Skeptical", "Humorous"],
+                            value="Professional",
+                            label="Tone",
+                            elem_classes="input-highlight-turquoise",
+                        )
+
+                    input_user_category = gr.Dropdown(
+                        choices=[
+                            "Students",
+                            "Professionals",
+                            "Researchers",
+                            "General Public",
+                            "Policymakers",
+                            "Entrepreneurs",
+                        ],
+                        value="General Public",
+                        label="Target Audience",
+                        elem_classes="input-highlight-pink",
+                    )
+                    input_depth = gr.Dropdown(
+                        choices=[
+                            "Surface-level overview",
+                            "Moderate analysis",
+                            "In-depth research",
+                            "Comprehensive study",
+                        ],
+                        value="Moderate analysis",
+                        label="Depth of Content",
+                        elem_classes="input-highlight-yellow",
+                    )
+                    input_structure = gr.Dropdown(
+                        choices=[
+                            "Introduction, Body, Conclusion",
+                            "Abstract, Introduction, Methods, Results, Discussion, Conclusion",
+                            "Executive Summary, Problem Statement, Analysis, Recommendations, Conclusion",
+                            "Introduction, Literature Review, Methodology, Findings, Analysis, Conclusion",
+                        ],
+                        value="Introduction, Body, Conclusion",
+                        label="Structure",
+                        elem_classes="input-highlight-turquoise",
+                    )
+                    input_references = gr.Dropdown(
+                        choices=[
+                            "Academic journals",
+                            "Industry reports",
+                            "Government publications",
+                            "News outlets",
+                            "Expert interviews",
+                            "Case studies",
+                        ],
+                        value="News outlets",
+                        label="References",
+                        elem_classes="input-highlight-pink",
+                    )
+                    input_num_examples = gr.Dropdown(
+                        choices=["1-2", "3-4", "5+"],
+                        value="1-2",
+                        label="Number of Examples/Case Studies",
+                        elem_classes="input-highlight-yellow",
+                    )
+                    input_conclusion = gr.Dropdown(
+                        choices=["Summary", "Call to Action", "Future Outlook", "Thought-provoking Question"],
+                        value="Call to Action",
+                        label="Conclusion Type",
+                        elem_classes="input-highlight-turquoise",
+                    )
+                gr.Markdown("# Search Options", elem_classes="text-center text-3xl mb-6")
+                with gr.Row():
+                    google_search_check = gr.Checkbox(label="Enable Google Search For Recent Sources", value=False)
+                with gr.Group(visible=True) as search_options:
+                    with gr.Row():
+                        include_sites = gr.Textbox(
+                            label="Include Specific Websites",
+                            placeholder="Enter comma-separated keywords",
+                            elem_classes="input-highlight-yellow",
+                        )
+                    with gr.Row():
+                        exclude_sites = gr.Textbox(
+                            label="Exclude Specific Websites",
+                            placeholder="Enter comma-separated keywords",
+                            elem_classes="input-highlight-yellow",
+                        )
+                    with gr.Row():
+                        domains_to_include = gr.Dropdown(
+                            domain_list,
+                            value=domain_list,
+                            multiselect=True,
+                            label="Domains To Include",
+                        )
+                    with gr.Row():
+                        month_from = gr.Dropdown(
+                            choices=months,
+                            label="From Month",
+                            value="January",
+                            interactive=True,
+                        )
+                        day_from = gr.Textbox(label="From Day", value="01")
+                        year_from = gr.Textbox(label="From Year", value="2000")
+
+                    with gr.Row():
+                        month_to = gr.Dropdown(
+                            choices=months,
+                            label="To Month",
+                            value=d1[1],
+                            interactive=True,
+                        )
+                        day_to = gr.Textbox(label="To Day", value=d1[0])
+                        year_to = gr.Textbox(label="To Year", value=d1[2])
+
+                gr.Markdown("# Add Optional PDF File with Information", elem_classes="text-center text-3xl mb-6")
+                pdf_file_input = gr.File(label="Upload PDF")
+
+                with gr.Group():
+                    gr.Markdown("## AI Model Configuration", elem_classes="text-xl mb-4")
+                    ai_generator = gr.Dropdown(
+                        choices=[
+                            "OpenAI GPT 4",
+                            "OpenAI GPT 4o",
+                            "OpenAI GPT 4o Mini",
+                            "Claude Sonnet 3.5",
+                            "Gemini 1.5 Pro",
+                            "LLaMA 3",
+                        ],
+                        value="OpenAI GPT 4o Mini",
+                        label="AI Model",
+                        elem_classes="input-highlight-pink",
+                    )
+                    # input_api = gr.Textbox(label="API Key", visible=False)
+                    # ai_generator.change(update_visibility_api, ai_generator, input_api)
+
+                generate_btn = gr.Button("Generate Article", variant="primary")
+
+                with gr.Accordion("Advanced Humanizer Settings", open=False):
+                    with gr.Row():
+                        model_dropdown = gr.Radio(
+                            choices=[
+                                "Base Model",
+                                "Large Model",
+                                "XL Model",
+                                # "XL Law Model",
+                                # "XL Marketing Model",
+                                # "XL Child Style Model",
+                            ],
+                            value="Large Model",
+                            label="Humanizer Model Version",
+                        )
+                    with gr.Row():
+                        temperature_slider = gr.Slider(
+                            minimum=0.5, maximum=2.0, step=0.1, value=1.3, label="Temperature"
+                        )
+                        top_k_slider = gr.Slider(minimum=0, maximum=300, step=25, value=50, label="Top k")
+                    with gr.Row():
+                        repetition_penalty_slider = gr.Slider(
+                            minimum=1.0, maximum=2.0, step=0.1, value=1, label="Repetition Penalty"
+                        )
+                        length_penalty_slider = gr.Slider(
+                            minimum=0.0, maximum=2.0, step=0.1, value=1.0, label="Length Penalty"
+                        )
+
+            with gr.Column(scale=3):
+                output_article = gr.Textbox(label="Generated Article", lines=20)
+                ai_comments = gr.Textbox(
+                    label="Add comments to help edit generated text", interactive=True, visible=False
+                )
+                regenerate_btn = gr.Button("Regenerate Article", variant="primary", visible=False)
+                ai_detector_dropdown = gr.Radio(
+                    choices=ai_check_options, label="Select AI Detector", value="Polygraf AI"
+                )
+                ai_check_btn = gr.Button("AI Check")
+
+                with gr.Accordion("AI Detection Results", open=True):
+                    ai_check_result = gr.Label(label="AI Check Result")
+                    highlighted_text = gr.HTML(label="Sentence Breakdown", visible=False)
+                humanize_btn = gr.Button("Humanize")
+                # humanized_output = gr.Textbox(label="Humanized Article", lines=20, elem_classes=["custom-textbox"])
+                humanized_output = gr.Markdown(label="Humanized Article", value="\n\n\n\n", render=True)
+                copy_to_input_btn = gr.Button("Copy to Input for AI Check")
+
+        def regenerate_visible(text):
+            if text:
+                return gr.update(visible=True)
+            else:
+                return gr.update(visible=False)
+
+        def highlight_visible(text):
+            if text.startswith("Polygraf"):
+                return gr.update(visible=True)
+            else:
+                return gr.update(visible=False)
+
+        def search_visible(toggle):
+            if toggle:
+                return gr.update(visible=True)
+            else:
+                return gr.update(visible=False)
+
+        google_search_check.change(search_visible, inputs=google_search_check, outputs=search_options)
+        ai_detector_dropdown.change(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
+        output_article.change(regenerate_visible, inputs=output_article, outputs=ai_comments)
+        ai_comments.change(regenerate_visible, inputs=output_article, outputs=regenerate_btn)
+        ai_check_btn.click(highlight_visible, inputs=ai_detector_dropdown, outputs=highlighted_text)
+
+        generate_btn.click(
+            fn=generate_and_format,
+            inputs=[
+                input_role,
+                input_topic,
+                input_keywords,
+                input_length,
+                input_format,
+                input_writing_style,
+                input_tone,
+                input_user_category,
+                input_depth,
+                input_structure,
+                input_references,
+                input_num_examples,
+                input_conclusion,
+                ai_generator,
+                # input_api,
+                google_search_check,
+                year_from,
+                month_from,
+                day_from,
+                year_to,
+                month_to,
+                day_to,
+                domains_to_include,
+                include_sites,
+                exclude_sites,
+                pdf_file_input,
+            ],
+            outputs=[output_article],
+        )
+
+        regenerate_btn.click(
+            fn=generate_and_format,
+            inputs=[
+                input_role,
+                input_topic,
+                input_keywords,
+                input_length,
+                input_format,
+                input_writing_style,
+                input_tone,
+                input_user_category,
+                input_depth,
+                input_structure,
+                input_references,
+                input_num_examples,
+                input_conclusion,
+                ai_generator,
+                # input_api,
+                google_search_check,
+                year_from,
+                month_from,
+                day_from,
+                year_to,
+                month_to,
+                day_to,
+                domains_to_include,
+                pdf_file_input,
+                output_article,
+                include_sites,
+                exclude_sites,
+                ai_comments,
+            ],
+            outputs=[output_article],
+        )
+
+        ai_check_btn.click(
+            fn=ai_check,
+            inputs=[output_article, ai_detector_dropdown],
+            outputs=[ai_check_result, highlighted_text],
+        )
+
+        humanize_btn.click(
+            fn=humanize,
+            inputs=[
+                output_article,
+                model_dropdown,
+                temperature_slider,
+                repetition_penalty_slider,
+                top_k_slider,
+                length_penalty_slider,
+            ],
+            outputs=[humanized_output],
+        )
+
+        copy_to_input_btn.click(
+            fn=copy_to_input,
+            inputs=[humanized_output],
+            outputs=[output_article],
+        )
+
+    return demo
+
+
+if __name__ == "__main__":
+    demo = create_interface()
+    # demo.launch(server_name="0.0.0.0", share=True, server_port=7890)
+    demo.launch(server_name="0.0.0.0")
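
The new humanize.py shown on the added side carries the detector and writer code; its detection_polygraf scores every overlapping 3-sentence chunk with the classifier, then averages, for each sentence, the scores of all chunks containing it, and highlights sentences whose average is at or above 0.65. A self-contained sketch of just that windowing-and-averaging step, with the classifier stubbed out (window_scores and the stub scores are illustrative, not part of the commit):

from collections import defaultdict

def window_scores(sentences, score_chunk, window=3):
    # Mirror detection_polygraf: each chunk's score is credited to every
    # sentence the chunk covers, then per-sentence scores are averaged.
    scores = defaultdict(list)
    for i in range(len(sentences)):
        chunk = " ".join(sentences[i : i + window])
        if chunk:
            s = score_chunk(chunk)
            for j in range(i, min(i + window, len(sentences))):
                scores[j].append(s)
    return [sum(scores[k]) / len(scores[k]) for k in range(len(sentences))]

# Stub standing in for ai_generated_test(chunk, model)["AI"]:
chunk_scores = iter([0.9, 0.2, 0.7, 0.1])
print(window_scores(["s1.", "s2.", "s3.", "s4."], lambda c: next(chunk_scores)))
# ~ [0.9, 0.55, 0.6, 0.33]; sentences averaging >= 0.65 get the red span.

Note that trailing chunks contain fewer than three sentences, so the last sentences are scored by fewer, shorter windows, exactly as in the committed loop.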