import torch  # needed for the tensor type hints in StopOnTokens
import streamlit as st
from transformers import AutoModelForCausalLM, AutoTokenizer, StoppingCriteria, StoppingCriteriaList

st.set_page_config(layout="wide")
st.title("AI Detectability Index (ADI)")

# st.write("As new LLMs continue to emerge at an accelerated pace, the usability of prevailing AGTD techniques might not endure indefinitely. To align with the ever-changing landscape of LLMs, we introduce the AI Detectability Index (ADI), which identifies the discernable range for LLMs based on SoTA AGTD techniques. The hypothesis behind this proposal is that both LLMs and AGTD techniques' SoTA benchmarks can be regularly updated to adapt to the evolving landscape. Additionally, ADI serves as a litmus test to gauge whether contemporary LLMs have surpassed the ADI benchmark and are thereby rendering themselves impervious to detection, or whether new methods for AI-generated text detection will require the ADI standard to be reset and re-calibrated.")

# Create a container for the box
box = st.container()

tokenizer = AutoTokenizer.from_pretrained("StabilityAI/stablelm-tuned-alpha-7b")
model = AutoModelForCausalLM.from_pretrained("StabilityAI/stablelm-tuned-alpha-7b")
model.half().cuda()


class StopOnTokens(StoppingCriteria):
    """Stop generation as soon as the last emitted token is one of the known stop ids."""

    def __call__(self, input_ids: torch.LongTensor, scores: torch.FloatTensor, **kwargs) -> bool:
        stop_ids = [50278, 50279, 50277, 1, 0]
        for stop_id in stop_ids:
            if input_ids[0][-1] == stop_id:
                return True
        return False


system_prompt = """<|SYSTEM|># StableLM Tuned (Alpha version)
- StableLM is a helpful and harmless open-source AI language model developed by StabilityAI.
- StableLM is excited to be able to help the user, but will refuse to do anything that could be considered harmful to the user.
- StableLM is more than just an information source, StableLM is also able to write poetry, short stories, and make jokes.
- StableLM will refuse to participate in anything that could harm a human.
"""

prompt = f"{system_prompt}<|USER|>What's your mood today?<|ASSISTANT|>"

inputs = tokenizer(prompt, return_tensors="pt").to("cuda")
tokens = model.generate(
    **inputs,
    max_new_tokens=64,
    temperature=0.7,
    do_sample=True,
    stopping_criteria=StoppingCriteriaList([StopOnTokens()]),
)
# print(tokenizer.decode(tokens[0], skip_special_tokens=True))

# Create two columns inside the box
with box:
    col1, col2 = st.columns([0.4, 0.6])

# Add text to the left column (40% area)
with col1:
    st.write(
        "Watermarking: Watermarking AI-generated text, first proposed by Wiggers (2022), "
        "entails embedding an imperceptible signal that establishes the authorship of a "
        "specific text with a high degree of certainty. The approach is analogous to "
        "encryption and decryption. Kirchenbauer et al. (2023a) (wv1) were the first to "
        "present operational watermarking models for LLMs, but their initial proposal drew "
        "criticism: Sadasivan et al. (2023) reported early evidence that paraphrasing can "
        "efficiently strip watermarks. In a follow-up paper, Kirchenbauer et al. (2023b) "
        "(wv2) proposed evidently more resilient watermarking techniques, asserting that "
        "paraphrasing does not significantly disrupt the watermark signal in this iteration "
        "of their work."
    )

# Add text to the right column (60% area)
with col2:
    st.write(
        "Through extensive experiments (detailed in Section 3), our study provides a "
        "thorough investigation of de-watermarking for wv1 and wv2, demonstrating that the "
        "watermarked texts produced by both methods can be circumvented, albeit with a "
        "slight drop in de-watermarking accuracy against wv2. These results strengthen our "
        "contention that text watermarking is fragile and unreliable for real-life "
        "applications."
    )
    st.write(tokenizer.decode(tokens[0], skip_special_tokens=True))
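
# ---------------------------------------------------------------------------
# Illustrative sketch (an assumption, not part of the app above): the wv1/wv2
# watermarking discussed in the col1 text follows the Kirchenbauer et al.
# green-list scheme, where each previous token seeds a PRNG that partitions
# the vocabulary into "green" and "red" halves, generation softly favours
# green tokens, and detection z-tests the observed green fraction. The helper
# below is a minimal, self-contained detector over raw token ids; the names
# `_green_list`, `green_fraction_z_score`, and `gamma` are our own, not from
# any watermarking library, and the raw-id seeding is a simplification of the
# paper's hashing step.
# ---------------------------------------------------------------------------
import math


def _green_list(prev_token_id: int, vocab_size: int, gamma: float = 0.5) -> set:
    # Seed a PRNG with the previous token id and take the first gamma*|V|
    # ids of a random permutation as the "green" list for this position.
    g = torch.Generator().manual_seed(int(prev_token_id))
    perm = torch.randperm(vocab_size, generator=g)
    return set(perm[: int(gamma * vocab_size)].tolist())


def green_fraction_z_score(token_ids, vocab_size: int, gamma: float = 0.5) -> float:
    # Count tokens landing in the green list seeded by their predecessor,
    # then z-test the count against the gamma fraction expected by chance.
    n = len(token_ids) - 1
    if n <= 0:
        return 0.0
    hits = sum(
        1
        for prev, cur in zip(token_ids, token_ids[1:])
        if cur in _green_list(prev, vocab_size, gamma)
    )
    return (hits - gamma * n) / math.sqrt(n * gamma * (1 - gamma))


# Usage (hypothetical threshold): a large positive z-score, e.g. above ~4,
# suggests the text carries the watermark; unwatermarked text stays near 0.
# z = green_fraction_z_score(tokens[0].tolist(), tokenizer.vocab_size)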
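
# ---------------------------------------------------------------------------
# Companion sketch for the col2 claim that paraphrasing circumvents wv1/wv2.
# The attack is simply "rewrite, then re-score": if paraphrasing washes out
# the watermark, the green-fraction z-score above should fall back toward 0.
# `PARAPHRASER_ID` is a deliberate placeholder, not a real checkpoint name;
# substitute any paraphrase-tuned seq2seq model before running.
# ---------------------------------------------------------------------------
from transformers import AutoModelForSeq2SeqLM

PARAPHRASER_ID = "<your-paraphrase-model>"  # placeholder, set before use


def paraphrase(text: str) -> str:
    # One-shot paraphrase with a generic "paraphrase:" prefix; prompt formats
    # differ per model, so adjust for whichever checkpoint you substitute.
    par_tok = AutoTokenizer.from_pretrained(PARAPHRASER_ID)
    par_model = AutoModelForSeq2SeqLM.from_pretrained(PARAPHRASER_ID)
    ids = par_tok("paraphrase: " + text, return_tensors="pt").input_ids
    out = par_model.generate(ids, max_new_tokens=256, do_sample=True, temperature=0.7)
    return par_tok.decode(out[0], skip_special_tokens=True)


# Example de-watermarking check (hypothetical usage):
# original = tokenizer.decode(tokens[0], skip_special_tokens=True)
# rewritten_ids = tokenizer(paraphrase(original)).input_ids
# z_after = green_fraction_z_score(rewritten_ids, tokenizer.vocab_size)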