from transformers import GPT2LMHeadModel, AutoTokenizer, AutoModelForSeq2SeqLM
import torch
import gradio as gr


# Load a fine-tuned GPT-2 model and its tokenizer, padding with the EOS token
def load_model(model_path):
    model = GPT2LMHeadModel.from_pretrained(model_path)
    tokenizer = AutoTokenizer.from_pretrained(model_path)
    tokenizer.pad_token = tokenizer.eos_token
    model.config.pad_token_id = tokenizer.eos_token_id
    return model, tokenizer


# Load the three generation models
deleuze, tokenizer1 = load_model('genaforvena/the_soft_delerizome_machine_a_thousand_guattaris_fourth_of_plateaus_once')
gospel, tokenizer2 = load_model('genaforvena/the_soft_scum_gospel_delerizome_machine_a_thousand_guattaris')
scum, tokenizer3 = load_model('genaforvena/the_soft_scum_delerizome_machine_a_thousand_guattaris')


# Generate text one token at a time, yielding the cumulative output after each step
def generate_text_stream(model, tokenizer, prompt, max_new_tokens, temperature, top_k, top_p,
                         repetition_penalty, no_repeat_ngram_size):
    inputs = tokenizer(prompt, return_tensors="pt").input_ids.to(model.device)
    generated_tokens = 0
    output_text = ""
    print(f"Starting text generation for model: {model.config._name_or_path}")
    while generated_tokens < max_new_tokens:
        try:
            outputs = model.generate(
                inputs,
                max_new_tokens=1,
                do_sample=True,
                top_k=top_k,
                top_p=top_p,
                temperature=temperature,
                repetition_penalty=repetition_penalty,
                no_repeat_ngram_size=no_repeat_ngram_size,
                pad_token_id=tokenizer.eos_token_id,
            )
            # Decode only the newly sampled token and append it to the running text
            new_token = tokenizer.decode(outputs[0, -1], skip_special_tokens=True)
            print(f"Decoded token: {new_token}")
            output_text += new_token
            generated_tokens += 1
            print(f"Generated {generated_tokens} tokens so far")
            yield output_text
            # Feed the extended sequence back in as the prompt for the next token
            inputs = outputs
        except Exception as e:
            print(f"Error occurred during text generation: {e}")
            break
    print(f"Final output for {model.config._name_or_path}:\n{output_text}\n")


# Load BART model and tokenizer for summarization (on GPU if available)
summarization_model_name = "facebook/bart-large-cnn"
device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
summarizer_model = AutoModelForSeq2SeqLM.from_pretrained(summarization_model_name).to(device)
summarizer_tokenizer = AutoTokenizer.from_pretrained(summarization_model_name)


# Generate the summary in one pass, then yield it in chunks to simulate streaming
def generate_summary_stream(model, tokenizer, text, max_length, min_length):
    inputs = tokenizer(text, return_tensors="pt", max_length=1024, truncation=True).to(model.device)
    summary_text = ""
    print(f"Starting summary generation for text: {text[:100]}...")
    output = model.generate(
        inputs.input_ids,
        attention_mask=inputs.attention_mask,
        max_length=max_length,
        min_length=min_length,
        num_beams=4,
        no_repeat_ngram_size=2,
        early_stopping=False,
    )
    # Decode the generated summary in chunks of token ids to simulate streaming
    chunk_size = 50
    for i in range(0, len(output[0]), chunk_size):
        chunk = output[0][i:i + chunk_size]
        chunk_text = tokenizer.decode(chunk, skip_special_tokens=True)
        summary_text += chunk_text
        print(f"Generated summary chunk: {chunk_text}")
        yield summary_text
    print(f"Final summary:\n{summary_text}\n")


def reply(prompt):
    yield "Morosia sees...."
    if len(prompt) == 0:
        prompt = "class "

    # --- Phase 1: Generate and Stream Combined Output ---
    combined_output = ""

    # Stream deleuze output
    print("Starting Deleuze model generation...")
    for text in generate_text_stream(
        deleuze, tokenizer1, prompt,
        max_new_tokens=300, temperature=1.5, top_k=180, top_p=0.95,
        repetition_penalty=1.2, no_repeat_ngram_size=2,
    ):
        # generate_text_stream yields the cumulative output, so replace rather than append
        combined_output = text
        print(f"Deleuze output: {text}")
        yield combined_output

    # Stream scum output (appended to the existing combined output)
    print("Starting Scum model generation...")
    deleuze_output = combined_output
    scum_output = ""
    for text in generate_text_stream(
        scum, tokenizer3, combined_output,
        max_new_tokens=100, temperature=1.7, top_k=320, top_p=0.9,
        repetition_penalty=1.3, no_repeat_ngram_size=2,
    ):
        # text is again cumulative, so keep only the latest value to avoid duplicating chunks
        scum_output = text
        print(f"Scum output: {text}")
        yield deleuze_output + scum_output
    combined_output = deleuze_output + scum_output

    # Stream gospel output (appended to the existing combined output)
    print("Starting Gospel model generation...")
    pre_gospel_output = combined_output
    gospel_output = ""
    for text in generate_text_stream(
        gospel, tokenizer2, scum_output,
        max_new_tokens=100, temperature=1.0, top_k=190, top_p=0.95,
        repetition_penalty=1.2, no_repeat_ngram_size=2,
    ):
        gospel_output = text
        print(f"Gospel output: {text}")
        yield pre_gospel_output + gospel_output
    combined_output = pre_gospel_output + gospel_output

    # --- Phase 2: Hand off the combined output for summarization ---
    print(f"Streaming final_output_for_summary: {combined_output[:100]}...")  # first 100 chars
    yield "Morosia is analysing her visions... Wait..."

    # --- Phase 3: Generate and Stream Summary (replacing the combined output) ---
    print(f"Starting summary generation for final output: {combined_output[:100]}...")
    for text in generate_summary_stream(summarizer_model, summarizer_tokenizer, combined_output,
                                        max_length=100, min_length=30):
        print(f"Streaming summary: {text}")
        yield text


# Gradio interface
iface = gr.Interface(fn=reply, inputs="text", outputs="text")
iface.launch()
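
# Note: reply() is a generator, so its intermediate yields only appear incrementally in the
# UI when Gradio's queue is enabled. On Gradio 3.x that is not the default; if the output
# does not stream, try `iface.queue().launch()` instead of `iface.launch()` above.
# (This depends on the Gradio version in use, which the script does not pin.)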