import gradio as gr
import os
import requests
import torch
from transformers import (
    LEDTokenizer, LEDForConditionalGeneration,
    BartTokenizer, BartForConditionalGeneration,
    PegasusTokenizer, PegasusForConditionalGeneration,
    AutoTokenizer, AutoModelForSeq2SeqLM
)
# OpenAI API Key
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY") # Ensure this is set in your environment variables
# List of models in priority order
MODELS = [
    {
        "name": "allenai/led-large-16384",
        "tokenizer_class": LEDTokenizer,
        "model_class": LEDForConditionalGeneration
    },
    {
        "name": "facebook/bart-large-cnn",
        "tokenizer_class": BartTokenizer,
        "model_class": BartForConditionalGeneration
    },
    {
        "name": "Falconsai/text_summarization",
        "tokenizer_class": AutoTokenizer,
        "model_class": AutoModelForSeq2SeqLM
    },
    {
        "name": "google/pegasus-xsum",
        "tokenizer_class": PegasusTokenizer,
        "model_class": PegasusForConditionalGeneration
    }
]
# Load models sequentially
loaded_models = []
for model_info in MODELS:
    try:
        tokenizer = model_info["tokenizer_class"].from_pretrained(model_info["name"])
        model = model_info["model_class"].from_pretrained(model_info["name"])
        loaded_models.append({"name": model_info["name"], "tokenizer": tokenizer, "model": model})
        print(f"Loaded model: {model_info['name']}")
    except Exception as e:
        print(f"Failed to load {model_info['name']}: {e}")
def summarize_with_transformers(text):
    """
    Try summarizing with locally loaded Transformer models in order of priority.
    """
    for model_data in loaded_models:
        try:
            tokenizer = model_data["tokenizer"]
            model = model_data["model"]
            # Tokenize input with truncation, capped at the model's own input limit
            # (16384 only applies to LED; BART and Pegasus accept far fewer tokens)
            max_input = min(16384, tokenizer.model_max_length)
            inputs = tokenizer([text], max_length=max_input, return_tensors="pt", truncation=True)
            # Generate summary
            summary_ids = model.generate(
                inputs["input_ids"],
                num_beams=4,
                max_length=512,
                min_length=100,
                early_stopping=True
            )
            summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            return summary  # Return the first successful response
        except Exception as e:
            print(f"Error using {model_data['name']}: {e}")
    return None  # Indicate failure
def summarize_with_chatgpt(text):
    """
    Fallback to OpenAI ChatGPT API if all other models fail.
    """
    if not OPENAI_API_KEY:
        return "Error: No OpenAI API key provided."
    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json"
    }
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": f"Summarize this article: {text}"}],
        "max_tokens": 512
    }
    # A request timeout keeps the app from hanging indefinitely on network issues
    response = requests.post(
        "https://api.openai.com/v1/chat/completions",
        headers=headers, json=payload, timeout=60
    )
    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"]
    else:
        return f"Error: Failed to summarize with ChatGPT (status {response.status_code})"
def summarize_text(text):
    """
    Main function to summarize text, trying Transformer models first, then ChatGPT if needed.
    """
    summary = summarize_with_transformers(text)
    if summary:
        return summary  # Return successful summary from a Transformer model
    print("All Transformer models failed. Falling back to ChatGPT...")
    return summarize_with_chatgpt(text)  # Use ChatGPT as last resort
# Gradio Interface
iface = gr.Interface(
    fn=summarize_text,
    inputs="text",
    outputs="text",
    title="Multi-Model Summarizer with Fallback",
    description="Tries multiple models for summarization, falling back to ChatGPT if needed."
)
if __name__ == "__main__":
    iface.launch()
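# To run locally (assuming this file is app.py, as is typical for a Gradio Space):
# set OPENAI_API_KEY in the environment so the ChatGPT fallback works, then run
# `python app.py` and open the local URL that Gradio prints.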