import gradio as gr
import os
import requests
import torch
from transformers import (
    LEDTokenizer, LEDForConditionalGeneration,
    BartTokenizer, BartForConditionalGeneration,
    PegasusTokenizer, PegasusForConditionalGeneration,
    AutoTokenizer, AutoModelForSeq2SeqLM
)

# OpenAI API Key
OPENAI_API_KEY = os.getenv("OPENAI_API_KEY")  # Ensure this is set in your environment variables

# List of models in priority order
MODELS = [
    {
        "name": "allenai/led-large-16384",
        "tokenizer_class": LEDTokenizer,
        "model_class": LEDForConditionalGeneration
    },
    {
        "name": "facebook/bart-large-cnn",
        "tokenizer_class": BartTokenizer,
        "model_class": BartForConditionalGeneration
    },
    {
        "name": "Falconsai/text_summarization",
        "tokenizer_class": AutoTokenizer,
        "model_class": AutoModelForSeq2SeqLM
    },
    {
        "name": "google/pegasus-xsum",
        "tokenizer_class": PegasusTokenizer,
        "model_class": PegasusForConditionalGeneration
    }
]
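
# The list above runs longest-context first: LED accepts up to 16384 input
# tokens, BART-large-CNN about 1024, and Pegasus-XSum 512 (the Falconsai
# checkpoint is T5-based, so likely around 512 as well); each fallback trades
# input capacity for a smaller, more widely available model.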

# Use a GPU when one is available; the models otherwise run on CPU
device = "cuda" if torch.cuda.is_available() else "cpu"

# Load models sequentially, skipping any that fail to download or initialize
loaded_models = []
for model_info in MODELS:
    try:
        tokenizer = model_info["tokenizer_class"].from_pretrained(model_info["name"])
        model = model_info["model_class"].from_pretrained(model_info["name"]).to(device)
        loaded_models.append({"name": model_info["name"], "tokenizer": tokenizer, "model": model})
        print(f"Loaded model: {model_info['name']}")
    except Exception as e:
        print(f"Failed to load {model_info['name']}: {e}")

def summarize_with_transformers(text):
    """
    Try summarizing with locally loaded Transformer models in order of priority.
    """
    for model_data in loaded_models:
        try:
            tokenizer = model_data["tokenizer"]
            model = model_data["model"]

            # Truncate to each tokenizer's own limit rather than a single
            # hard-coded length: model_max_length is 16384 for LED but only
            # about 1024 for BART and 512 for Pegasus, so a fixed 16384 would
            # overrun the shorter-context models
            max_input = min(16384, tokenizer.model_max_length)
            inputs = tokenizer([text], max_length=max_input, return_tensors="pt", truncation=True).to(device)

            # Generate summary; **inputs also passes the attention mask along
            summary_ids = model.generate(
                **inputs,
                num_beams=4,
                max_length=512,
                min_length=100,
                early_stopping=True
            )

            summary = tokenizer.decode(summary_ids[0], skip_special_tokens=True)
            return summary  # Return the first successful response

        except Exception as e:
            print(f"Error using {model_data['name']}: {e}")

    return None  # Indicate failure

def summarize_with_chatgpt(text):
    """
    Fallback to OpenAI ChatGPT API if all other models fail.
    """
    if not OPENAI_API_KEY:
        return "Error: No OpenAI API key provided."

    headers = {
        "Authorization": f"Bearer {OPENAI_API_KEY}",
        "Content-Type": "application/json"
    }
    
    payload = {
        "model": "gpt-3.5-turbo",
        "messages": [{"role": "user", "content": f"Summarize this article: {text}"}],
        "max_tokens": 512
    }

    response = requests.post("https://api.openai.com/v1/chat/completions", headers=headers, json=payload, timeout=60)  # timeout keeps the app from hanging if the API stalls
    
    if response.status_code == 200:
        return response.json()["choices"][0]["message"]["content"]
    else:
        return f"Error: Failed to summarize with ChatGPT (status {response.status_code})"

def summarize_text(text):
    """
    Main function to summarize text, trying Transformer models first, then ChatGPT if needed.
    """
    summary = summarize_with_transformers(text)
    
    if summary:
        return summary  # Return successful summary from a Transformer model
    
    print("All Transformer models failed. Falling back to ChatGPT...")
    return summarize_with_chatgpt(text)  # Use ChatGPT as last resort

# Gradio Interface
iface = gr.Interface(
    fn=summarize_text,
    inputs=gr.Textbox(lines=12, label="Article text"),
    outputs=gr.Textbox(label="Summary"),
    title="Multi-Model Summarizer with Fallback",
    description="Tries multiple local models for summarization, falling back to ChatGPT if they all fail."
)

if __name__ == "__main__":
    iface.launch()
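
# For a quick smoke test without launching the UI, the pipeline can be called
# directly, e.g.:
#     print(summarize_text("Paste a long article here..."))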