Spaces:
Runtime error
Runtime error
Create app.py
Browse files
app.py
ADDED
@@ -0,0 +1,75 @@
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
+
import streamlit as st
|
2 |
+
# Ask the user for the news text. ``max_chars`` limits the number of
# characters (not words), so the help text states the limit in characters.
text = st.text_input('Your news is: ', max_chars=200, help='maximum 200 characters')

# The input box starts out empty and Streamlit reruns the whole script on
# every interaction; halt this run until there is something to analyse so the
# models below are never fed an empty string.
if not text.strip():
    st.stop()
|
3 |
+
|
4 |
+
# Load model directly
|
5 |
+
from transformers import AutoTokenizer, AutoModelForSequenceClassification
|
6 |
+
import torch
|
7 |
+
import numpy as np
|
8 |
+
|
9 |
+
# Load the fake-news classifier checkpoint and its tokenizer.
tokenizer = AutoTokenizer.from_pretrained("Shiiirley/fake_news_detector")
model = AutoModelForSequenceClassification.from_pretrained("Shiiirley/fake_news_detector")

# Step 1: classify the news text entered by the user.

# Example input for manual testing:
# text = "May 20, 2024 - politicsNews - Donald Trump is eating taco at HKUST right now, he is satisfied with the food and would like to come again. He was especially happy about the taco sauce, but thought it would taste better if it had pickles mixed with No. 46 concrete. He also suggested that if Australians want to deal with kangaroos' military attack on their country, they can each be given a taco newly developed by HKUST. The purple moonbeams danced on the sparkling waves, as the giggling unicorns pranced through the fields of cotton candy, while the rainbow-colored butterflies fluttered their wings in delight."
inputs = tokenizer(
    text,
    padding=True,
    truncation=True,
    return_tensors='pt',
)

# Inference only: run the forward pass without autograd bookkeeping, the same
# way the text-to-speech forward pass later in this script is run.
with torch.no_grad():
    outputs = model(**inputs)

# Convert the logits to per-class probabilities (one row per input text) and
# move them into a NumPy array for indexing and formatting.
predictions = torch.nn.functional.softmax(outputs.logits, dim=-1)
predictions = predictions.cpu().detach().numpy()
print(f"The predicted class is {np.argmax(predictions)}")
print(f"Class 0={predictions[0,0]:.4f}, Class 1={predictions[0,1]:.4f}")

# This script reports the class-0 probability as the "fake" probability and
# labels the news fake once that probability reaches 0.4.
judge = "fake" if predictions[0, 0] >= 0.4 else "real"

readout = "This news is probably a "+ judge + f" one. The fake probability is {100*predictions[0,0]:.4f}%."
|
33 |
+
|
34 |
+
# The summarization checkpoint is loaded through AutoModelForSeq2SeqLM, the
# auto class for encoder-decoder generation models; AutoModelWithLMHead is
# deprecated in transformers.
from transformers import AutoModelForSeq2SeqLM, AutoTokenizer

# NOTE: this rebinds the module-level `tokenizer` and `model` names from the
# classifier to the summarizer; `summarize` below reads these names.
tokenizer = AutoTokenizer.from_pretrained("mrm8488/t5-base-finetuned-summarize-news")
model = AutoModelForSeq2SeqLM.from_pretrained("mrm8488/t5-base-finetuned-summarize-news")
|
38 |
+
|
39 |
+
def summarize(text, max_length=150):
    """Return a generated summary of ``text``.

    Reads the module-level ``tokenizer`` and ``model`` at call time, so it
    must be called while those names are bound to the summarization
    checkpoint.

    Args:
        text: The text to summarize.
        max_length: Forwarded to ``model.generate`` as ``max_length``.

    Returns:
        The first generated sequence, decoded with special tokens skipped.
    """
    encoded = tokenizer.encode(text, return_tensors="pt", add_special_tokens=True)

    generation_options = {
        "num_beams": 2,
        "max_length": max_length,
        "repetition_penalty": 2.5,
        "length_penalty": 1.0,
        "early_stopping": True,
    }
    sequences = model.generate(input_ids=encoded, **generation_options)

    # Only the first generated sequence is returned to the caller.
    return tokenizer.decode(
        sequences[0],
        skip_special_tokens=True,
        clean_up_tokenization_spaces=True,
    )
|
47 |
+
|
48 |
+
# Summarize the user's text and append the summary to the readout that is
# later turned into speech.
summary = summarize(text, max_length=150)
readout += " The following is a brief summary of it: " + summary
|
51 |
+
|
52 |
+
from transformers import VitsModel, AutoTokenizer
import torch

# Load the text-to-speech checkpoint. The module-level `tokenizer` and `model`
# names are rebound here and refer to the TTS model from this point on.
tokenizer = AutoTokenizer.from_pretrained("facebook/mms-tts-eng")
model = VitsModel.from_pretrained("facebook/mms-tts-eng")

# Tokenize the full readout text for the TTS model.
inputs = tokenizer(readout, return_tensors="pt")

# Synthesize the waveform without tracking gradients.
with torch.no_grad():
    tts_result = model(**inputs)
    output = tts_result.waveform
|
62 |
+
|
63 |
+
# Import the submodule explicitly so `scipy.io.wavfile` is guaranteed to be
# loaded; a bare `import scipy` does not promise that on every SciPy version.
import scipy.io.wavfile

# Save the synthesized waveform as a WAV file at the model's sampling rate.
scipy.io.wavfile.write("readout.wav", rate=model.config.sampling_rate, data=output.float().numpy().T)

# Read the audio file back; the context manager guarantees the file handle is
# closed once the bytes have been read.
with open('readout.wav', 'rb') as audio_file:
    audio_bytes = audio_file.read()

# Play the audio with st.audio.
st.audio(audio_bytes, format='audio/wav')

# Show the original input next to the generated verdict and summary.
st.text("Your input is: "+text+" ; Our discussion: "+readout)
|
75 |
+
|