Spaces:

VishnuPottabatthini
/

News-Article-Summary-Demo

Sleeping

App Files Files Community

VishnuPottabatthini committed on Oct 14, 2024

Commit

80b2f11

verified ·

1 Parent(s): 5097d11

Upload 9 files

Browse files

Files changed (9) hide show

BART model small/main.py +50 -0
BART model small/model/config.json +70 -0
BART model small/model/generation_config.json +16 -0
BART model small/model/merges.txt +0 -0
BART model small/model/model.safetensors +3 -0
BART model small/model/special_tokens_map.json +51 -0
BART model small/model/tokenizer_config.json +56 -0
BART model small/model/training_args.bin +3 -0
BART model small/model/vocab.json +0 -0

BART model small/main.py ADDED Viewed

	@@ -0,0 +1,50 @@

+from transformers import pipeline, BartForConditionalGeneration, BartTokenizer
+# Define the directory where you want to save the tokenizer files
+tokenizer_directory = '/Users/vishnu/Downloads/BART model small/model'
+# Download and save the tokenizer files from the original BART model
+tokenizer = BartTokenizer.from_pretrained('facebook/bart-large-cnn')
+# tokenizer.save_pretrained(tokenizer_directory)
+# print(f"Tokenizer saved to {tokenizer_directory}")
+# Load the fine-tuned model and tokenizer
+model_directory = './model'
+tokenizer = BartTokenizer.from_pretrained(model_directory)
+model = BartForConditionalGeneration.from_pretrained(model_directory)
+# Create a summarization pipeline
+summarizer = pipeline('summarization', model=model, tokenizer=tokenizer)
+# Sample news article
+article = """
+That fucking Putin,” Biden said to advisers in the Oval Office not long after Russia invasion of Ukraine, according to Woodward. “Putin is evil. We are dealing with the epitome of evil.”
+The book, “War,” also reveals new details about Donald Trump private conversations with Putin and a secret shipment of Covid-19 testing equipment Trump sent to the Russian president for his personal use during the height of the pandemic. Trump has denied those reports.
+Woodwards new book, which was obtained by CNN ahead of its October 15 release, gives an unvarnished, in-the-room account of key moments as Biden and his national security team navigate international crises, from the disastrous Afghanistan withdrawal to confronting Putin before he invaded Ukraine to private battles with Netanyahu.
+Based on hundreds of hours of interviews with firsthand participants, “War” is filled with newly reported details of high-stakes showdowns. The book explores the political and personal wars that Biden has fought during his presidency, including details about his decision to step aside from the 2024 campaign and conversations about his son Hunter Bidens legal troubles.
+Among the new details in “War”:
+ Woodward writes that Bidens national security team at one point believed there was a real threat, a 50hance, that Putin would use nuclear weapons in Ukraine.
+ Biden said he “should never have picked” Attorney General Merrick Garland during a conversation over his sons legal troubles.
+ Biden criticized former President Barack Obamas handling of Putins invasion of Crimea in 2014, concluding that “Barack never took Putin seriously.”
+ Citing a Trump aide, Woodward reports that there have been “maybe as many as seven” calls between Trump and Putin since Trump left the White House in 2021.
+In a statement, Trump spokesman Steven Cheung said Trump gave Woodward “absolutely no access” for the book. “None of these made up stories by Bob Woodward are true,” he said.
+Asked about the details that Woodward reports about Biden and Netanyahu, White House senior deputy press secretary Emilie Simons told reporters Tuesday, “They have a long-term relationship. They have a very honest and direct relationship, and I dont have a comment on those specific anecdotes.”
+"""
+# Generate the summary
+summary = summarizer(article, max_length=128, min_length=30, length_penalty=2.0, num_beams=4, early_stopping=True)
+# Output the summary
+print("\n\n Summary:", summary[0]['summary_text'])

BART model small/model/config.json ADDED Viewed

	@@ -0,0 +1,70 @@

+{
+  "_name_or_path": "facebook/bart-large-cnn",
+  "_num_labels": 3,
+  "activation_dropout": 0.0,
+  "activation_function": "gelu",
+  "add_final_layer_norm": false,
+  "architectures": [
+    "BartForConditionalGeneration"
+  ],
+  "attention_dropout": 0.0,
+  "bos_token_id": 0,
+  "classif_dropout": 0.0,
+  "classifier_dropout": 0.0,
+  "d_model": 1024,
+  "decoder_attention_heads": 16,
+  "decoder_ffn_dim": 4096,
+  "decoder_layerdrop": 0.0,
+  "decoder_layers": 12,
+  "decoder_start_token_id": 2,
+  "dropout": 0.1,
+  "early_stopping": true,
+  "encoder_attention_heads": 16,
+  "encoder_ffn_dim": 4096,
+  "encoder_layerdrop": 0.0,
+  "encoder_layers": 12,
+  "eos_token_id": 2,
+  "force_bos_token_to_be_generated": true,
+  "forced_bos_token_id": 0,
+  "forced_eos_token_id": 2,
+  "gradient_checkpointing": false,
+  "id2label": {
+    "0": "LABEL_0",
+    "1": "LABEL_1",
+    "2": "LABEL_2"
+  },
+  "init_std": 0.02,
+  "is_encoder_decoder": true,
+  "label2id": {
+    "LABEL_0": 0,
+    "LABEL_1": 1,
+    "LABEL_2": 2
+  },
+  "length_penalty": 2.0,
+  "max_length": 142,
+  "max_position_embeddings": 1024,
+  "min_length": 56,
+  "model_type": "bart",
+  "no_repeat_ngram_size": 3,
+  "normalize_before": false,
+  "num_beams": 4,
+  "num_hidden_layers": 12,
+  "output_past": true,
+  "pad_token_id": 1,
+  "prefix": " ",
+  "scale_embedding": false,
+  "task_specific_params": {
+    "summarization": {
+      "early_stopping": true,
+      "length_penalty": 2.0,
+      "max_length": 142,
+      "min_length": 56,
+      "no_repeat_ngram_size": 3,
+      "num_beams": 4
+    }
+  },
+  "torch_dtype": "float32",
+  "transformers_version": "4.44.2",
+  "use_cache": true,
+  "vocab_size": 50264
+}

BART model small/model/generation_config.json ADDED Viewed

	@@ -0,0 +1,16 @@

+{
+  "_from_model_config": true,
+  "bos_token_id": 0,
+  "decoder_start_token_id": 2,
+  "early_stopping": true,
+  "eos_token_id": 2,
+  "forced_bos_token_id": 0,
+  "forced_eos_token_id": 2,
+  "length_penalty": 2.0,
+  "max_length": 142,
+  "min_length": 56,
+  "no_repeat_ngram_size": 3,
+  "num_beams": 4,
+  "pad_token_id": 1,
+  "transformers_version": "4.44.2"
+}

BART model small/model/merges.txt ADDED Viewed

The diff for this file is too large to render. See raw diff

BART model small/model/model.safetensors ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:d42de8453934c7950a47b52a33643dcbd0076da21b64102d316bbb6100f663ab
+size 1625422896

BART model small/model/special_tokens_map.json ADDED Viewed

	@@ -0,0 +1,51 @@

+{
+  "bos_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "cls_token": {
+    "content": "<s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "eos_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "mask_token": {
+    "content": "<mask>",
+    "lstrip": true,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "pad_token": {
+    "content": "<pad>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "sep_token": {
+    "content": "</s>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  },
+  "unk_token": {
+    "content": "<unk>",
+    "lstrip": false,
+    "normalized": true,
+    "rstrip": false,
+    "single_word": false
+  }
+}

BART model small/model/tokenizer_config.json ADDED Viewed

	@@ -0,0 +1,56 @@

+{
+  "add_prefix_space": false,
+  "added_tokens_decoder": {
+    "0": {
+      "content": "<s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "1": {
+      "content": "<pad>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "2": {
+      "content": "</s>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "3": {
+      "content": "<unk>",
+      "lstrip": false,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    },
+    "50264": {
+      "content": "<mask>",
+      "lstrip": true,
+      "normalized": true,
+      "rstrip": false,
+      "single_word": false,
+      "special": true
+    }
+  },
+  "bos_token": "<s>",
+  "clean_up_tokenization_spaces": true,
+  "cls_token": "<s>",
+  "eos_token": "</s>",
+  "errors": "replace",
+  "mask_token": "<mask>",
+  "model_max_length": 1000000000000000019884624838656,
+  "pad_token": "<pad>",
+  "sep_token": "</s>",
+  "tokenizer_class": "BartTokenizer",
+  "unk_token": "<unk>"
+}

BART model small/model/training_args.bin ADDED Viewed

	@@ -0,0 +1,3 @@

+version https://git-lfs.github.com/spec/v1
+oid sha256:3ef7547aa51992de2795f0e7b617d77aaab1007f301047edb44e2f9b8448401c
+size 5176

BART model small/model/vocab.json ADDED Viewed

The diff for this file is too large to render. See raw diff