Update README.md
library_name: transformers
license: mit
---

# **A Text-to-Triple Model Trained on WikiOfGraph dataset**

Base Model: Flan-T5-Large

## **Example Input:**

"William Gerald Standridge (November 27, 1953 – April 12, 2014) was an American stock car racing driver. He was a competitor in the NASCAR Winston Cup Series and Busch Series."

## **Output:**

(S> William gerald standridge| P> Nationality| O> American), \
(S> William gerald standridge| P> Occupation| O> Stock car racing driver), \
..., \
(S> William gerald standridge| P> Death date| O> April 12, 2014)
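
The output is a single comma-separated string in this `(S> ...| P> ...| O> ...)` format rather than a structured object. If you need `(subject, predicate, object)` tuples, one simple option is a regex split; the `parse_triples` helper below is only an illustration (it is not shipped with the model) and assumes the exact format shown above.

```python
import re

def parse_triples(output_text):
    # Each triple looks like: (S> subject| P> predicate| O> object)
    pattern = r"\(S>\s*(.*?)\|\s*P>\s*(.*?)\|\s*O>\s*(.*?)\)"
    return [(s.strip(), p.strip(), o.strip())
            for s, p, o in re.findall(pattern, output_text)]

example = ("(S> William gerald standridge| P> Nationality| O> American), "
           "(S> William gerald standridge| P> Occupation| O> Stock car racing driver)")
print(parse_triples(example))
# [('William gerald standridge', 'Nationality', 'American'),
#  ('William gerald standridge', 'Occupation', 'Stock car racing driver')]
```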

## **How to Run?**

```python
from transformers import T5Tokenizer, T5ForConditionalGeneration
import torch


def generate_triples(input_text: str, model_path: str = "pat-jj/text2triple-flan-t5"):
    # Initialize the tokenizer and model
    tokenizer = T5Tokenizer.from_pretrained(model_path)
    model = T5ForConditionalGeneration.from_pretrained(
        model_path,
        device_map="auto",
        torch_dtype=torch.bfloat16  # Use bfloat16 for efficiency
    )

    # Tokenize the input with padding and an attention mask
    inputs = tokenizer(
        input_text,
        max_length=512,
        padding='max_length',
        truncation=True,
        return_tensors="pt"
    )

    # Move the inputs to the same device as the model
    input_ids = inputs['input_ids'].to(model.device)
    attention_mask = inputs['attention_mask'].to(model.device)

    # Generate with beam search
    with torch.no_grad():
        outputs = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            max_length=512,
            num_beams=4,           # Use beam search
            early_stopping=True,
            length_penalty=0.6,    # Penalize very long outputs
            use_cache=True         # Use KV cache for faster generation
        )

    # Decode and return the generated triples
    generated_triples = tokenizer.decode(outputs[0], skip_special_tokens=True)
    return generated_triples
```
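
A note on the loading options above: `device_map="auto"` needs the `accelerate` package installed, and `bfloat16` is mainly worthwhile on recent GPUs. On a CPU-only machine you can drop both and load the model with the default settings; this is plain `transformers` usage, nothing specific to this checkpoint.

```python
# CPU-only fallback (slower, but no GPU or accelerate required)
tokenizer = T5Tokenizer.from_pretrained("pat-jj/text2triple-flan-t5")
model = T5ForConditionalGeneration.from_pretrained("pat-jj/text2triple-flan-t5")
```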

## **Example Usage**

```python
input_text = """Albert Einstein was born in Ulm, Germany in 1879. He developed the theory of relativity and won the Nobel Prize in Physics in 1921.
Einstein worked as a professor at Princeton University until his death in 1955."""

generated_triples = generate_triples(input_text)
print("Generated triples:", generated_triples)
```
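
To process many passages, it is usually more convenient to load the model once and batch the inputs instead of calling `generate_triples` (which reloads the model) once per text. The sketch below is one way to do that, reusing `torch`, `model`, and `tokenizer` as loaded in the **How to Run?** section; the function name and batch size are illustrative, not part of this repo.

```python
def generate_triples_batch(texts, model, tokenizer, batch_size=8):
    """Return one triple string per input passage."""
    results = []
    for start in range(0, len(texts), batch_size):
        batch = texts[start:start + batch_size]
        # Dynamic padding to the longest passage in the batch
        inputs = tokenizer(
            batch,
            max_length=512,
            padding=True,
            truncation=True,
            return_tensors="pt"
        ).to(model.device)
        with torch.no_grad():
            outputs = model.generate(
                **inputs,
                max_length=512,
                num_beams=4,
                early_stopping=True,
                length_penalty=0.6
            )
        results.extend(tokenizer.batch_decode(outputs, skip_special_tokens=True))
    return results
```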

## **Paper of the WikiOfGraph dataset**

Daehee Kim et al., "Ontology-Free General-Domain Knowledge Graph-to-Text Generation Dataset Synthesis using Large Language Model", 2024.