mgoNeo4j committed
Commit a1aa7e9
1 Parent(s): 7bd3ab7

Update README.md

Files changed (1): README.md +75 -1

README.md CHANGED
@@ -253,4 +253,78 @@ Used RunPod with following setup:
  [More Information Needed] -->
  ### Framework versions
 
- - PEFT 0.12.0
+ - PEFT 0.12.0
+
+ ### Example Cypher generation
+ ```python
+ from peft import PeftModel, PeftConfig
+ import torch
+ from transformers import (
+     AutoModelForCausalLM,
+     AutoTokenizer,
+ )
+
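+ # Instruction template: the graph schema and the natural-language question
+ # are interpolated into a single user message.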
+ instruction = (
+     "Generate Cypher statement to query a graph database. "
+     "Use only the provided relationship types and properties in the schema. \n"
+     "Schema: {schema} \n Question: {question} \n Cypher output: "
+ )
+
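+ # Wrap the instruction as a single-turn chat message so the tokenizer's
+ # chat template can add the model's turn markers.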
+ def prepare_chat_prompt(question, schema) -> list[dict]:
+     chat = [
+         {
+             "role": "user",
+             "content": instruction.format(
+                 schema=schema, question=question
+             ),
+         }
+     ]
+     return chat
+
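+ # Clean up raw generations: strip code fences and any trailing explanation,
+ # leaving only the bare Cypher statement.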
+ def _postprocess_output_cypher(output_cypher: str) -> str:
+     # Remove any trailing explanation. E.g. "MATCH...\n\n**Explanation:**\n\n" -> "MATCH..."
+     # Remove code-fence markers. E.g. "```cypher\nMATCH...```" -> "MATCH..."
+     # Both can occur together:
+     # "```cypher\nMATCH...```\n\n**Explanation:**\n\n" -> "MATCH..."
+     partition_by = "**Explanation:**"
+     output_cypher, _, _ = output_cypher.partition(partition_by)
+     output_cypher = output_cypher.strip("`\n")
+     output_cypher = output_cypher.lstrip("cypher\n")
+     output_cypher = output_cypher.strip("`\n ")
+     return output_cypher
+
+ # Model: load the base weights, then attach the fine-tuned PEFT adapter.
+ base_model_name = "google/gemma-2-9b-it"
+ model_name = "neo4j/text2cypher-gemma-2-9b-it-finetuned-2024v1"
+ base_model = AutoModelForCausalLM.from_pretrained(base_model_name)
+ config = PeftConfig.from_pretrained(model_name)
+ model = PeftModel.from_pretrained(base_model, model_name)
+
+ # Build the prompt for an example question and schema.
+ question = "What are the movies of Tom Hanks?"
+ schema = "(:Actor)-[:ActedIn]->(:Movie)"
+ new_message = prepare_chat_prompt(question=question, schema=schema)
+ tokenizer = AutoTokenizer.from_pretrained(model_name)
+ prompt = tokenizer.apply_chat_template(new_message, add_generation_prompt=True, tokenize=False)
+ inputs = tokenizer(prompt, return_tensors="pt", padding=True)
+
+ # Generation parameters: low temperature keeps the sampled Cypher close to greedy decoding.
+ model_generate_parameters = {
+     "top_p": 0.9,
+     "temperature": 0.2,
+     "max_new_tokens": 512,
+     "do_sample": True,
+     "pad_token_id": tokenizer.eos_token_id,
+ }
+
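+ # Run generation without gradient tracking, then drop the prompt tokens so
+ # only the newly generated completion is decoded.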
+ inputs = inputs.to(model.device)
+ model.eval()
+ with torch.no_grad():
+     tokens = model.generate(**inputs, **model_generate_parameters)
+     tokens = tokens[:, inputs.input_ids.shape[1] :]
+     raw_outputs = tokenizer.batch_decode(tokens, skip_special_tokens=True)
+     outputs = [_postprocess_output_cypher(output) for output in raw_outputs]
+
+ print(outputs)
+ # > ["MATCH (hanks:Actor {name: 'Tom Hanks'})-[:ActedIn]->(m:Movie) RETURN m"]
+ ```
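The generated statement can then be run against a database. Below is a minimal sketch, assuming a reachable Neo4j instance and the official `neo4j` Python driver (5.x); the URI, credentials, and query literal are illustrative placeholders, not part of this model card:

```python
from neo4j import GraphDatabase

# Placeholder connection details; substitute your own instance.
URI = "neo4j://localhost:7687"
AUTH = ("neo4j", "password")

# The statement generated in the example above.
query = "MATCH (hanks:Actor {name: 'Tom Hanks'})-[:ActedIn]->(m:Movie) RETURN m"

with GraphDatabase.driver(URI, auth=AUTH) as driver:
    # execute_query (driver 5.x) returns records, a result summary, and keys.
    records, summary, keys = driver.execute_query(query)
    for record in records:
        print(record)
```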