Spaces:

ramalMr
/

data_gen

Running

ramalMr committed on Apr 6, 2024

Commit

aebf89a

verified ·

1 Parent(s): 231908c

Update app.py

Files changed (1) hide show

app.py CHANGED Viewed

@@ -10,6 +10,7 @@ import io
 import tempfile
 import re
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 def extract_sentences_from_excel(file):
@@ -33,7 +34,7 @@ def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalt
     all_outputs = []
     for sentence in sentences:
         try:
-            stream = client.text_generation(f"{prompt} Output the response in the following JSON format: {{'generated_sentence': 'The generated sentence text', 'confidence_score': 0.9}} {sentence}", **generate_kwargs, stream=True, details=True, return_full_text=False)
             output = ""
             for response in stream:
                 output += response.token.text
@@ -56,11 +57,8 @@ def generate(file, prompt, temperature, max_new_tokens, top_p, repetition_penalt
 def save_to_json(data, filename):
     json_data = []
     for item in data:
-        generated_sentences = []
-        confidence_scores = []
-        for match in re.finditer(r"{'generated_sentence': '(.+?)', 'confidence_score': ([\d\.]+)}", item['generated_data']):
-            generated_sentences.append(match.group(1))
-            confidence_scores.append(float(match.group(2)))
         json_data.append({
             'original_sentence': item['original_sentence'],
             'generated_sentences': generated_sentences,

 import tempfile
 import re
 client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")
 def extract_sentences_from_excel(file):
     all_outputs = []
     for sentence in sentences:
         try:
+            stream = client.text_generation(f"{prompt} {sentence}", **generate_kwargs, stream=True, details=True, return_full_text=False)
             output = ""
             for response in stream:
                 output += response.token.text
 def save_to_json(data, filename):
     json_data = []
     for item in data:
+        generated_sentences = re.findall(r"{'generated_sentence': '(.+?)'", item['generated_data'])
+        confidence_scores = [0.9] * len(generated_sentences)  # Varsayılan güven skoru
         json_data.append({
             'original_sentence': item['original_sentence'],
             'generated_sentences': generated_sentences,