ramalMr committed on
Commit
45b3e18
·
verified ·
1 Parent(s): a5056fa

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +5 -6
app.py CHANGED
@@ -30,13 +30,13 @@ def save_to_csv(sentence, output, filename="synthetic_data.csv"):
30
  def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
31
  text = extract_text_from_pdf(file)
32
  sentences = text.split('.')
33
- random.shuffle(sentences) # Shuffle sentences
34
 
35
- # Geçici dosya oluştur ve CSV yazıcısını başlat
36
  with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
37
  fieldnames = ['Original Sentence', 'Generated Sentence']
38
  writer = csv.DictWriter(tmp, fieldnames=fieldnames)
39
- writer.writeheader() # CSV dosyasına kolon isimleri yazılır
40
 
41
  for sentence in sentences:
42
  sentence = sentence.strip()
@@ -58,11 +58,10 @@ def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
58
  for response in stream:
59
  output += response.token.text
60
 
61
- # Modelden gelen yanıtı cümlelere ayır
62
- generated_sentences = re.split(r'[\.\?!]', output)
63
  generated_sentences = [s.strip() for s in generated_sentences if s.strip()]
64
 
65
- # Her cümleyi ayrı bir satır olarak CSV'ye yaz
66
  for generated_sentence in generated_sentences:
67
  writer.writerow({'Original Sentence': sentence, 'Generated Sentence': generated_sentence})
68
 
 
30
  def generate(file, temperature, max_new_tokens, top_p, repetition_penalty):
31
  text = extract_text_from_pdf(file)
32
  sentences = text.split('.')
33
+ random.shuffle(sentences)
34
 
35
+
36
  with tempfile.NamedTemporaryFile(mode='w', newline='', delete=False, suffix='.csv') as tmp:
37
  fieldnames = ['Original Sentence', 'Generated Sentence']
38
  writer = csv.DictWriter(tmp, fieldnames=fieldnames)
39
+ writer.writeheader()
40
 
41
  for sentence in sentences:
42
  sentence = sentence.strip()
 
58
  for response in stream:
59
  output += response.token.text
60
 
61
+
62
+ generated_sentences = re.split(r'(?<=[\.\!\?:])[\s\n]+', output)
63
  generated_sentences = [s.strip() for s in generated_sentences if s.strip()]
64
 
 
65
  for generated_sentence in generated_sentences:
66
  writer.writerow({'Original Sentence': sentence, 'Generated Sentence': generated_sentence})
67