cogcorp committed on
Commit
d48fb2c
·
1 Parent(s): 9a56aa1

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +8 -2
app.py CHANGED
@@ -79,10 +79,15 @@ def pdf_to_text(file, user_prompt):
79
  # Tokenize aggregated_text
80
  tokens = nltk.word_tokenize(aggregated_text)
81
  # Split into chunks if tokens are more than 4096
82
- if len(tokens) > 4096:
83
  # Here you may choose the strategy that fits best.
84
  # For instance, the first 4096 tokens could be used.
85
- tokens = tokens[:4096]
 
 
 
 
 
86
  # Create a single persona from all text
87
  persona = create_persona(' '.join(tokens))
88
  # Using OpenAI API
@@ -90,6 +95,7 @@ def pdf_to_text(file, user_prompt):
90
  return response
91
 
92
 
 
93
  iface = gr.Interface(
94
  fn=pdf_to_text,
95
  inputs=[
 
79
  # Tokenize aggregated_text
80
  tokens = nltk.word_tokenize(aggregated_text)
81
  # Split into chunks if tokens are more than 4096
82
+ while len(tokens) > 4096:
83
  # Here you may choose the strategy that fits best.
84
  # For instance, the first 4096 tokens could be used.
85
+ chunk = tokens[:4096]
86
+ chunk_text = ' '.join(chunk)
87
+ # Use OpenAI API to summarize the chunk
88
+ summary = call_openai_api("a professional summarizer", f"Please summarize this text: {chunk_text}")
89
+ # Replace the original chunk with the summary
90
+ tokens = summary.split() + tokens[4096:]
91
  # Create a single persona from all text
92
  persona = create_persona(' '.join(tokens))
93
  # Using OpenAI API
 
95
  return response
96
 
97
 
98
+
99
  iface = gr.Interface(
100
  fn=pdf_to_text,
101
  inputs=[