cogcorp committed on
Commit
954c6c1
·
1 Parent(s): 8c84ff9

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +10 -3
app.py CHANGED
@@ -50,8 +50,8 @@ def call_openai_api(persona, user_prompt):
50
  response = openai.ChatCompletion.create(
51
  model="gpt-3.5-turbo",
52
  messages=[
53
- {"role": "system", "content": f"You are a knowledgeable assistant with a persona based on this: \n{persona}"},
54
- {"role": "user", "content": user_prompt},
55
  ]
56
  )
57
  return response['choices'][0]['message']['content']
@@ -72,8 +72,15 @@ def pdf_to_text(file, user_prompt):
72
  pdf = PdfReader(pdf_file_io)
73
  for page in pdf.pages:
74
  aggregated_text += page.extract_text()
 
 
 
 
 
 
 
75
  # Create a single persona from all text
76
- persona = create_persona(aggregated_text)
77
  # Using OpenAI API
78
  response = call_openai_api(persona, user_prompt)
79
  return response
 
50
  response = openai.ChatCompletion.create(
51
  model="gpt-3.5-turbo",
52
  messages=[
53
+ {"role": "system", "content": f"You are a knowledgeable assistant that provides short factual answers"},
54
+ {"role": "user", "content": f"{persona}{user_prompt}"},
55
  ]
56
  )
57
  return response['choices'][0]['message']['content']
 
72
  pdf = PdfReader(pdf_file_io)
73
  for page in pdf.pages:
74
  aggregated_text += page.extract_text()
75
+ # Tokenize aggregated_text
76
+ tokens = nltk.word_tokenize(aggregated_text)
77
+ # Split into chunks if tokens are more than 4096
78
+ if len(tokens) > 4096:
79
+ # Here you may choose the strategy that fits best.
80
+ # For instance, the first 4096 tokens could be used.
81
+ tokens = tokens[:4096]
82
  # Create a single persona from all text
83
+ persona = create_persona(' '.join(tokens))
84
  # Using OpenAI API
85
  response = call_openai_api(persona, user_prompt)
86
  return response