singhvaibhav924 committed
Commit dcf88db · Parent(s): ce136a3

minor improvements

Files changed (2):
  1. app.py +7 -5
  2. helper.py +4 -4
app.py CHANGED
@@ -24,6 +24,8 @@ sentence_plan = "1. Introduction sentence\n2. Overview of relevant studies\n3. D
 
 class RequestData(BaseModel):
     abstract: str
+    words: str
+    papers: str
 
 class ResponseData(BaseModel):
     summary: str
@@ -31,7 +33,7 @@ class ResponseData(BaseModel):
 
 @app.post("/generateLiteratureSurvey/", response_model=ResponseData)
 async def generate_literature_survey(request_data: RequestData):
-    summary, ids = summarize(request_data.abstract, llms)
+    summary, ids = summarize(request_data.abstract, request_data.words, request_data.papers, llms)
     return {"summary": summary,
             "ids": ids
             }
@@ -48,11 +50,11 @@ async def root():
         return {"status": 0}
     return {"status": 1}
 
-def summarize(query, llms) :
+def summarize(query, n_words, n_papers, llms) :
     keywords = helper.extract_keywords(llms['feature_extractor'], query)
-    papers = helper.search_papers(llms['arxiv_agent'], keywords)
-    ranked_papers = helper.re_rank_papers(llms['ranker'], query, papers)
-    literature_review, ids = helper.generate_related_work(llms['summarizer'], llms['summarizer_tokenizer'], query, ranked_papers, base_prompt, sentence_plan)
+    papers = helper.search_papers(llms['arxiv_agent'], keywords, int(n_papers)*2)
+    ranked_papers = helper.re_rank_papers(llms['ranker'], query, papers, int(n_papers))
+    literature_review, ids = helper.generate_related_work(llms['summarizer'], llms['summarizer_tokenizer'], query, ranked_papers, base_prompt, sentence_plan, int(n_words))
    return literature_review, ids
 
 print("Program running")
helper.py CHANGED
@@ -53,14 +53,14 @@ def extract_keywords(model, abstract):
     return keyphrases
 
 
-def search_papers(arxiv_agent, keywords):
+def search_papers(arxiv_agent, keywords, n_papers):
     query = " ".join(keywords)
     results = arxiv_agent.get_summaries_as_docs(query)
     #print("arxiv ouptut ")
     #print(results)
     return results
 
-def re_rank_papers(model, query_abstract, papers):
+def re_rank_papers(model, query_abstract, papers, n_papers):
     summaries = {paper.page_content : {"Title":paper.metadata['Title']} for paper in papers}
     print(summaries)
     target_embeddings = model.encode([query_abstract])
@@ -121,7 +121,7 @@ def generate_refs(papers) :
         i+=1
     return refs, ids
 
-def generate_related_work(model, tokenizer, query_abstract, ranked_papers, base_prompt, sentence_plan):
+def generate_related_work(model, tokenizer, query_abstract, ranked_papers, base_prompt, sentence_plan, n_words):
     input_text = f"Abstract: {query_abstract}\n"
     i = 1
     for key in ranked_papers.keys():
@@ -141,7 +141,7 @@ def generate_related_work(model, tokenizer, query_abstract, ranked_papers, base_
     )
 
     generation_args = {
-        "max_new_tokens": 1600,
+        "max_new_tokens": n_words,
         "return_full_text": False,
         "temperature": 0.0,
         "do_sample": False,