Spaces:
Sleeping
Sleeping
Commit
·
dcf88db
1
Parent(s):
ce136a3
minor improvements
Browse files
app.py
CHANGED
@@ -24,6 +24,8 @@ sentence_plan = "1. Introduction sentence\n2. Overview of relevant studies\n3. D
|
|
24 |
|
25 |
class RequestData(BaseModel):
|
26 |
abstract: str
|
|
|
|
|
27 |
|
28 |
class ResponseData(BaseModel):
|
29 |
summary: str
|
@@ -31,7 +33,7 @@ class ResponseData(BaseModel):
|
|
31 |
|
32 |
@app.post("/generateLiteratureSurvey/", response_model=ResponseData)
|
33 |
async def generate_literature_survey(request_data: RequestData):
|
34 |
-
summary, ids = summarize(request_data.abstract, llms)
|
35 |
return {"summary": summary,
|
36 |
"ids": ids
|
37 |
}
|
@@ -48,11 +50,11 @@ async def root():
|
|
48 |
return {"status": 0}
|
49 |
return {"status": 1}
|
50 |
|
51 |
-
def summarize(query, llms) :
|
52 |
keywords = helper.extract_keywords(llms['feature_extractor'], query)
|
53 |
-
papers = helper.search_papers(llms['arxiv_agent'], keywords)
|
54 |
-
ranked_papers = helper.re_rank_papers(llms['ranker'], query, papers)
|
55 |
-
literature_review, ids = helper.generate_related_work(llms['summarizer'], llms['summarizer_tokenizer'], query, ranked_papers, base_prompt, sentence_plan)
|
56 |
return literature_review, ids
|
57 |
|
58 |
print("Program running")
|
|
|
24 |
|
25 |
class RequestData(BaseModel):
|
26 |
abstract: str
|
27 |
+
words: str
|
28 |
+
papers: str
|
29 |
|
30 |
class ResponseData(BaseModel):
|
31 |
summary: str
|
|
|
33 |
|
34 |
@app.post("/generateLiteratureSurvey/", response_model=ResponseData)
|
35 |
async def generate_literature_survey(request_data: RequestData):
|
36 |
+
summary, ids = summarize(request_data.abstract, request_data.words, request_data.papers, llms)
|
37 |
return {"summary": summary,
|
38 |
"ids": ids
|
39 |
}
|
|
|
50 |
return {"status": 0}
|
51 |
return {"status": 1}
|
52 |
|
53 |
+
def summarize(query, n_words, n_papers, llms) :
|
54 |
keywords = helper.extract_keywords(llms['feature_extractor'], query)
|
55 |
+
papers = helper.search_papers(llms['arxiv_agent'], keywords, int(n_papers)*2)
|
56 |
+
ranked_papers = helper.re_rank_papers(llms['ranker'], query, papers, int(n_papers))
|
57 |
+
literature_review, ids = helper.generate_related_work(llms['summarizer'], llms['summarizer_tokenizer'], query, ranked_papers, base_prompt, sentence_plan, int(n_words))
|
58 |
return literature_review, ids
|
59 |
|
60 |
print("Program running")
|
helper.py
CHANGED
@@ -53,14 +53,14 @@ def extract_keywords(model, abstract):
|
|
53 |
return keyphrases
|
54 |
|
55 |
|
56 |
-
def search_papers(arxiv_agent, keywords):
|
57 |
query = " ".join(keywords)
|
58 |
results = arxiv_agent.get_summaries_as_docs(query)
|
59 |
#print("arxiv ouptut ")
|
60 |
#print(results)
|
61 |
return results
|
62 |
|
63 |
-
def re_rank_papers(model, query_abstract, papers):
|
64 |
summaries = {paper.page_content : {"Title":paper.metadata['Title']} for paper in papers}
|
65 |
print(summaries)
|
66 |
target_embeddings = model.encode([query_abstract])
|
@@ -121,7 +121,7 @@ def generate_refs(papers) :
|
|
121 |
i+=1
|
122 |
return refs, ids
|
123 |
|
124 |
-
def generate_related_work(model, tokenizer, query_abstract, ranked_papers, base_prompt, sentence_plan):
|
125 |
input_text = f"Abstract: {query_abstract}\n"
|
126 |
i = 1
|
127 |
for key in ranked_papers.keys():
|
@@ -141,7 +141,7 @@ def generate_related_work(model, tokenizer, query_abstract, ranked_papers, base_
|
|
141 |
)
|
142 |
|
143 |
generation_args = {
|
144 |
-
"max_new_tokens":
|
145 |
"return_full_text": False,
|
146 |
"temperature": 0.0,
|
147 |
"do_sample": False,
|
|
|
53 |
return keyphrases
|
54 |
|
55 |
|
56 |
+
def search_papers(arxiv_agent, keywords, n_papers):
|
57 |
query = " ".join(keywords)
|
58 |
results = arxiv_agent.get_summaries_as_docs(query)
|
59 |
#print("arxiv ouptut ")
|
60 |
#print(results)
|
61 |
return results
|
62 |
|
63 |
+
def re_rank_papers(model, query_abstract, papers, n_papers):
|
64 |
summaries = {paper.page_content : {"Title":paper.metadata['Title']} for paper in papers}
|
65 |
print(summaries)
|
66 |
target_embeddings = model.encode([query_abstract])
|
|
|
121 |
i+=1
|
122 |
return refs, ids
|
123 |
|
124 |
+
def generate_related_work(model, tokenizer, query_abstract, ranked_papers, base_prompt, sentence_plan, n_words):
|
125 |
input_text = f"Abstract: {query_abstract}\n"
|
126 |
i = 1
|
127 |
for key in ranked_papers.keys():
|
|
|
141 |
)
|
142 |
|
143 |
generation_args = {
|
144 |
+
"max_new_tokens": n_words,
|
145 |
"return_full_text": False,
|
146 |
"temperature": 0.0,
|
147 |
"do_sample": False,
|