vincentmin committed
Commit 6e36ec1 · 1 Parent(s): 93f881f

Update app.py

Files changed (1)
  1. app.py +5 -6
app.py CHANGED
@@ -19,14 +19,14 @@ document_prompt = PromptTemplate(
     input_variables=["page_content", "Title"],
 )
 prompt = PromptTemplate(
-    template="""Write a personalised newsletter for a researcher on the most recent exciting developments in his field. The researcher describes his work as follows:"{context}". Base the newsletter on the articles below. Extract the most exciting points and combine them into an exhilarating newsletter. Use Markdown format\n#ARTICLES\n\n"{text}"\n\nNEWSLETTER:\n# Your AI curated newsletter\n""",
+    template="""Write an engaging newsletter on the most recent exciting developments in the following field:"{context}". Base the newsletter on the articles below. Extract the most exciting points and combine them into an exhilarating newsletter. Use emojis to catch attention and use the Markdown format.\n\n#ARTICLES\n"{text}"\n\nNEWSLETTER:\n# AI curated newsletter\n""",
     input_variables=["context", "text"])
 
 REPO_ID = "HuggingFaceH4/starchat-beta"
 llm = HuggingFaceHub(
     repo_id=REPO_ID,
     model_kwargs={
-        "max_new_tokens": 300,
+        "max_new_tokens": 400,
         "do_sample": True,
         "temperature": 0.8,
         "top_p": 0.9
@@ -52,19 +52,18 @@ def get_data(lookback_days: float, user_query: str):
     min_date = (max_date - timedelta(days=lookback_days))
     query = f"cat:hep-th AND submittedDate:[{min_date.strftime('%Y%m%d')} TO {max_date.strftime('%Y%m%d')}]"
     loader = ArxivLoader(query=query, load_max_docs=LOAD_MAX_DOCS)
-    docs = loader.load()
-    docs = [process_document(doc) for doc in docs]
+    docs = [process_document(doc) for doc in loader.load()]
     db = Chroma.from_documents(docs, embeddings)
     retriever = db.as_retriever()
     relevant_docs = retriever.get_relevant_documents(user_query)
     print(relevant_docs[0].metadata)
     articles = ""
     for doc in relevant_docs:
-        articles += f"**Title: {doc.metadata['Title']}**\n\nAbstract: {doc.metadata['Summary']}\n\n"
+        articles += f"**Title: {doc.metadata['Title']}**\n\nAuthors: {doc.metadata['Authors']}\n\nAbstract: {doc.metadata['Summary']}\n\n"
     output = stuff_chain({"input_documents": relevant_docs, "context": user_query})
     output_text = output["output_text"].split("<|end|>")[0]
     print("LLM output:", output_text)
-    return f"# Your AI curated newsletter\n{output['output_text']}\n\n\n\n## Used articles:\n\n{articles}"
+    return f"# Your AI curated newsletter\n{output_text}\n\n\n\n## Filtered {len(docs)} articles down to the following relevant articles:\n\n{articles}"
 
 with gr.Blocks() as demo:
     gr.Markdown(
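For context on how the updated return string of `get_data` reaches the user: a rough sketch of the Gradio wiring hinted at by the trailing `with gr.Blocks() as demo:` context lines. The component names, labels, and layout below are assumptions, not the actual UI code in app.py:

```python
# Hypothetical Blocks layout: a lookback slider and an interests textbox drive
# get_data(), whose Markdown string is rendered in a Markdown output component.
with gr.Blocks() as demo:
    gr.Markdown("# AI curated arXiv newsletter")
    lookback = gr.Slider(1, 30, value=7, label="Lookback days")
    interests = gr.Textbox(label="Describe your research interests")
    newsletter = gr.Markdown()
    gr.Button("Generate").click(fn=get_data, inputs=[lookback, interests], outputs=newsletter)

demo.launch()
```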
 