# NovaScholar / Columns.py
# (Hugging Face upload metadata: omkar-surve126, "Upload 38 files", commit b91146d verified)
import streamlit as st
import pandas as pd
import PyPDF2
import io
import os
from dotenv import load_dotenv
import requests
import time
from mistralai import Mistral
from typing import List, Dict
from fpdf import FPDF
# Load environment variables from a local .env file (expects MISTRAL_API_KEY).
load_dotenv()
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
# NOTE(review): MISTRAL_API_URL is never used in this file — all calls go
# through the Mistral SDK client below. Kept for compatibility; confirm
# before removing.
MISTRAL_API_URL = "https://api.mistral.ai/v1/completions"
# Initialize the Mistral client
client = Mistral(api_key=MISTRAL_API_KEY)
def call_mistral_api(prompt: str) -> str:
    """Send *prompt* to the Mistral chat-completions API and return the reply.

    Args:
        prompt: The user-role message content to send.

    Returns:
        The assistant's message text, or an empty string if the call failed.
    """
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt},
    ]
    try:
        # BUG FIX: the original passed tools=[] together with
        # tool_choice="any", which forces the model to call a tool while
        # offering none — contradictory per the Mistral API. Dropping both
        # lets the model answer with plain text, which is what every caller
        # here expects.
        response = client.chat.complete(
            model="mistral-large-latest",
            messages=messages,
        )
        return response.choices[0].message.content
    except Exception as e:
        # Best-effort: callers treat "" as "no result" rather than crashing
        # the whole Streamlit run on a transient API error.
        print(f"API Error: {str(e)}")
        return ""
def process_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Analyze every column from the third onward with two LLM passes.

    For each such column, every non-null cell is annotated with the row's
    first two column values (presumably SrNo/DOI identifiers — confirm with
    the expected CSV format) and joined into one text blob. A first prompt
    asks for a trends/patterns analysis; a second prompt fact-checks that
    answer against the same data.

    Args:
        df: Input data; the first two columns are treated as identifiers and
            every later column as a topic to analyze.

    Returns:
        A DataFrame with one row per analyzed column: ``Column``, ``Result``.
    """
    print("Processing DataFrame...")
    results = []
    # BUG FIX: the original indexed df.columns[0] and df.columns[1]
    # unconditionally, raising IndexError on a CSV with fewer than 3 columns.
    if len(df.columns) < 3:
        print("DataFrame has fewer than 3 columns; nothing to process.")
        return pd.DataFrame(results, columns=["Column", "Result"])
    # Hoist the identifier-column lookups out of the per-column loop.
    id_col1, id_col2 = df.columns[0], df.columns[1]
    for column in df.columns[2:]:
        print(f"Processing column: {column}")
        # Annotate each non-null cell with its row identifiers.
        text = " ".join(
            f"Column1-{row[id_col1]}, Column2-{row[id_col2]}, {row[column]}"
            for _, row in df.iterrows()
            if pd.notna(row[column])
        )
        # First pass: open-ended trends/patterns analysis.
        prompt = f"You are a Professional Researcher and Analyser with 10 yrs of Experience. Find details and Elaborate on Top Trends,Patterns ,Highlight Theories and Method in this topic.Support your answer with rightful evidence of corresponding DOI/SrNo and Frequency(how many times same topic repeated and in which papers):Make sure to limit the answer within 400 words ({column}):\n\n{text}"
        result1 = call_mistral_api(prompt)
        # Second pass: fact-check the first answer against the raw data.
        prompt1 = f"""This result was the reponse of an earlier prompt Result -{result1}, Fact check the result with my original data -({column}):\n\n{text}. Return the refined Result(after careful fact checking and finding adequate evidence within the original data) , Make sure the meaning/structure of the Result doesnt change,only false/low evidence statements get eliminated.Limit the response to 400 words.MAKE SURE THERE IS NO CONTEXT CHANGE AND MEANING REMAINS SAME JUST WITH GOOD EVIDENCE AND REFINED RESULT. """
        result = call_mistral_api(prompt1)
        results.append({"Column": column, "Result": result})
    results_df = pd.DataFrame(results)
    print("DataFrame processing complete.")
    return results_df
def split_dataframe(df: pd.DataFrame, max_rows: int = 52) -> List[pd.DataFrame]:
    """Break *df* into consecutive chunks of at most ``max_rows`` rows.

    Args:
        df: The DataFrame to split.
        max_rows: Upper bound on the number of rows per chunk.

    Returns:
        List[pd.DataFrame]: The chunks in order, each re-indexed from 0.
        An empty DataFrame yields an empty list.
    """
    print("Splitting DataFrame...")
    # Step through the row range directly instead of pre-computing the
    # number of splits; range() handles the final partial chunk naturally.
    chunks = [
        df.iloc[start:start + max_rows].reset_index(drop=True)
        for start in range(0, len(df), max_rows)
    ]
    print(f"DataFrame split into {len(chunks)} parts.")
    return chunks
def generate_professional_review(df1: pd.DataFrame) -> str:
    """Generate a literature review / trends / TCM-ADO analysis from a table.

    BUG FIX: the original docstring documented a second parameter ``df2``
    that does not exist; the function takes and uses only one DataFrame.

    Args:
        df1: Per-column analysis results (as produced by
            ``process_dataframe``), serialized into the prompt.

    Returns:
        str: The generated analysis text ("" if the API call failed).
    """
    print("Generating professional review...")
    # Serialize the whole table, keeping the index so the model can cite rows.
    context = df1.to_string(index=True)
    # Single prompt covering review, trends, TCM-ADO, gaps, and frameworks.
    prompt = f"""Generate a professional literature review, trends analysis, TCM ADO (Theories,Context,Method ,Ancedents,Decisions,Outcomes), gaps, theories, and frameworks
based on the following data , If you find evidence as proper DOI make sure you analyze the whole
table with more DOI,Serial No and find more evidence.Always give supporting evidence for your literature review,TCM ADO analysis,trends ,frameworks,
check DOIs and find more evidence as inference again.Make sure the review is as professional as possible.Limit the answer to 500 words and only highlight the most imp trends with supporting evidence of DOI/SrNo and frequency(how many papers used that and top 2 DOI of that),Limit it to 500 words.Make sure all important details/frequently repeating trends/methods are highlighted.:\n\n{context}."""
    result = call_mistral_api(prompt)
    print("Professional review generated.")
    return result
def main():
    """Streamlit entry point: upload a CSV, analyze it in chunks with Mistral,
    and synthesize one final literature review from the chunk reviews."""
    st.title("Research Corpus Synthesis Tool")

    # Logout button: wipe all session state and restart the script run.
    if st.button("Logout", use_container_width=True):
        # BUG FIX: iterate over a snapshot of the keys — deleting entries
        # while iterating the live keys view raises RuntimeError.
        for key in list(st.session_state.keys()):
            del st.session_state[key]
        st.rerun()

    # File uploader
    uploaded_file = st.file_uploader("Upload CSV file", type="csv")
    if uploaded_file:
        if st.button("Process CSV"):
            print("CSV file uploaded.")
            progress_bar = st.progress(0)
            status_text = st.empty()

            df = pd.read_csv(uploaded_file)
            print("CSV file read into DataFrame.")

            # Chunk the corpus so each prompt stays within model context.
            split_dfs = split_dataframe(df, max_rows=52)

            # Accumulate every per-chunk review for the final synthesis pass.
            concatenated_reviews = ""
            for i, split_df in enumerate(split_dfs):
                status_text.text(f"Processing part {i + 1} of {len(split_dfs)}")
                print(f"Processing part {i + 1} of {len(split_dfs)}")
                processed_df = process_dataframe(split_df)
                review = generate_professional_review(processed_df)
                concatenated_reviews += review + "\n\n"
                progress_bar.progress((i + 1) / len(split_dfs))
                st.write(i)  # NOTE(review): debug output of the chunk index — kept to preserve behavior
                st.write(review)

            # BUG FIX: the original prompt said "Given is a consolidated
            # research review" / "Given as a context is a table" but never
            # embedded concatenated_reviews — the model was asked to analyze
            # data it was never given. The reviews are now appended as context.
            final_prompt = f"""
Given is a consolidated research review of a huge number of research papers (evidence is DOI, Serial No). Perform this:
Given as a context is a table of analyzing trends/frameworks analysis of a huge corpus of papers specific to the columns.
Analyze the table properly and create a professional and accurate literature review (Ensure to cite DOI as evidence).
Subheadings for Literature Review :
1. Introduction
β—‹ Overview of the main topic or concept.
β—‹ Key research questions or objectives.
2. Theoretical Foundations
β—‹ Exploration of dominant theories related to the topic.
β—‹ Domain-specific theoretical applications.
3. Contextual Analysis
β—‹ Geographic contexts and challenges.
β—‹ Sectoral applications and digital infrastructure readiness.
4. Methodological Approaches
β—‹ Qualitative, quantitative, and mixed-methods approaches used in research.
5. Discussion and Future Research
β—‹ Current challenges and limitations.
β—‹ Potential areas for future study.
6. Conclusion
β—‹ Summary of findings.
β—‹ Implications and future directions.
TCM-ADO Framework in Research Analysis and Literature Review:
Theory
Theoretical foundations driving the research.
● Focus on identifying and analyzing the conceptual models or frameworks that underpin the study.
● Establish the intellectual basis and rationale for the research direction.
Context
Situational and environmental factors shaping the research.
● Emphasis on geographic, sectoral, cultural, and infrastructural dimensions influencing the implementation or findings.
● Examples include urban versus rural settings, digital infrastructure readiness, or policy landscapes.
● Objective: To understand how external conditions impact the dynamics and applicability of the research.
Method
Research methodologies and analytical approaches utilized.
● Covers the selection of qualitative, quantitative, or mixed-method approaches, along with tools and techniques employed.
● Objective: To ensure methodological rigor and the validity of findings.
Antecedents
Pre-existing conditions enabling or constraining research or implementation.
● Includes factors such as technological infrastructure, stakeholder preparedness, and
regulatory frameworks.
● To identify critical prerequisites that influence the starting point of the research or
initiative.
Decisions
Strategic choices made throughout the implementation or research process.
● Involves critical decision points in areas like technology adoption, governance
frameworks, and operational strategies.
● analyze how informed decision-making shapes the trajectory and success of the project.
Outcomes
Results and impacts observed as a consequence of the initiative or study.
● Evaluates direct and indirect contributions to the research objectives or broader societal
goals.
● assess the effectiveness and long-term implications of the research or project outcomes.

Consolidated research reviews (context):

{concatenated_reviews}
"""
            final_result = call_mistral_api(final_prompt)
            print("Final analysis generated.")

            # Display the final result
            st.subheader("Final Analysis")
            st.write(final_result)
            status_text.text("Processing complete!")
            progress_bar.progress(1.0)
            print("Processing complete.")
# Run the Streamlit app when this file is executed as a script.
if __name__ == "__main__":
    main()