Spaces:

omkar-surve126
/

NovaScholar

Build error

File size: 11,578 Bytes

b91146d


import streamlit as st
import pandas as pd
import PyPDF2
import io
import os
from dotenv import load_dotenv
import requests
import time
from mistralai import Mistral
from typing import List, Dict
from fpdf import FPDF

load_dotenv()
MISTRAL_API_KEY = os.getenv("MISTRAL_API_KEY")
MISTRAL_API_URL = "https://api.mistral.ai/v1/completions"

# Initialize the Mistral client
client = Mistral(api_key=MISTRAL_API_KEY)

def call_mistral_api(prompt: str) -> str:
    """Call Mistral AI with a prompt, return the text response if successful."""
    messages = [
        {"role": "system", "content": "You are a helpful assistant."},
        {"role": "user", "content": prompt}
    ]
    tools = []  # Add any tools if necessary

    try:
        # Make the API call
        response = client.chat.complete(
            model="mistral-large-latest",
            messages=messages,
            tools=tools,
            tool_choice="any",
        )
        return response.choices[0].message.content
    except Exception as e:
        print(f"API Error: {str(e)}")
        return ""

def process_dataframe(df: pd.DataFrame) -> pd.DataFrame:
    """Process the DataFrame and return a DataFrame with analysis results."""
    print("Processing DataFrame...")
    # Initialize results dictionary
    results = []

    # Process each column starting from the third column
    for i, column in enumerate(df.columns[2:], start=2):
        print(f"Processing column: {column}")
        # Extract text from column and attach values from the first and second columns
        text = " ".join(
            f"Column1-{row[df.columns[0]]}, Column2-{row[df.columns[1]]}, {value}"
            for _, row in df.iterrows()
            for value in [row[column]]
            if pd.notna(value)
        )

        # Generate prompt
        prompt = f"You are a Professional Researcher and Analyser with 10 yrs of Experience. Find details and Elaborate on Top Trends,Patterns ,Highlight Theories and Method in this topic.Support your answer with rightful evidence of corresponding DOI/SrNo and Frequency(how many times same topic repeated and in which papers):Make sure to limit the answer within 400 words ({column}):\n\n{text}"
        
        # Call Mistral API
        result1 = call_mistral_api(prompt)
        prompt1=f"""This result was the reponse of an earlier prompt Result -{result1}, Fact check the result with my original data -({column}):\n\n{text}. Return the refined Result(after careful fact checking and finding adequate evidence within the original data) , Make sure the meaning/structure of the Result doesnt change,only false/low evidence statements get eliminated.Limit the response to 400 words.MAKE SURE THERE IS NO CONTEXT CHANGE AND MEANING REMAINS SAME JUST WITH GOOD EVIDENCE AND REFINED RESULT. """
        result=call_mistral_api(prompt1)
        results.append({"Column": column, "Result": result})

    # Create DataFrame from results
    results_df = pd.DataFrame(results)
    print("DataFrame processing complete.")
    return results_df

def split_dataframe(df: pd.DataFrame, max_rows: int = 52) -> List[pd.DataFrame]:
    """

    Split a DataFrame into multiple smaller DataFrames, each having a maximum of `max_rows` rows.

    

    Args:

        df (pd.DataFrame): The original DataFrame to be split.

        max_rows (int): The maximum number of rows for each smaller DataFrame (excluding the header row).

    

    Returns:

        List[pd.DataFrame]: A list of smaller DataFrames.

    """
    print("Splitting DataFrame...")
    # Calculate the number of splits needed
    num_splits = (len(df) + max_rows - 1) // max_rows
    
    # Split the DataFrame
    split_dfs = [df.iloc[i * max_rows:(i + 1) * max_rows].reset_index(drop=True) for i in range(num_splits)]
    print(f"DataFrame split into {len(split_dfs)} parts.")
    return split_dfs

def generate_professional_review(df1: pd.DataFrame) -> str:
    """

    Generate a professional literature review, trends analysis, TSM/ADO analysis, gaps, theories, and frameworks

    based on DOI and Serial Number as key value pairs.

    

    Args:

        df1 (pd.DataFrame): The first DataFrame.

        df2 (pd.DataFrame): The second DataFrame.

    

    Returns:

        str: The generated analysis text.

    """
    print("Generating professional review...")
    # Concatenate DataFrames
    

    # Convert the concatenated DataFrame to a string format suitable for the prompt
    context = df1.to_string(index=True)
    
    # Generate a single prompt for the analysis
    prompt = f"""Generate a professional literature review, trends analysis, TCM ADO (Theories,Context,Method ,Ancedents,Decisions,Outcomes), gaps, theories, and frameworks

    based on the following data , If you find evidence as proper DOI make sure you analyze the whole

    table with more DOI,Serial No and find more evidence.Always give supporting evidence for your literature review,TCM ADO analysis,trends ,frameworks,

    check DOIs and find more evidence as inference again.Make sure the review is as professional as possible.Limit the answer to 500 words and only highlight the most imp trends with supporting evidence of DOI/SrNo and frequency(how many papers used that and top 2 DOI of that),Limit it to 500 words.Make sure all important details/frequently repeating trends/methods are highlighted.:\n\n{context}."""
  

    # Call Mistral API
    result = call_mistral_api(prompt)
    print("Professional review generated.")
    return result


def main():
    st.title("Research Corpus Synthesis Tool")

    # Logout button
    if st.button("Logout", use_container_width=True):
        for key in st.session_state.keys():
            del st.session_state[key]
        st.rerun()

    # File uploader
    uploaded_file = st.file_uploader("Upload CSV file", type="csv")

    if uploaded_file:
        if st.button("Process CSV"):
            print("CSV file uploaded.")
            # Initialize progress bar
            progress_bar = st.progress(0)
            status_text = st.empty()

            # Read CSV file into DataFrame
            df = pd.read_csv(uploaded_file)
            print("CSV file read into DataFrame.")

            # Split DataFrame into smaller DataFrames
            split_dfs = split_dataframe(df, max_rows=52)

            # Initialize variable to concatenate all generated reviews
            concatenated_reviews = ""

            # Process each smaller DataFrame
            for i, split_df in enumerate(split_dfs):
                status_text.text(f"Processing part {i + 1} of {len(split_dfs)}")
                print(f"Processing part {i + 1} of {len(split_dfs)}")

                # Process the smaller DataFrame
                processed_df = process_dataframe(split_df)

                # Generate professional review
                review = generate_professional_review(processed_df)

                # Concatenate the generated review
                concatenated_reviews += review + "\n\n"

                # Update progress
                progress = (i + 1) / len(split_dfs)
                progress_bar.progress(progress)
                st.write(i)
                st.write(review)
                
               
                
               

            # Generate final analysis based on the concatenated reviews
            final_prompt = f"""

            Given is a consolidated research review of a huge number of research papers (evidence is DOI, Serial No). Perform this:

            Given as a context is a table of analyzing trends/frameworks analysis of a huge corpus of papers specific to the columns.

            Analyze the table properly and create a professional and accurate literature review (Ensure to cite DOI as evidence).



            Subheadings for Literature Review :

            1. Introduction

               ○ Overview of the main topic or concept.

               ○ Key research questions or objectives.

            2. Theoretical Foundations

               ○ Exploration of dominant theories related to the topic.

               ○ Domain-specific theoretical applications.

            3. Contextual Analysis

               ○ Geographic contexts and challenges.

               ○ Sectoral applications and digital infrastructure readiness.

            4. Methodological Approaches

               ○ Qualitative, quantitative, and mixed-methods approaches used in research.

            5. Discussion and Future Research

               ○ Current challenges and limitations.

               ○ Potential areas for future study.

            6. Conclusion

               ○ Summary of findings.

               ○ Implications and future directions.



            TCM-ADO Framework in Research Analysis and Literature Review:

            Theory

               Theoretical foundations driving the research.

               ● Focus on identifying and analyzing the conceptual models or frameworks that underpin the study.

               ● Establish the intellectual basis and rationale for the research direction.

            Context

               Situational and environmental factors shaping the research.

               ● Emphasis on geographic, sectoral, cultural, and infrastructural dimensions influencing the implementation or findings.

               ● Examples include urban versus rural settings, digital infrastructure readiness, or policy landscapes.

               ● Objective: To understand how external conditions impact the dynamics and applicability of the research.

            Method

               Research methodologies and analytical approaches utilized.

               ● Covers the selection of qualitative, quantitative, or mixed-method approaches, along with tools and techniques employed.

               ● Objective: To ensure methodological rigor and the validity of findings.

            Antecedents

            Pre-existing conditions enabling or constraining research or implementation.

            ● Includes factors such as technological infrastructure, stakeholder preparedness, and

            regulatory frameworks.

            ● To identify critical prerequisites that influence the starting point of the research or

            initiative.

            Decisions

            Strategic choices made throughout the implementation or research process.

            ● Involves critical decision points in areas like technology adoption, governance

            frameworks, and operational strategies.

            ● analyze how informed decision-making shapes the trajectory and success of the project.

            Outcomes

            Results and impacts observed as a consequence of the initiative or study.

            ● Evaluates direct and indirect contributions to the research objectives or broader societal

            goals.

            ● assess the effectiveness and long-term implications of the research or project outcomes.

            """

            final_result = call_mistral_api(final_prompt)
            print("Final analysis generated.")

            # Display the final result
            st.subheader("Final Analysis")
            st.write(final_result)

            status_text.text("Processing complete!")
            progress_bar.progress(1.0)
            print("Processing complete.")

if __name__ == "__main__":
    main()