File size: 2,539 Bytes
5d14cc6
50d1ce2
4ba3023
ec1c0d9
 
 
 
 
 
 
 
 
 
 
 
2afa0ec
 
 
 
5d14cc6
 
 
 
bd94ab4
c276872
5d14cc6
 
bd94ab4
 
5d14cc6
2afa0ec
c276872
 
50d1ce2
 
 
 
 
2afa0ec
 
 
50d1ce2
 
 
 
 
 
5d14cc6
 
 
 
 
ec1c0d9
 
86551a1
ec1c0d9
 
4f86a6f
ec1c0d9
 
a91875b
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a5f868b
ec1c0d9
 
4ba3023
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
import asyncio
from pydantic_ai.result import ResultData, RunResult
import streamlit as st
from pydantic_ai import Agent
from pydantic_ai.models.groq import GroqModel
import nest_asyncio
import pdfplumber
import os

# Groq API key taken from the API_KEY environment variable (None when unset).
api_key = os.environ.get("API_KEY")

# Module-level buffer that extract_data() appends page text to.
data = list()

# Model handle shared by every agent created in this module.
model = GroqModel("llama-3.1-70b-versatile", api_key=api_key)

def split_long_string(long_string, chunk_size=6000):
    """Cut ``long_string`` into consecutive slices of at most ``chunk_size`` items.

    Args:
        long_string: The sequence (normally a str) to slice.
        chunk_size: Maximum length of each slice; the last one may be shorter.

    Returns:
        A list of slices in their original order; empty when the input is empty.
    """
    pieces = []
    for start in range(0, len(long_string), chunk_size):
        stop = start + chunk_size
        pieces.append(long_string[start:stop])
    return pieces


async def ppt_content(data):
    """Generate presentation content from extracted document text.

    The entries of ``data`` are concatenated, split into chunks with
    ``split_long_string`` and sent to the agent one chunk at a time. Each run
    after the first receives the previous run's ``new_messages()`` as its
    ``message_history``. The output of the last run is printed and returned.

    Args:
        data: Iterable of page texts. Falsy entries (``None`` or ``""``) are
            skipped.

    Returns:
        The ``data`` attribute of the final agent run, or ``None`` when there
        is no text to process.
    """
    # Drop falsy entries so a page without extractable text cannot break the
    # join (assumes the extractor may yield None for such pages - TODO confirm
    # against the installed pdfplumber version).
    text = "".join(page for page in data if page)
    if not text:
        # Nothing to summarise; bail out before building the agent instead of
        # failing on a missing first chunk.
        return None

    # NOTE(review): prompt strings are kept verbatim (typos included) because
    # they are runtime text sent to the model.
    agent = Agent(model, system_prompt=(
        "You are an expert in making power-point perssentation",
        "Title Slide: Include the document's title, subtitle, author, and date.",
        "Methodology Slide: Summarize the methodology in detail",
        "Results Slide: Present key findings in detail in simple text and bullet points.",
        "Discussion Slide: Summarize the implications and limitations.",
        "Conclusion Slide: State the overall conclusion.",
        "Reference Slide: Include all citations used.",
        "Create 6 sliders",
    ))
    chunks = split_long_string(text)
    print(len(chunks))

    # Walk over however many chunks there are rather than assuming exactly
    # six, and await the async API instead of calling the blocking run_sync
    # from inside a coroutine.
    history = None
    result = None
    for chunk in chunks:
        result = await agent.run(user_prompt=chunk, message_history=history)
        # NOTE(review): new_messages() carries only the messages of the run
        # that just finished; if the whole conversation should be forwarded,
        # all_messages() may be the intended call - confirm.
        history = result.new_messages()

    print(result.data)
    return result.data

def ai_ppt(data):
    """Run the ``ppt_content`` coroutine to completion from synchronous code.

    Args:
        data: Page texts forwarded unchanged to ``ppt_content``.
    """
    pipeline = ppt_content(data=data)
    asyncio.run(pipeline)


def extract_data(feed):
    """Append the text of every page of the PDF ``feed`` to the module-level ``data`` list.

    Args:
        feed: The PDF source handed to ``pdfplumber.open``.

    Returns:
        None. The extracted text accumulates in ``data``.
    """
    with pdfplumber.open(feed) as document:
        # One entry per page, in page order.
        data.extend(page.extract_text() for page in document.pages)



# if data is not None:
#     st.caption(data)
#     ai_ppt(data=data)

def main():
    """Render the Streamlit page: a PDF uploader and a "Search" button.

    An uploaded file is passed to ``extract_data``, which fills the
    module-level ``data`` list. Pressing the button hands ``data`` to
    ``ai_ppt``. When no text is available, a warning is shown instead of
    starting the pipeline.
    """
    uploaded_file = st.file_uploader('Choose your .pdf file', type="pdf")
    if uploaded_file is not None:
        extract_data(uploaded_file)
    if st.button("Search"):
        if not data:
            # Without this guard an empty list reaches the pipeline, which
            # fails when it indexes the first text chunk.
            st.warning("Please upload a PDF file before searching.")
            return
        ai_ppt(data)
if __name__ == '__main__':
    # Patch asyncio so nested event loops are tolerated before the app runs.
    # asyncio is already imported at the top of the file, so it is not
    # re-imported here.
    nest_asyncio.apply()
    main()