# NOTE: The lines originally at the top of this file were web file-viewer
# residue (a file-size banner, per-line commit hashes and a line-number
# gutter). They were not part of the program and are not valid Python, so
# they have been reduced to this comment.
import asyncio
import os
import re
import pdfplumber
import streamlit as st
import torch
from transformers import pipeline
from dataclasses import dataclass
from streamlit_pdf_viewer import pdf_viewer
from pydantic_ai import Agent, RunContext, Tool
from pydantic_ai.models.groq import GroqModel
from pydantic_ai.messages import ModelMessage
import presentation as customClass
import nest_asyncio

# Load API key
# Read once at import time; the script refuses to start without it because
# the Groq model below is constructed with this key.
api_key = os.getenv("API_KEY")
if not api_key:
    raise ValueError("API_KEY is not set in the environment variables.")

# Page texts of the most recently processed PDF. Rebound by extract_data()
# and read by return_data().
data: list[str] = []
# Declared for generated presentations; nothing in the visible code writes
# to or reads from it.
result_data: list[customClass.PPT] = []

# Initialize models
# LLM handed to the Agent built in ppt_content().
model = GroqModel("llama3-groq-70b-8192-tool-use-preview", api_key=api_key)
# NOTE(review): `summarizer` is not referenced anywhere else in the visible
# source, yet the pipeline is constructed on every import of this module —
# presumably an expensive model load; confirm whether it is still needed.
summarizer = pipeline("summarization", model="facebook/bart-large-cnn")

def split_into_token_chunks(text: str, max_tokens: int = 300) -> list[str]:
    """
    Splits a long string into chunks of a specified maximum number of tokens (words).

    "Tokens" here are whitespace-separated words as produced by
    ``str.split()``, not model-tokenizer tokens. Runs of whitespace
    (including newlines) are collapsed: each chunk is its words re-joined
    with single spaces.

    Args:
        text: The text to split. Empty or whitespace-only text yields ``[]``.
        max_tokens: Maximum number of words per chunk; must be at least 1.

    Returns:
        The chunks in original order. Every chunk except possibly the last
        holds exactly ``max_tokens`` words.

    Raises:
        ValueError: If ``max_tokens`` is less than 1.
    """
    # A zero step makes range() fail with an unrelated-looking message, and a
    # negative step silently yields no chunks at all (dropping the whole
    # text), so reject both up front with a clear error.
    if max_tokens < 1:
        raise ValueError(f"max_tokens must be at least 1, got {max_tokens}")

    tokens = text.split()
    return [
        " ".join(tokens[start:start + max_tokens])
        for start in range(0, len(tokens), max_tokens)
    ]

def return_data() -> str:
    """Returns concatenated extracted data."""
    # This function is registered as an agent tool (`tools=[return_data]` in
    # ppt_content). The docstring above is deliberately left word-for-word:
    # tool frameworks may surface it to the model as the tool description
    # (assumption — confirm against the pydantic_ai version in use).
    page_texts = data  # module-level list filled by extract_data()
    newline = "\n"
    return newline.join(page_texts)

@dataclass
class SupportDependencies:
    """Dependency container holding a single string field, ``db``.

    Nothing in the visible code instantiates or reads this class.
    """

    # Meaning of the string is not established by the visible code
    # (presumably a database handle/identifier — TODO confirm).
    db: str

async def ppt_content(data):
    """
    Generates PowerPoint content using an AI model.

    The page texts in ``data`` are joined with newlines and split into
    word-bounded chunks via ``split_into_token_chunks``. The first chunk
    starts the conversation with the agent; every following chunk is sent as
    a continuation, carrying the full message history of the previous run so
    the model keeps refining the same presentation.

    Note that the agent is also given ``return_data`` as a tool; that tool
    reads the module-level ``data`` list, not this function's ``data``
    argument.

    Args:
        data: Sequence of text strings (one per extracted PDF page).

    Returns:
        ``result.data`` of the final agent run — presumably a
        ``customClass.PPT`` instance, given ``result_type`` (confirm against
        the pydantic_ai version in use). The same value is printed to stdout.

    Raises:
        ValueError: If ``data`` is empty or contains no words at all.
    """
    if not data:
        raise ValueError("No valid text found for PowerPoint generation.")

    chunks = split_into_token_chunks("\n".join(data))
    # Pages consisting only of whitespace produce zero chunks; fail with the
    # same clear error instead of an IndexError on chunks[0].
    if not chunks:
        raise ValueError("No valid text found for PowerPoint generation.")

    agent = Agent(
        model,
        result_type=customClass.PPT,
        tools=[return_data],
        system_prompt="""
        You are an expert in creating PowerPoint presentations.
        Create 5 slides:
        1. Title Slide: Introduction about the presentation.
        2. Methodology Slide: Summarize the methodology in detail.
        3. Results Slide: Present key findings in bullet points.
        4. Discussion Slide: Summarize implications and limitations.
        5. Conclusion Slide: State the overall conclusion.
        
        Each slide should have:
        - Title: Clear and concise.
        - Text: Short and informative explanation.
        - Bullet Points: 3-5 summarized key takeaways.
        """
    )

    message_history: list[ModelMessage] = []

    # Await the agent's coroutine API rather than calling run_sync(): this
    # function is itself a coroutine already executing on an event loop, and
    # run_sync() would try to drive a loop from inside it.
    result = await agent.run(
        user_prompt=f"Create a PowerPoint presentation from {chunks[0]}",
        message_history=message_history,
    )

    for chunk in chunks[1:]:
        result = await agent.run(
            user_prompt=f"Continue creating the PowerPoint presentation from {chunk}",
            message_history=result.all_messages(),
        )

    print(result.data)
    # Hand the generated presentation back so callers can use it, not just
    # read it from stdout.
    return result.data

def ai_ppt(data):
    """Runs the PowerPoint generation in an async loop.

    Creates a dedicated event loop, installs it as the current loop for this
    thread, and drives ``ppt_content(data=data)`` to completion on it.

    Args:
        data: Page texts to pass through to ``ppt_content``.

    Returns:
        Whatever ``ppt_content`` returns.

    Raises:
        Any exception raised by ``ppt_content`` propagates unchanged.
    """
    loop = asyncio.new_event_loop()
    asyncio.set_event_loop(loop)
    try:
        return loop.run_until_complete(ppt_content(data=data))
    finally:
        # Every call creates a fresh loop; without closing it each call would
        # leak the loop and its resources. Uninstall it first so no later
        # code in this thread picks up a closed loop as the current one.
        asyncio.set_event_loop(None)
        loop.close()

def extract_data(feed):
    """Collect the text of every page of a PDF into the module-level `data` list.

    `data` is rebound to a new empty list before the PDF is opened, so text
    from an earlier call is never kept. Pages for which `extract_text()`
    returns nothing (``None`` or an empty string) are skipped.

    Args:
        feed: The PDF source, passed as-is to `pdfplumber.open`.
    """
    global data
    data = []  # start from scratch on every call
    with pdfplumber.open(feed) as document:
        page_texts = (page.extract_text() for page in document.pages)
        # filter(None, ...) keeps only the truthy (non-empty) page texts
        data.extend(filter(None, page_texts))

def main():
    """Render the Streamlit page: upload a PDF, optionally generate slides, preview the PDF.

    Flow while a file is uploaded:
      1. Its text is extracted into the module-level `data` list.
      2. If the "Generate PPT" button was pressed, `ai_ppt(data)` is run and
         a success or error message is shown.
      3. The uploaded PDF itself is displayed with `pdf_viewer`.
    With no file uploaded, only the title and the uploader are shown.
    """
    st.title("AI-Powered PowerPoint Generator")

    pdf_upload = st.file_uploader("Choose a PDF file", type="pdf")

    # Nothing else to render until the user has picked a file.
    if pdf_upload is None:
        return

    extract_data(pdf_upload)

    generate_clicked = st.button("Generate PPT")
    if generate_clicked:
        # UI boundary: report any failure to the user instead of crashing.
        try:
            ai_ppt(data)
            st.success("PowerPoint generation completed!")
        except Exception as exc:
            st.error(f"Error generating PPT: {exc}")

    # Display PDF
    pdf_bytes = pdf_upload.getvalue()
    pdf_viewer(input=pdf_bytes, width=700)

if __name__ == '__main__':
    # nest_asyncio patches asyncio so an event loop can be re-entered while it
    # is already running. It is applied before main() — presumably because
    # ppt_content() calls the agent's run_sync() from inside the loop that
    # ai_ppt() is driving; confirm before removing.
    nest_asyncio.apply()
    main()