import os

import streamlit as st
from langchain.text_splitter import CharacterTextSplitter
from langchain.embeddings.openai import OpenAIEmbeddings
from langchain.vectorstores import FAISS
from langchain.chains import LLMChain
from langchain.prompts import PromptTemplate
from langchain.chat_models import ChatOpenAI

# Read the OpenAI API key from the environment; never hard-code secrets in source.
OPENAI_API_KEY = os.environ.get("OPENAI_API_KEY", "")
if not OPENAI_API_KEY:
    st.warning("OPENAI_API_KEY is not set. Export it before running the app.")


def train_model_with_transcript(transcript):
    """Index the transcript into a FAISS vector store for retrieval."""
    # Split the transcript into smaller, overlapping chunks
    splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=100)
    docs = splitter.split_text(transcript)

    # Create embeddings and build the vector store
    embeddings = OpenAIEmbeddings()
    vectorstore = FAISS.from_texts(docs, embeddings)
    return vectorstore


def generate_similar_content(query, vectorstore):
    """Generate content similar to the input query using the indexed transcript."""
    llm = ChatOpenAI(model_name="gpt-3.5-turbo")
    retriever = vectorstore.as_retriever()

    prompt_template = """
    Use the context below to generate content similar to the provided input:

    Context: {context}

    Input Query: {query}

    Similar Content:
    """
    prompt = PromptTemplate(input_variables=["context", "query"], template=prompt_template)
    chain = LLMChain(llm=llm, prompt=prompt)

    # Retrieve the most relevant chunks and join them into a single context string
    context_docs = retriever.get_relevant_documents(query)
    context_text = " ".join(doc.page_content for doc in context_docs)

    result = chain.run({"context": context_text, "query": query})
    return result


# Streamlit app UI
st.title("Text-based Content Generator")
st.markdown("Upload a transcription file, train the model, and generate similar content.")

uploaded_file = st.file_uploader("Upload Transcription File (TXT):", type=["txt"])

if uploaded_file:
    with st.spinner("Reading transcription file..."):
        transcription = uploaded_file.read().decode("utf-8")
    st.success("Transcription file loaded successfully!")

    if st.button("Train Model"):
        with st.spinner("Training model..."):
            # Persist the vector store in session state so it survives Streamlit reruns;
            # a plain local variable would be lost when the next button click reruns the script.
            st.session_state["vectorstore"] = train_model_with_transcript(transcription)
        st.success("Model trained successfully!")

query = st.text_input("Enter your query to generate similar content:")

if st.button("Generate Content"):
    if "vectorstore" in st.session_state:
        with st.spinner("Generating content..."):
            result = generate_similar_content(query, st.session_state["vectorstore"])
        st.success("Content generated successfully!")
        st.text_area("Generated Content", value=result, height=200)
    else:
        st.error("Please train the model first by uploading a transcription file.")
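
# Assumed setup (not part of the original script, sketched here for convenience):
# the import paths above target the legacy LangChain 0.0.x/0.1.x layout, so an
# environment along these lines should work:
#
#   pip install streamlit "langchain<0.2" openai faiss-cpu tiktoken
#   export OPENAI_API_KEY="sk-..."          # your own key, kept out of source control
#   streamlit run app.py                    # "app.py" is a hypothetical filename
#
# Newer LangChain releases move these classes to langchain_community / langchain_openai,
# so the imports would need adjusting if you upgrade.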