Spaces:

0504ankitsharma
/

ThaparGPT-Anthropic

Sleeping

App Files Files Community

0504ankitsharma committed on Jul 21, 2024

Commit

ce5090a

1 Parent(s): a8db048

Add application file

Browse files

Files changed (3) hide show

Dockerfile +16 -0
app.py +159 -0
requirements.txt +10 -0

Dockerfile ADDED Viewed

	@@ -0,0 +1,16 @@

+# read the doc: https://huggingface.co/docs/hub/spaces-sdks-docker
+# you will also find guides on how best to write your Dockerfile
+FROM python:3.9
+# Create a non-root account with UID 1000 and run every later step as that user.
+RUN useradd -m -u 1000 user
+USER user
+# Put the user's local bin directory on PATH so scripts installed by pip for this user can be found.
+ENV PATH="/home/user/.local/bin:$PATH"
+WORKDIR /app
+# Copy requirements.txt on its own first so the dependency install layer can be cached
+# independently of changes to the application source.
+COPY --chown=user ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir --upgrade -r requirements.txt
+# Copy the rest of the build context into /app, owned by the non-root user.
+COPY --chown=user . /app
+# Serve the `app` object from app.py on all interfaces, port 7860.
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]

app.py ADDED Viewed

	@@ -0,0 +1,159 @@

+import os
+from fastapi import FastAPI, HTTPException
+from fastapi.middleware.cors import CORSMiddleware
+from pydantic import BaseModel
+from langchain.embeddings import OpenAIEmbeddings
+from langchain.chat_models import ChatOpenAI
+from langchain.vectorstores import Pinecone
+from langchain.text_splitter import RecursiveCharacterTextSplitter
+from langchain.document_loaders import UnstructuredWordDocumentLoader as DocxLoader
+from langchain.chains import ConversationalRetrievalChain
+from langchain.prompts import ChatPromptTemplate
+from langchain.memory import ConversationBufferMemory
+from pinecone import Pinecone as PC, ServerlessSpec
+import time
+import re
+from dotenv import load_dotenv
+# Load environment variables
+load_dotenv()
+app = FastAPI()
+# CORS is fully open: every origin, HTTP method and request header is allowed.
+# NOTE(review): wildcard origins combined with allow_credentials=True is very
+# permissive — confirm this is intended before exposing the API publicly.
+app.add_middleware(
+    CORSMiddleware,
+    allow_origins=["*"],
+    allow_credentials=True,
+    allow_methods=["*"],
+    allow_headers=["*"],
+)
+# Initialize Pinecone
+pinecone_api_key = os.environ.get("PINECONE_API_KEY")
+if not pinecone_api_key:
+    raise HTTPException(status_code=500, detail="PINECONE_API_KEY environment variable is not set")
+try:
+    pc = PC(api_key=pinecone_api_key)
+except Exception as e:
+    raise HTTPException(status_code=500, detail=f"Failed to initialize Pinecone: {str(e)}")
+index_name = "rag-project"  # Replace with your actual index name
+# Initialize OpenAI
+openai_api_key = os.environ.get("OPENAI_API_KEY")
+if not openai_api_key:
+    raise HTTPException(status_code=500, detail="OPENAI_API_KEY environment variable is not set")
+try:
+    embeddings = OpenAIEmbeddings(openai_api_key=openai_api_key)
+    llm = ChatOpenAI(api_key=openai_api_key, model="gpt-4")
+except Exception as e:
+    raise HTTPException(status_code=500, detail=f"Failed to initialize OpenAI: {str(e)}")
+# Request body accepted by POST /query.
+class Query(BaseModel):
+    # The user's question; when it is empty, /query answers "No Query Found".
+    query_text: str
+    # Key used to look up this caller's conversation memory in conversation_histories.
+    session_id: str
+def clean_response(response):
+    cleaned = response.strip()
+    cleaned = re.sub(r'^["\']+|["\']+$', '', cleaned)
+    cleaned = re.sub(r'\n+', '\n', cleaned)
+    cleaned = cleaned.replace('\\n', '')
+    return cleaned
+# Prompt template handed to the retrieval chain's combine-documents step (see
+# combine_docs_chain_kwargs in the /query handler). Its placeholders are
+# {chat_history}, {context} and {question}.
+prompt = ChatPromptTemplate.from_template(
+"""
+You are a helpful assistant designed specifically for the Thapar Institute of Engineering and Technology (TIET), a renowned technical college. Your task is to answer all queries related to TIET. Every response you provide should be relevant to the context of TIET. If a question falls outside of this context, please decline by stating, 'Sorry, I cannot help with that.' If you do not know the answer to a question, do not attempt to fabricate a response; instead, politely decline.
+You may elaborate on your answers slightly to provide more information, but avoid sounding boastful or exaggerating. Stay focused on the context provided.
+Previous conversation:
+{chat_history}
+Context: {context}
+Human: {question}
+Assistant: Let's approach this step-by-step:
+"""
+)
+# Store conversation histories
+# Maps session_id -> ConversationBufferMemory. The dict lives in process memory
+# only, and nothing in this file ever removes an entry from it.
+conversation_histories = {}
+@app.get("/")
+def read_root():
+    return {"Hello": "World"}
+@app.post("/query")
+def read_item(query: Query):
+    try:
+        vectorstore = Pinecone.from_existing_index(index_name, embeddings)
+    except Exception as e:
+        print(f"Error loading vector store: {str(e)}")
+        return {"response": "Vector Store Not Found or Error Loading. Please run /setup first."}
+    if query.query_text:
+        start = time.process_time()
+        # Get or create a new conversation memory for this session
+        if query.session_id not in conversation_histories:
+            conversation_histories[query.session_id] = ConversationBufferMemory(
+                memory_key="chat_history",
+                return_messages=True
+            )
+        memory = conversation_histories[query.session_id]
+        qa_chain = ConversationalRetrievalChain.from_llm(
+            llm=llm,
+            retriever=vectorstore.as_retriever(),
+            memory=memory,
+            combine_docs_chain_kwargs={"prompt": prompt}
+        )
+        response = qa_chain({"question": query.query_text})
+        print("Response time:", time.process_time() - start)
+        cleaned_response = clean_response(response['answer'])
+        print("Cleaned response:", repr(cleaned_response))
+        return {"response": cleaned_response}
+    else:
+        return {"response": "No Query Found"}
+@app.get("/setup")
+def setup():
+    try:
+        file_path = "./data/data.docx"
+        if not os.path.exists(file_path):
+            print(f"The file {file_path} does not exist.")
+            return {"response": "Error: Data file not found"}
+        loader = DocxLoader(file_path)
+        documents = loader.load()
+        print(f"Loaded document: {file_path}")
+        text_splitter = RecursiveCharacterTextSplitter(chunk_size=500, chunk_overlap=100)
+        chunks = text_splitter.split_documents(documents)
+        print(f"Created {len(chunks)} chunks.")
+        # Check if the index exists, if not, create it
+        if index_name not in pc.list_indexes().names():
+            pc.create_index(
+                name=index_name,
+                dimension=1536,  # This should match the dimension of your embeddings
+                metric='cosine',
+                spec=ServerlessSpec(cloud='aws', region='us-west-2')  # Adjust as needed
+            )
+        vectorstore = Pinecone.from_documents(chunks, embeddings, index_name=index_name)
+        print("Vector store created and saved successfully.")
+        return {"response": "Vector Store in Pinecone Is Ready"}
+    except Exception as e:
+        print(f"An error occurred: {str(e)}")
+        return {"response": f"Error: {str(e)}"}
+# Entry point for running this file directly as a script.
+# NOTE(review): this path serves on port 8000, while the Dockerfile CMD starts
+# uvicorn on port 7860 — confirm the difference is intentional.
+if __name__ == "__main__":
+    import uvicorn
+    uvicorn.run(app, host="0.0.0.0", port=8000)

requirements.txt ADDED Viewed

	@@ -0,0 +1,10 @@

+# Runtime dependencies for app.py; none of them are version-pinned.
+fastapi
+uvicorn
+langchain
+# NOTE(review): app.py imports OpenAIEmbeddings and ChatOpenAI from `langchain`,
+# not from this package — confirm it is still needed.
+langchain_openai
+pinecone-client
+python-dotenv
+langchain_community
+# NOTE(review): app.py only loads a .docx file — confirm the `pdf` extra is the intended one.
+unstructured[pdf]
+python-docx
+openai