coursera-assistant-3d-printing-revolution

Runtime error

rohan13 committed on Jun 9, 2023

Commit

6e37923

1 Parent(s): 9eb0222

pdf files

Files changed (4) hide show

mapping.py CHANGED Viewed

@@ -116,4 +116,6 @@ FILE_URL_MAPPING = {
         'https://www.coursera.org/learn/3d-printing-revolution/supplement/HZXB5/module-1-overview',
     'docs/02_module-1-what-is-3d-printing/02_3d-printing-insights/07_what-would-you-make-exercise_peer_assignment_instructions.html':
-        'https://www.coursera.org/learn/3d-printing-revolution/peer/t8bqq/what-would-you-make-exercise'}

         'https://www.coursera.org/learn/3d-printing-revolution/supplement/HZXB5/module-1-overview',
     'docs/02_module-1-what-is-3d-printing/02_3d-printing-insights/07_what-would-you-make-exercise_peer_assignment_instructions.html':
+        'https://www.coursera.org/learn/3d-printing-revolution/peer/t8bqq/what-would-you-make-exercise',
+    'docs/020 3DP Trend report 2023_DEF_BB.pdf':'https://www.hubs.com/get/trends/'
+}

models/openai_vs.pkl CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:807e1e7a285df094ec49ec67f44438d8a300e017fc1290073dd4c60432c9473b
-size 2513084

 version https://git-lfs.github.com/spec/v1
+oid sha256:fba431f53906d67789b21e6b7646f8cb526818db6270f29f427e6ed03e4c42b9
+size 3029176

requirements.txt CHANGED Viewed

@@ -8,4 +8,5 @@ gtts
 torch
 tiktoken
 huggingface-hub
-pymongo

 torch
 tiktoken
 huggingface-hub
+pymongo
+pypdf

utils.py CHANGED Viewed

@@ -7,7 +7,7 @@ from langchain import HuggingFaceHub
 from langchain.cache import InMemoryCache
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
-from langchain.document_loaders import DirectoryLoader, TextLoader, UnstructuredHTMLLoader
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings
 from langchain.memory import ConversationBufferWindowMemory
 from langchain.prompts.chat import (
@@ -149,6 +149,10 @@ def search_index_from_docs(source_chunks):
     return search_index
 def get_html_files():
     loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
     document_list = loader.load()
@@ -158,6 +162,7 @@ def get_html_files():
 def fetch_data_for_embeddings():
     document_list = get_text_files()
     document_list.extend(get_html_files())
     # use file_url_mapping to set metadata of document to url which has been set as the source
     for document in document_list:

 from langchain.cache import InMemoryCache
 from langchain.chains import ConversationalRetrievalChain
 from langchain.chat_models import ChatOpenAI
+from langchain.document_loaders import DirectoryLoader, TextLoader, UnstructuredHTMLLoader, PyPDFDirectoryLoader
 from langchain.embeddings import OpenAIEmbeddings, HuggingFaceHubEmbeddings
 from langchain.memory import ConversationBufferWindowMemory
 from langchain.prompts.chat import (
     return search_index
+def get_pdf_files():
+    loader = PyPDFDirectoryLoader('docs', glob="**/*.pdf", recursive=True)
+    document_list = loader.load()
+    return document_list
 def get_html_files():
     loader = DirectoryLoader('docs', glob="**/*.html", loader_cls=UnstructuredHTMLLoader, recursive=True)
     document_list = loader.load()
 def fetch_data_for_embeddings():
     document_list = get_text_files()
     document_list.extend(get_html_files())
+    document_list.extend(get_pdf_files())
     # use file_url_mapping to set metadata of document to url which has been set as the source
     for document in document_list: