Ayush Mangal committed on
Commit
9b89f54
1 Parent(s): e9240f6

Add resume loader

Browse files
Files changed (2) hide show
  1. app.py +14 -1
  2. requirements.txt +2 -1
app.py CHANGED
@@ -2,7 +2,7 @@ import streamlit as st
2
  from langchain import PromptTemplate
3
  from langchain.llms import Replicate
4
  import os
5
- from langchain.document_loaders import YoutubeLoader
6
  import requests
7
  import re
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
@@ -68,6 +68,19 @@ def get_query_chain():
68
  loader = WebBaseLoader(links)
69
  data = loader.load()
70
  video_data.extend(data)
 
 
 
 
 
 
 
 
 
 
 
 
 
71
  # print(data)
72
  text_splitter = RecursiveCharacterTextSplitter(chunk_size = 100, chunk_overlap = 0)
73
  all_splits = text_splitter.split_documents(video_data)
 
2
  from langchain import PromptTemplate
3
  from langchain.llms import Replicate
4
  import os
5
+ from langchain.document_loaders import YoutubeLoader, PyPDFLoader
6
  import requests
7
  import re
8
  from langchain.text_splitter import RecursiveCharacterTextSplitter
 
68
  loader = WebBaseLoader(links)
69
  data = loader.load()
70
  video_data.extend(data)
71
+
72
+ url = 'https://huggingface.co/spaces/ayushtues/personal-assistant/resolve/main/resume.pdf'
73
+ r = requests.get(url, stream=True)
74
+
75
+ with open('resume.pdf', 'wb') as fd:
76
+ for chunk in r.iter_content(2000):
77
+ fd.write(chunk)
78
+
79
+ loader = PyPDFLoader("resume.pdf")
80
+ pages = loader.load()
81
+ video_data.extend(pages)
82
+
83
+
84
  # print(data)
85
  text_splitter = RecursiveCharacterTextSplitter(chunk_size = 100, chunk_overlap = 0)
86
  all_splits = text_splitter.split_documents(video_data)
requirements.txt CHANGED
@@ -8,4 +8,5 @@ sentence_transformers
8
  youtube-transcript-api
9
  pytube
10
  bs4
11
- lxml
 
 
8
  youtube-transcript-api
9
  pytube
10
  bs4
11
+ lxml
12
+ pypdf