Spaces:
Runtime error
Runtime error
File size: 3,925 Bytes
55ce100 2182190 ffc924b 55ce100 2182190 55ce100 adef449 55ce100 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 |
import os
import requests
from dotenv import load_dotenv
from langchain import PromptTemplate
from langchain.chains import LLMChain
import pandas as pd
import numpy as np
import streamlit as st
from langchain_community.llms import HuggingFaceHub
from collections import OrderedDict
import torch
#from transformers import LlamaTokenizer, LlamaForCausalLM
# Load variables from a .env file (if any) before the credentials are read
# from the environment further down.
load_dotenv()

# Seed every session-state slot this app uses with an empty string the first
# time the script runs in a session; later reruns keep the stored values.
for _state_key in ("list_df", "e_list", "profile"):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ""
del _state_key  # do not leave the loop variable behind as a module global

# Module-level scratch lists (distinct from the session-state entries above).
e_list = []
list_df = []

# API credentials; a missing variable raises KeyError at start-up.
token = os.environ["HUGGINGFACEHUB_API_TOKEN"]
token2 = os.environ["PROXYCURL_API_KEY"]
def scrape_linkedin_profile(linkedin_profile_url: str, timeout: float = 60) -> dict:
    """Fetch a LinkedIn profile through the Proxycurl API and prune the result.

    Args:
        linkedin_profile_url: URL of the LinkedIn profile to look up; it is
            sent as the ``url`` query parameter.
        timeout: Seconds to wait for the HTTP response. Without a timeout
            ``requests`` may block indefinitely on an unresponsive server.

    Returns:
        The decoded JSON object with empty values (``[]``, ``""``, ``None``)
        and the ``people_also_viewed`` / ``certifications`` keys removed, and
        with ``profile_pic_url`` stripped from every entry under ``groups``.

    Raises:
        requests.exceptions.Timeout: If no response arrives within ``timeout``.
    """
    headers = {'Authorization': 'Bearer ' + token2}
    api_endpoint = 'https://nubela.co/proxycurl/api/v2/linkedin'
    response = requests.get(
        api_endpoint,
        params={"url": linkedin_profile_url},
        headers=headers,
        timeout=timeout,
    )
    data = response.json()

    # Drop empty fields and bulky sections that are not used for summaries.
    data = {
        k: v
        for k, v in data.items()
        if v not in ([], "", None)
        and k not in ["people_also_viewed", "certifications"]
    }

    # Not every group entry necessarily carries a picture URL, so pop with a
    # default instead of raising KeyError on the first one that lacks it.
    for group_dict in data.get("groups") or []:
        group_dict.pop("profile_pic_url", None)

    return data
# Prompt templates. Each one contains the placeholders {full_name} and
# {information}; the text inside the triple quotes is sent to the model
# verbatim, so it is runtime data rather than documentation.

# Prompt for condensing the profile's free-text summary into two lines.
# NOTE(review): "from I want you to create" looks like it is missing a word
# after "from" -- confirm the intended wording before changing the prompt.
summary_template = """
Name of the person is {full_name}.
Given input information {information} about {full_name} from I want you to create:
Summarize {information} in 2 lines.Do not repeat.
"""
# Prompt for condensing the work history into two lines, naming the companies.
experience_template = """
Name of the person is {full_name}.
Summarize {information} in 2 lines. You have to mention names of companies where the person has worked.Do not repeat.
"""
# Prompt for summarizing the education history together with the institutes.
# NOTE(review): "insitutes" is misspelled in the prompt text; left untouched
# here because changing it alters what the model receives.
education_template = """
Name of the person is {full_name}.
Given input information {information} about {full_name} from I want you to create:
Summarize {information} with the insitutes where education was pursued.
"""
# ---------------------------------------------------------------------------
# Streamlit page: ask for a LinkedIn profile URL and, on button click, show an
# LLM-generated summary of the profile's summary, experience and education.
# ---------------------------------------------------------------------------
st.write('hey there, Welcome!!!')
p1 = st.text_input('Enter the LinkedIn profile')

if st.button('Click for summary'):
    if not p1.strip():
        # Do not spend an API call on an empty URL.
        st.warning('Please enter a LinkedIn profile URL.')
    else:
        with st.spinner("Generating response.."):
            llm = HuggingFaceHub(
                repo_id="declare-lab/flan-alpaca-large",
                huggingfacehub_api_token=token,
                model_kwargs={"temperature": 0, "max_length": 512},
            )
            linkedin_data1 = scrape_linkedin_profile(p1)
            full_name = linkedin_data1.get('full_name')

            # One (profile field, prompt template) pair per section, in the
            # order the sections appear in the final text.
            section_specs = (
                ('summary', summary_template),
                ('experiences', experience_template),
                ('education', education_template),
            )

            section_texts = []
            for field, template in section_specs:
                information = linkedin_data1.get(field)
                if not information:
                    # Field absent from the profile: contributes no text.
                    continue
                prompt = PromptTemplate(
                    input_variables=["full_name", "information"],
                    template=template,
                )
                chain = LLMChain(llm=llm, prompt=prompt)
                result = chain.invoke(
                    {'full_name': full_name, 'information': information}
                )
                # Guard against a missing 'text' entry so joining never
                # fails on None.
                section_texts.append(result.get('text') or '')

            combined = "".join(section_texts)
            # Drop repeated sentences while keeping first-seen order. The
            # original computed this value but displayed the raw text instead.
            deduplicated = ".".join(OrderedDict.fromkeys(combined.split(".")))

        st.write(deduplicated)