Spaces:

ravi6389
/

linkedin_summarizer

Runtime error

File size: 3,925 Bytes

import os
import requests
from dotenv import load_dotenv
from langchain import PromptTemplate

from langchain.chains import LLMChain
import pandas as pd
import numpy as np

import streamlit as st


from langchain_community.llms import HuggingFaceHub

from collections import OrderedDict







import torch
#from transformers import LlamaTokenizer, LlamaForCausalLM

# Load variables from a .env file (when present) before the credentials
# below are read from the process environment.
load_dotenv()

# Seed each per-session slot with an empty string the first time the script
# runs for a session; values already stored are left untouched.
for _state_key in ('list_df', 'e_list', 'profile'):
    if _state_key not in st.session_state:
        st.session_state[_state_key] = ''

e_list = []
list_df = []

# Both credentials are mandatory: indexing os.environ raises KeyError when
# either variable is missing.
token = os.environ['HUGGINGFACEHUB_API_TOKEN']
token2 = os.environ['PROXYCURL_API_KEY']



def scrape_linkedin_profile(linkedin_profile_url: str, *, timeout: float = 60.0) -> dict:
    """Fetch a LinkedIn profile through the Proxycurl API and prune noisy fields.

    Sends a GET request to the Proxycurl LinkedIn endpoint, authenticated
    with the module-level ``token2`` bearer token, and decodes the JSON body.

    Args:
        linkedin_profile_url: URL of the LinkedIn profile to look up.
        timeout: Seconds ``requests`` waits on the API before raising a
            timeout error. Without a timeout a stalled connection would
            block the caller indefinitely.

    Returns:
        The decoded JSON object without entries whose value is ``[]``,
        ``""`` or ``None`` and without the ``people_also_viewed`` and
        ``certifications`` keys. Each entry under ``groups`` has its
        ``profile_pic_url`` removed when present.
    """
    headers = {'Authorization': 'Bearer ' + token2}
    api_endpoint = 'https://nubela.co/proxycurl/api/v2/linkedin'

    response = requests.get(
        api_endpoint,
        params={"url": linkedin_profile_url},
        headers=headers,
        timeout=timeout,
    )

    empty_values = ([], "", None)
    dropped_keys = ("people_also_viewed", "certifications")
    data = {
        key: value
        for key, value in response.json().items()
        if value not in empty_values and key not in dropped_keys
    }

    for group in data.get("groups") or []:
        # Not every group entry is guaranteed to carry a picture URL; the
        # default keeps pop() from raising KeyError on those entries.
        group.pop("profile_pic_url", None)

    return data
    

# Prompt used for the profile's 'summary' section.
# Placeholders filled at call time: {full_name}, {information}.
# NOTE(review): "about {full_name} from I want you to create:" reads as if a
# word was dropped after "from" -- confirm the intended wording.
summary_template = """

Name of the person is {full_name}.
Given input information {information} about {full_name} from I want you to create:
Summarize {information} in 2 lines.Do not repeat.
"""

# Prompt used for the profile's 'experiences' section; asks the model to name
# the companies. Placeholders: {full_name}, {information}.
experience_template = """

Name of the person is {full_name}.
Summarize {information} in 2 lines. You have to mention names of companies where the person has worked.Do not repeat.
"""

# Prompt used for the profile's 'education' section.
# Placeholders: {full_name}, {information}.
# NOTE(review): "insitutes" looks like a typo for "institutes", and the
# dangling "from" appears here too -- left as-is because the text is sent to
# the model verbatim.
education_template = """
Name of the person is {full_name}.
Given input information {information} about {full_name} from I want you to create:
Summarize {information} with the insitutes where education was pursued.
"""
def _summarize_section(llm, template, full_name, information):
    """Summarize one profile section with the given prompt template.

    Args:
        llm: Language model handed to ``LLMChain``.
        template: Prompt text containing the ``{full_name}`` and
            ``{information}`` placeholders.
        full_name: Name substituted into the prompt.
        information: Section content taken from the scraped profile; may be
            missing or empty.

    Returns:
        The chain's ``text`` output, or ``''`` when the section is empty or
        the chain yields no text, so results can always be concatenated.
    """
    if not information:
        return ''
    prompt = PromptTemplate(
        input_variables=["full_name", "information"], template=template
    )
    chain = LLMChain(llm=llm, prompt=prompt)
    result = chain.invoke({'full_name': full_name, 'information': information})
    # Guard against a missing/None 'text' so string concatenation cannot fail.
    return result.get('text') or ''


st.write('hey there, Welcome!!!')
p1 = st.text_input('Enter the LinkedIn profile')


if st.button('Click for summary'):
    if not p1.strip():
        # Nothing to look up: skip the profile request and the model calls.
        st.warning('Please enter a LinkedIn profile URL.')
    else:
        with st.spinner("Generating response.."):
            llm = HuggingFaceHub(
                repo_id="declare-lab/flan-alpaca-large",
                huggingfacehub_api_token=token,
                model_kwargs={"temperature": 0, "max_length": 512},
            )
            linkedin_data1 = scrape_linkedin_profile(p1)
            full_name = linkedin_data1.get('full_name')

            # One LLM call per profile section; a section absent from the
            # scraped data contributes an empty string.
            df1 = _summarize_section(
                llm, summary_template, full_name, linkedin_data1.get('summary')
            )
            df2 = _summarize_section(
                llm, experience_template, full_name, linkedin_data1.get('experiences')
            )
            df3 = _summarize_section(
                llm, education_template, full_name, linkedin_data1.get('education')
            )

            st.write(df1 + df2 + df3)