import os
from collections import OrderedDict

import requests
import numpy as np
import pandas as pd
import streamlit as st
import torch
from dotenv import load_dotenv
from langchain import PromptTemplate
from langchain.chains import LLMChain
from langchain_community.llms import HuggingFaceHub

load_dotenv()

# ---------------------------------------------------------------------------
# Streamlit session-state defaults (created once per browser session).
# ---------------------------------------------------------------------------
if 'list_df' not in st.session_state:
    st.session_state['list_df'] = ''
if 'e_list' not in st.session_state:
    st.session_state['e_list'] = ''
if 'profile' not in st.session_state:
    st.session_state['profile'] = ''

# NOTE(review): these module-level lists are never read or written in this
# file; kept only in case another module imports them.
e_list = []
list_df = []

# API credentials loaded from the environment (populated by load_dotenv()).
token = os.environ['HUGGINGFACEHUB_API_TOKEN']   # Hugging Face Hub token
token2 = os.environ['PROXYCURL_API_KEY']         # Proxycurl API key


def scrape_linkedin_profile(linkedin_profile_url: str) -> dict:
    """Fetch a LinkedIn profile through the Proxycurl API.

    Parameters
    ----------
    linkedin_profile_url : str
        Full URL of the public LinkedIn profile to scrape.

    Returns
    -------
    dict
        Profile data with empty values and noisy keys
        ("people_also_viewed", "certifications") removed, and group
        profile-picture URLs stripped.

    Raises
    ------
    requests.HTTPError
        If Proxycurl returns a non-2xx status code.
    """
    headers = {'Authorization': 'Bearer ' + token2}
    api_endpoint = 'https://nubela.co/proxycurl/api/v2/linkedin'
    response = requests.get(
        api_endpoint,
        params={"url": linkedin_profile_url},
        headers=headers,
    )
    # Fail fast on auth/quota/bad-URL errors instead of treating the JSON
    # error body as profile data.
    response.raise_for_status()
    data = response.json()

    # Drop empty values and keys that only add noise to the LLM prompt.
    # (The original membership tuple listed "" twice; once is enough.)
    data = {
        k: v
        for k, v in data.items()
        if v not in ([], "", None)
        and k not in ["people_also_viewed", "certifications"]
    }
    # Strip picture URLs from group entries; pop with a default so a
    # missing key does not raise KeyError.
    if data.get("groups"):
        for group_dict in data["groups"]:
            group_dict.pop("profile_pic_url", None)
    return data


# ---------------------------------------------------------------------------
# Prompt templates: each receives the person's name plus one section of the
# scraped profile and asks the LLM for a short, non-repetitive summary.
# ---------------------------------------------------------------------------
summary_template = """
    Name of the person is {full_name}.
    Given input information {information} about {full_name} from I want you to create:
    Summarize {information} in 2 lines.Do not repeat.
    """

experience_template = """
    Name of the person is {full_name}.
    Summarize {information} in 2 lines. You have to mention names of companies where the person has worked.Do not repeat.
    """

education_template = """
    Name of the person is {full_name}.
    Given input information {information} about {full_name} from I want you to create:
    Summarize {information} with the institutes where education was pursued.
    """

# ---------------------------------------------------------------------------
# Streamlit UI
# ---------------------------------------------------------------------------
st.write('hey there, Welcome!!!')
p1 = st.text_input('Enter the LinkedIn profile')

if st.button('Click for summary'):
    with st.spinner("Generating response.."):
        llm = HuggingFaceHub(
            repo_id="declare-lab/flan-alpaca-large",
            huggingfacehub_api_token=token,
            model_kwargs={"temperature": 0, "max_length": 512},
        )

        linkedin_data1 = scrape_linkedin_profile(p1)
        full_name = linkedin_data1.get('full_name')

        def _summarize(template: str, information) -> str:
            """Run one LLMChain over *template* for *information*.

            Returns '' when the profile section is empty or missing, which
            matches the original per-section guard behavior.
            """
            if not information:
                return ''
            prompt = PromptTemplate(
                input_variables=["full_name", "information"],
                template=template,
            )
            chain = LLMChain(llm=llm, prompt=prompt)
            result = chain.invoke(
                {'full_name': full_name, 'information': information}
            )
            return result.get('text')

        df1 = _summarize(summary_template, linkedin_data1.get('summary'))
        df2 = _summarize(experience_template, linkedin_data1.get('experiences'))
        df3 = _summarize(education_template, linkedin_data1.get('education'))

        # De-duplicate sentences across the three summaries while keeping
        # their first-seen order.  BUG FIX: the original computed this
        # de-duplicated string (`y`) but then displayed the raw
        # concatenation, leaving the dedup as dead code.
        test_string = df1 + df2 + df3
        y = ".".join(OrderedDict.fromkeys(test_string.split(".")))
        st.write(y)