import streamlit as st
import numpy as np
import torch
from transformers import AutoTokenizer, AutoModel, RagTokenizer, RagRetriever, RagSequenceForGeneration
from pymilvus import connections, Collection, CollectionSchema, FieldSchema, DataType
from dotenv import load_dotenv
import os

# Load environment variables
load_dotenv()
GROQ_API_KEY = os.getenv('GROQ_API_KEY')
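# NOTE: GROQ_API_KEY is loaded here but nothing below actually calls the Groq API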

# Initialize Milvus connection
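# (assumes a Milvus server is already running locally on its default port)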
connections.connect("default", host="localhost", port="19530")

# Define Milvus schema and collection
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
    FieldSchema(name="text", dtype=DataType.VARCHAR, max_length=4096),  # raw text, so search hits can be read back
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=384)  # all-MiniLM-L6-v2 outputs 384-dim vectors
]
schema = CollectionSchema(fields, "User Data Collection")
collection = Collection(name="user_data", schema=schema)
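
# A vector index and an explicit load are required before the collection can be searched;
# IVF_FLAT with L2 is an assumption here, so tune the index params for your data
collection.create_index(
    field_name="embedding",
    index_params={"index_type": "IVF_FLAT", "metric_type": "L2", "params": {"nlist": 128}},
)
collection.load()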

# Load Hugging Face models
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
tokenizer_rag = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
# Milvus handles retrieval in this app, so a dummy HF index suffices here;
# a real "custom" index would also require passages_path and index_path
retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="exact", use_dummy_dataset=True)
model_rag = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq", retriever=retriever)

# Define functions
def generate_embedding(text):
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
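    # Mean-pool the token embeddings into one fixed-size sentence vector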
    return outputs.last_hidden_state.mean(dim=1).numpy().tolist()[0]

def insert_data(user_id, text, embedding):
    # pymilvus expects column-wise data: one list per schema field
    collection.insert([[user_id], [text], [embedding]])
    collection.flush()  # make the new entity searchable right away

def retrieve_relevant_data(query, top_k=5):
    query_embedding = generate_embedding(query)
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    # search() takes a list of query vectors and returns one hit list per query
    results = collection.search(
        data=[query_embedding],
        anns_field="embedding",
        param=search_params,
        limit=top_k,
        output_fields=["text"],
    )
    return results[0]

def generate_cv(job_description, company_profile=None):
    query = job_description
    if company_profile:
        query += f" Company profile: {company_profile}"

    # Ground the generation on the user data retrieved from Milvus
    hits = retrieve_relevant_data(query)
    context = " ".join(hit.entity.get("text") for hit in hits)

    # Prepend the retrieved context to the query: passing context_input_ids to
    # generate() directly would also require doc_scores, so this keeps the call simple
    inputs = tokenizer_rag(f"{context} {query}", return_tensors="pt", truncation=True)
    outputs = model_rag.generate(input_ids=inputs["input_ids"])
    return tokenizer_rag.decode(outputs[0], skip_special_tokens=True)

# Streamlit UI
st.title("Custom CV Generator")

st.sidebar.header("Input Data")
skills = st.sidebar.text_input("Enter your skills")
experience = st.sidebar.text_input("Enter your experience")
education = st.sidebar.text_input("Enter your education")
job_description = st.sidebar.text_area("Enter job description")
company_profile = st.sidebar.text_area("Enter company profile (optional)")

if st.sidebar.button("Generate CV"):
    # Insert user data (assuming single user for simplicity)
    user_data = f"Skills: {skills}. Experience: {experience}. Education: {education}."
    user_id = 1  # Example user ID
    user_embedding = generate_embedding(user_data)
    insert_data(user_id, user_data, user_embedding)
    
    # Generate CV
    cv_text = generate_cv(job_description, company_profile)
    st.write("Your Tailored CV:")
    st.write(cv_text)
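
# To launch the app (assuming this file is saved as app.py):
#   streamlit run app.py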