# Custom CV Generator — Streamlit app backed by a Milvus vector store and a
# Hugging Face RAG model. (Header replaces non-code Hugging Face Spaces page
# residue that was captured with the source.)
import os

import streamlit as st
import numpy as np
import torch
from dotenv import load_dotenv
from pymilvus import connections, Collection, CollectionSchema, FieldSchema, DataType
from transformers import (
    AutoTokenizer,
    AutoModel,
    RagTokenizer,
    RagRetriever,
    RagSequenceForGeneration,
)

# --- Environment -------------------------------------------------------------
# Pull configuration from a local .env file.
load_dotenv()
GROQ_API_KEY = os.getenv('GROQ_API_KEY')  # NOTE(review): loaded but never used below

# --- Milvus vector store -----------------------------------------------------
connections.connect("default", host="localhost", port="19530")

# One entity per user: INT64 primary key + a float vector embedding.
# dim must equal the embedding model's hidden size; all-MiniLM-L6-v2 emits
# 384-dim vectors (the original 768 would make every insert fail validation).
fields = [
    FieldSchema(name="id", dtype=DataType.INT64, is_primary=True),
    FieldSchema(name="embedding", dtype=DataType.FLOAT_VECTOR, dim=384),
]
schema = CollectionSchema(fields, "User Data Collection")
collection = Collection(name="user_data", schema=schema)

# --- Hugging Face models -----------------------------------------------------
# MiniLM pair: used by generate_embedding() for sentence embeddings.
tokenizer = AutoTokenizer.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
model = AutoModel.from_pretrained("sentence-transformers/all-MiniLM-L6-v2")
# RAG trio: used by generate_cv() for retrieval-augmented generation.
tokenizer_rag = RagTokenizer.from_pretrained("facebook/rag-sequence-nq")
# NOTE(review): index_name="custom" normally also requires passages_path /
# index_path; as written this load will likely fail — confirm intended index.
retriever = RagRetriever.from_pretrained("facebook/rag-sequence-nq", index_name="custom")
model_rag = RagSequenceForGeneration.from_pretrained("facebook/rag-sequence-nq")
# Define functions | |
def generate_embedding(text):
    """Return a mean-pooled sentence embedding for *text* as list[float].

    Tokenizes with the module-level MiniLM tokenizer (truncating/padding),
    runs the encoder without gradient tracking, and mean-pools the last
    hidden state over the token axis.

    Returns:
        list[float] of length equal to the model's hidden size
        (384 for all-MiniLM-L6-v2).
    """
    inputs = tokenizer(text, return_tensors="pt", truncation=True, padding=True)
    with torch.no_grad():
        outputs = model(**inputs)
    # (1, seq_len, hidden) --mean over tokens--> (1, hidden); [0] drops the
    # batch dimension so callers get a flat vector.
    return outputs.last_hidden_state.mean(dim=1).numpy().tolist()[0]
def insert_data(user_id, embedding):
    """Insert one user's embedding into the Milvus collection.

    pymilvus `insert` takes column-oriented data: one list per schema field,
    all of equal length. The original passed a flat row `[user_id, embedding]`,
    which Milvus rejects as malformed field data.

    Args:
        user_id: INT64 primary key for the entity.
        embedding: list[float] matching the collection's vector dim.
    """
    collection.insert([[user_id], [embedding]])
    # Flush so the entity is immediately visible to search() in this session.
    collection.flush()
def retrieve_relevant_data(query, limit=5):
    """Vector-search the Milvus collection for entities nearest to *query*.

    Args:
        query: free-text query; embedded via generate_embedding().
        limit: maximum hits per query (new, backward-compatible parameter;
            pymilvus requires a limit, which the original call omitted).

    Returns:
        pymilvus search results — one hit-list per query vector.
    """
    query_embedding = generate_embedding(query)
    search_params = {"metric_type": "L2", "params": {"nprobe": 10}}
    # search() takes a *batch* of query vectors, hence the wrapping list; the
    # original passed the bare vector, which pymilvus rejects.
    results = collection.search([query_embedding], "embedding", search_params, limit=limit)
    return results
def generate_cv(job_description, company_profile=None):
    """Generate a tailored CV with the RAG model, conditioned on retrieved data.

    Args:
        job_description: target job posting text.
        company_profile: optional company description appended to the query.

    Returns:
        str: the decoded generated CV text.
    """
    query = job_description
    if company_profile:
        query += f" Company profile: {company_profile}"
    relevant_data = retrieve_relevant_data(query)
    # collection.search returns one hit-list per query vector; take the first.
    # The original iterated the outer result and read `.text`, which raises
    # AttributeError.
    # NOTE(review): the Milvus schema stores only `id` and `embedding`, so hits
    # carry no text payload — a `text` field (or a lookup keyed by hit.id in
    # another store) is needed for this context join to be meaningful.
    hits = relevant_data[0] if relevant_data else []
    context = " ".join(str(getattr(hit, "text", hit.id)) for hit in hits)
    inputs = tokenizer_rag(query, return_tensors="pt")
    context_inputs = tokenizer_rag(context, return_tensors="pt")
    # NOTE(review): RAG generate with context_input_ids typically also expects
    # doc_scores — confirm against the transformers RAG API.
    outputs = model_rag.generate(
        input_ids=inputs['input_ids'],
        context_input_ids=context_inputs['input_ids'],
    )
    return tokenizer_rag.decode(outputs[0], skip_special_tokens=True)
# --- Streamlit UI ------------------------------------------------------------
st.title("Custom CV Generator")
st.sidebar.header("Input Data")
skills = st.sidebar.text_input("Enter your skills")
experience = st.sidebar.text_input("Enter your experience")
education = st.sidebar.text_input("Enter your education")
job_description = st.sidebar.text_area("Enter job description")
company_profile = st.sidebar.text_area("Enter company profile (optional)")

if st.sidebar.button("Generate CV"):
    # Persist the user's profile as a single embedded document.
    # NOTE(review): a fixed primary key means each click re-inserts id=1 and
    # Milvus will accumulate duplicates — consider delete-then-insert/upsert.
    user_data = f"Skills: {skills}. Experience: {experience}. Education: {education}."
    user_id = 1  # single-user demo
    user_embedding = generate_embedding(user_data)
    insert_data(user_id, user_embedding)

    # Generate and display the tailored CV.
    cv_text = generate_cv(job_description, company_profile)
    st.write("Your Tailored CV:")
    st.write(cv_text)