Spaces:
Runtime error
Runtime error
import streamlit as st | |
import os | |
from dotenv import load_dotenv | |
from langchain.document_loaders import GithubFileLoader | |
# from langchain.embeddings import HuggingFaceEmbeddings | |
from langchain_huggingface import HuggingFaceEmbeddings | |
from langchain_community.vectorstores import FAISS | |
from langchain_text_splitters import CharacterTextSplitter | |
# Read key/value pairs from a local .env file into the process environment
# before any of them are looked up below.
load_dotenv()

# Token passed to the GitHub file loader; None when the variable is not set.
GITHUB_ACCESS_TOKEN = os.environ.get("GITHUB_ACCESS_TOKEN")

# Prefix used when building browser links to files in a repository.
GITHUB_BASE_URL = "https://github.com/"
def get_hugging_face_model():
    """Create the embedding model used for both indexing and querying.

    Returns:
        A ``HuggingFaceEmbeddings`` instance configured with the
        ``mchochlov/codebert-base-cd-ft`` model.
    """
    return HuggingFaceEmbeddings(model_name="mchochlov/codebert-base-cd-ft")
def get_similar_files(query, db, embeddings):
    """Look up the entries in ``db`` that are closest to ``query``.

    Args:
        query: The code sample to search for.
        db: A vector store exposing ``similarity_search_with_score``.
        embeddings: Accepted for caller compatibility; not used here.

    Returns:
        Whatever ``db.similarity_search_with_score(query)`` returns; the
        caller iterates it as ``(document, score)`` pairs.
    """
    return db.similarity_search_with_score(query)
# STREAMLIT INTERFACE
# Renders the input widgets, then (once the button is pressed) loads the
# selected file types from the chosen GitHub repository, indexes them, and
# links to the file whose chunk is closest to the pasted code sample.
st.title("Find Similar Code")
st.markdown("This app takes a code sample you provide, and finds similar code in a Github repository.")
st.markdown("This functionality could ideally be implemented across multiple repos to allow you to find helpful examples of how to implement the code you are working on writing, or identify other code contributors who could help you resolve your issues")

USER = st.text_input("Enter the Github User", value="heaversm")
REPO = st.text_input("Enter the Github Repository", value="gdrive-docker")
FILE_TYPES_TO_LOAD = st.multiselect(
    "Select File Types",
    [".py", ".ts", ".js", ".css", ".html"],
    default=[".py"],
)
text_input = st.text_area(
    "Enter a Code Example",
    value="""
def create_app():
    app = connexion.FlaskApp(__name__, specification_dir="../.openapi")
    app.add_api(
        API_VERSION, resolver=connexion.resolver.RelativeResolver("provider.app")
    )
""",
    height=330,
)
button = st.button("Find Similar Code")


def _show_most_similar_file(user, repo, suffixes, query):
    """Index the repository's matching files and link to the closest one.

    Args:
        user: GitHub account name that owns the repository.
        repo: Repository name.
        suffixes: Tuple of file-name endings to load (e.g. ``(".py",)``).
        query: Code sample to compare against the repository contents.

    Shows a warning instead of a link when no file matches ``suffixes`` or
    the search returns nothing.
    """
    # Local import: this script section is the only user of ``quote``.
    from urllib.parse import quote

    loader = GithubFileLoader(
        repo=f"{user}/{repo}",
        access_token=GITHUB_ACCESS_TOKEN,
        github_api_url="https://api.github.com",
        file_filter=lambda file_path: file_path.endswith(suffixes),
    )
    documents = loader.load()

    text_splitter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
    docs = text_splitter.split_documents(documents)
    if not docs:
        # Nothing to index; stop here rather than letting index construction
        # or the first-result lookup below fail on an empty collection.
        st.warning("No files with the selected file types were found in this repository.")
        return

    embedding_vector = get_hugging_face_model()
    db = FAISS.from_documents(docs, embedding_vector)

    results_with_scores = get_similar_files(query, db, embedding_vector)
    if not results_with_scores:
        st.warning("No similar code was found.")
        return

    # Every match goes to the server log; only the first one is linked in the UI.
    for doc, score in results_with_scores:
        print(f"Path: {doc.metadata['path']}, Score: {score}")

    top_file_path = results_with_scores[0][0].metadata['path']
    # Percent-encode the path so spaces or parentheses in file names do not
    # break the Markdown link; "/" separators are left intact by ``quote``.
    # NOTE(review): the link assumes the files live on the "main" branch -
    # confirm this matches the branch the loader reads from.
    top_file_link = f"{GITHUB_BASE_URL}{user}/{repo}/blob/main/{quote(top_file_path)}"
    # write a clickable link in streamlit
    st.markdown(f"[Top file link]({top_file_link})")


if not button:
    st.info("Please Submit a Code Sample to Find Similar Code")
elif not USER.strip() or not REPO.strip():
    st.warning("Enter both a Github user and a repository.")
elif not FILE_TYPES_TO_LOAD:
    # An empty selection would make the file filter reject every path.
    st.warning("Select at least one file type to search.")
else:
    _show_most_similar_file(
        USER.strip(),
        REPO.strip(),
        tuple(FILE_TYPES_TO_LOAD),
        text_input,
    )