Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
|
@@ -30,9 +30,16 @@ def generate_embedding(text, tokenizer, model, device):
|
|
| 30 |
|
| 31 |
# Load dataset
|
| 32 |
@st.cache_data
|
| 33 |
-
def load_data():
|
| 34 |
dataset = load_dataset("frankjosh/filtered_dataset", split="train")
|
| 35 |
df = pd.DataFrame(dataset).head(500) # Limit to 500 repositories
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
| 36 |
return df
|
| 37 |
|
| 38 |
def fetch_readme(repo_url):
|
|
@@ -54,7 +61,7 @@ def main():
|
|
| 54 |
|
| 55 |
# Load resources
|
| 56 |
tokenizer, model, device = load_model()
|
| 57 |
-
data = load_data()
|
| 58 |
|
| 59 |
# Input user query
|
| 60 |
user_query = st.text_input("Describe your project or learning goal:",
|
|
|
|
| 30 |
|
| 31 |
# Load dataset
|
| 32 |
@st.cache_data
|
| 33 |
+
def load_data(tokenizer, model, device):
|
| 34 |
dataset = load_dataset("frankjosh/filtered_dataset", split="train")
|
| 35 |
df = pd.DataFrame(dataset).head(500) # Limit to 500 repositories
|
| 36 |
+
|
| 37 |
+
# Generate embeddings for each row
|
| 38 |
+
def compute_embedding(row):
|
| 39 |
+
text = f"{row['docstring']} {row['summary']}" if 'docstring' in row and 'summary' in row else ""
|
| 40 |
+
return generate_embedding(text, tokenizer, model, device)
|
| 41 |
+
|
| 42 |
+
df['embedding'] = df.apply(compute_embedding, axis=1)
|
| 43 |
return df
|
| 44 |
|
| 45 |
def fetch_readme(repo_url):
|
|
|
|
| 61 |
|
| 62 |
# Load resources
|
| 63 |
tokenizer, model, device = load_model()
|
| 64 |
+
data = load_data(tokenizer, model, device)
|
| 65 |
|
| 66 |
# Input user query
|
| 67 |
user_query = st.text_input("Describe your project or learning goal:",
|