Spaces:

cogcorp
/

homework

Sleeping

App Files Files Community

cogcorp committed on Jun 7, 2023

Commit

8d19c24

1 Parent(s): 8a2b80d

Upload 2 files

Browse files

Files changed (2) hide show

app.txt +75 -0
requirements.txt +9 -0

app.txt ADDED Viewed

	@@ -0,0 +1,75 @@

+import gradio as gr
+import pandas as pd
+import os
+import spacy
+import numpy as np
+from sklearn.feature_extraction.text import TfidfVectorizer
+from sklearn.metrics.pairwise import cosine_similarity
+from keras.preprocessing.text import text_to_word_sequence
+from joblib import Parallel, delayed
+nlp = spacy.load('en_core_web_md')
+# Define vendor_df and vectorizer as global variables
+vendor_df = None
+vectorizer = TfidfVectorizer()
+# Function to preprocess text
+def preprocess_text(text):
+    if isinstance(text, str):
+        text = text.lower()
+        filters='!"#$%&()*+,-./:;<=>?@[\\]^_`{|}~\t\n'
+        text = text.translate(str.maketrans('', '', filters))
+        stop_words = ['does', 'the', 'offer', 'do', 'you', 'require']
+        words = text_to_word_sequence(text)
+        words = [word for word in words if word not in stop_words]
+        return ' '.join(words)
+    else:
+        return text
+# Function to perform semantic search
+def semantic_search(query, vendor_vectors):
+    query = preprocess_text(query)
+    query_vector = vectorizer.transform([query])
+    cosine_similarities = cosine_similarity(query_vector, vendor_vectors).flatten()
+    return np.argmax(cosine_similarities)
+# Function to parse number from text
+def parse_number(text):
+    if isinstance(text, str):
+        return int(''.join(filter(str.isdigit, text)))
+    else:
+        return 0
+# Function to process row
+def process_row(row, vendor_vectors):
+    most_similar_index = semantic_search(row[0], vendor_vectors)
+    most_similar_row = vendor_df.iloc[most_similar_index, :]
+    combined_row = pd.concat([row, most_similar_row])
+    combined_row['score'] = combined_row['score_client'] * combined_row['score_vendor']
+    return combined_row
+# Function to process file
+def process_file(mode, file):
+    global vendor_df  # Declare vendor_df as global
+    global vectorizer  # Declare vectorizer as global
+    if mode == 'Upload Vendor File':
+        vendor_df = pd.read_excel(file.name)
+        vendor_df.iloc[:, 2] = vendor_df.iloc[:, 2].apply(preprocess_text)
+        vendor_df['score_vendor'] = vendor_df.iloc[:, 4].apply(parse_number)
+        vectorizer.fit(vendor_df.iloc[:, 2])
+        return "Vendor data file has been uploaded.", None
+    elif mode == 'Compare with Client File':
+        client_df = pd.read_excel(file.name)
+        client_df = client_df[client_df.iloc[:, 1] == 'Yes']  # Only consider rows where the second column is 'Yes'
+        client_df.iloc[:, 0] = client_df.iloc[:, 0].apply(preprocess_text)
+        client_df['score_client'] = client_df.iloc[:, 2].apply(parse_number)
+        vendor_vectors = vectorizer.transform(vendor_df.iloc[:, 2])
+        common_list = Parallel(n_jobs=-1)(delayed(process_row)(row, vendor_vectors) for index, row in client_df.iterrows())
+        common_df = pd.DataFrame(common_list)
+        common_df.to_excel('matches.xlsx', index=False)
+        return "Matching data has been saved to 'matches.xlsx'. You can download it from the link below.", os.path.abspath('matches.xlsx')
+iface = gr.Interface(fn=process_file, inputs=[gr.components.Dropdown(choices=['Upload Vendor File', 'Compare with Client File']), "file"], outputs=["text", "file"])
+iface.launch()

requirements.txt ADDED Viewed

	@@ -0,0 +1,9 @@

+gradio
+pandas
+numpy
+spacy
+scikit-learn
+keras
+joblib
+openpyxl
+xlrd