Spaces:

aibmedia
/

aibsimilarityllm

Sleeping

App Files Files Community

aibmedia committed on Dec 30, 2024

Commit

9500a11

verified ·

1 Parent(s): eb98b33

Update main.py

Browse files

Files changed (1) hide show

main.py +223 -270

main.py CHANGED Viewed

@@ -2,6 +2,7 @@ import os , json
 from flask import Flask, render_template
 import threading
 import time
 import requests
 from langchain_core.tools import Tool
@@ -9,8 +10,6 @@ from langchain_google_community import GoogleSearchAPIWrapper, search
 from langchain_community.tools import DuckDuckGoSearchResults
 from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
 API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
@@ -19,7 +18,6 @@ API_URL2 = "https://api-inference.huggingface.co/models/sentence-transformers/al
 API_URL3 = "https://api-inference.huggingface.co/models/Snowflake/snowflake-arctic-embed-l-v2.0"
 # API_URL4 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
 search = GoogleSearchAPIWrapper()
@@ -47,62 +45,175 @@ def server_one():
 @app.route('/')
 async def server_1():
-    # check docs first then check similarity
-    query_sentence = "Obama's first name"
     duck_results = []
     all_results = []
     try:
-        searchduck = DuckDuckGoSearchResults(output_format="list", max_results=5, num_results=5)
         duck_results = searchduck.invoke(query_sentence)
-        if type(duck_results) == list :
-            all_results = duck_results
     except:
         print("An exception occurred")
         duck_results = []
     tool = Tool(
         name="google_search",
         description="Search Google for recent results.",
         func=search.run,
     )
     try:
         google_results = search.results( query_sentence , 10 )
         print("type(duck_results)")
         print(type(duck_results))
         print(type(all_results))
-        if type(google_results) == list :
-            all_results = all_results + google_results
     except:
         print("An exception occurred")
     # get the snippet put into list
-    split_query_words = query_sentence.split(); important_keywords = []
     for x in split_query_words:
-        if x.isupper():
-            important_keywords.append(x)
-        if len(x) > 3 & x.isupper() == False:
             important_keywords.append(x)
-    # pull pages and split each html and count occurance of important keywords here & check snipp if snipp occurs between . and <p> its good not img
-    #
-    # get the longest word in sentence
-    # res = "" ; iteratorx = 0
-    # for word in split_query_words:
-    #     if len(word) > len(res):
-    #         res = word
-    # get google 20 items
-    # get user query in the url param
-    # truncate 130 characters
-    # if still no passed 2x4 matrix in log print increase chars by 200
-    # payload = {  "inputs": {  "source_sentence": "That is a green painted house",  "sentences": ["The house paint is green",  "That house is green","That house was painted","The house is green", "The house was bought yesterday", "This house was painted green",   "That house looks green",   "Today the house is clean "  ] } , }
-    # payload = {  "inputs": {  "source_sentence": "Manila is the capital city of the Philippines",  "sentences": ["The current capital city, Manila, has been the countrys capital throughout most of its history and regained the title through a presidential order in 1976",  "Manila officially the City of Manila (Filipino: Lungsod ng Maynila), is the capital and second-most populous city of the Philippines, after Quezon City.", "Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre of the countrys economic, political, social, and cultural activity.", "Quezon City is the capital of the Philippines", "Manila is the capital of the philippines", "For sometime Manila has been the capital of of the Philippines" , "What is the capital of Philippines", "Manila is not the capital of the Phillipines",   "Quezon city was the capital of the Philippines, until President Ferdinand Marcos Sr. moved the capital to back to Manila. "  ] } , }
-    # payload = {  "inputs": {  "source_sentence": "Manila is the capital city of the Philippines",  "sentences": ["The current capital city, Manila, has been the country's capital throughout most of its history and regained the title through a presidential order in 1976"] } , }
     payload = {  "inputs": {  "source_sentence": "Manila is the capital city of the Philippines",  "sentences": ["The current capital city, Manila, has been the countrys capital throughout most","Manila officially the City of Manila (Filipino: Lungsod ng Maynila),","Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ","Quezon City is the capital of the Philippines","Manila is the capital of the philippines","For sometime Manila has been the capital of of the Philippines" ,"What is the capital of Philippines","Manila is not the capital of the Phillipines","Quezon city was the capital of the Philippines, until President Ferdinand "] } , }
     response0 =  requests.post(API_URL0, headers=headers, json=payload)
@@ -133,7 +244,7 @@ async def server_1():
     if varcontinue_similarity == 1 :
         # call processing with 10 google search result or 15 search results
         if len(all_results) == 10 :
-            result_processed = process_similarity_10(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json()  )
         if len(all_results) > 10 :
             result_processed = process_similarity_15(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json()  )
     # return all_results
@@ -154,8 +265,15 @@ def process_similarity_15(sorted0, sorted1, sorted2, sorted3, actualscore0, actu
     print("length")
     # print(len(similarity_scores))
     key_index = 0
-    #copy + loop to get index
     print("the sorted0-3")
     print(sorted0)
     print(sorted1)
@@ -219,251 +337,86 @@ def process_similarity_15(sorted0, sorted1, sorted2, sorted3, actualscore0, actu
                     print("sorted_with_index")
                     print(sorted3_with_index)
     print("sorted0-3_with_index")
     print(sorted0_with_index)
-    print(sorted1_with_index)
     print(sorted2_with_index)
     print(sorted3_with_index)
-    index_sorted0 = [] ; index_sorted1 = [] ; index_sorted2 = [] ; index_sorted3 = []
-    # lines 158 onwards is about scenario when sorted0_with_index values are greater then .78
-    # then combine the top 5 values from each list to get the top 3 of 4 llm
-    varcontinue = False
-    # we will only continue if each llm has resulted with values greater than .78 & if these llm result list has at least 2
-    if ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or  ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or  ( len(sorted1_with_index) >= 2 and len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or  ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted0_with_index) >= 2 ):
-            # continue variable set to true
-            # indent this here  # if( check if avarage of each any3 resp_list0-3 average is 0.85 or above ) !!then only continue!!!
-        varcontinue = True
-        print("continue variable set to true")
-    if ( len(sorted0_with_index) >= 2 and len(sorted1_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or  ( len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or  ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or  ( len(sorted1_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or  ( len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ):
-            # continue variable set to true
-                 # if( check if avarage of any3 resp_list0-3 average is 0.85 or above )!!then only continue!!!
-        varcontinue = True
-        print("continue variable set to true")
-        # check if llm 1 - 3 has minimum 3
-    if varcontinue == True:
-        if len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0 :
-            print("len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0")
-            print(len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0)
-            print("sorted0_with_index")
-            print(sorted0_with_index)
-            for x in sorted0_with_index :
-                index_sorted0.append(x)
-            remaining_padding = 5 - len(index_sorted0)
-            while remaining_padding > 0 :
-                remaining_padding= remaining_padding - 1
-                index_sorted0.append(index_sorted0[0])
-        if len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0 :
-            print("len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0")
-            print(len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0)
-            print("sorted1_with_index")
-            print(sorted1_with_index)
-            for x in sorted1_with_index :
-                index_sorted1.append(x)
-            remaining_padding = 5 - len(index_sorted1)
-            while remaining_padding > 0 :
-                remaining_padding= remaining_padding - 1
-                index_sorted1.append(index_sorted1[0])
-        if len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0 :
-            print("len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0")
-            print(len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0)
-            print("sorted2_with_index")
-            print(sorted2_with_index)
-            for x in sorted2_with_index :
-                index_sorted2.append(x)
-            remaining_padding = 5 - len(index_sorted2)
-            while remaining_padding > 0 :
-                remaining_padding= remaining_padding - 1
-                index_sorted2.append(index_sorted2[0])
-        if len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0 :
-                print("len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0")
-                print(len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0)
-                print("sorted3_with_index")
-                print(sorted3_with_index)
-                for x in sorted3_with_index :
-                    index_sorted3.append(x)
-                remaining_padding = 5 - len(index_sorted3)
-                while remaining_padding > 0 :
-                    remaining_padding= remaining_padding - 1
-                    index_sorted3.append(index_sorted3[0])
-        print("index_sorted0-1")
-        print(index_sorted0)
-        print(index_sorted1)
-        print(index_sorted2)
-        print(index_sorted3)
-    else:
-        print("No reliable similarity found by 4 llms")
-    return str( index_sorted0 ) + "," + str( index_sorted1 ) + "," + str( index_sorted2 ) + "," + str( index_sorted3 )
-def process_similarity_10(sorted0, sorted1, sorted2, sorted3, actualscore0, actualscore1, actualscore2, actualscore3):
-    # print(similarity_scores)
-    # print(type(similarity_scores))
-    print("length")
-    # print(len(similarity_scores))
-    key_index = 0
-    #copy + loop to get index
-    print("the sorted0-3")
-    print(sorted0)
-    print(sorted1)
-    print(sorted2)
-    print(sorted3)
-    print("end the sorted0-3")
-    # Get the index of the sorted list for resp_list0
-    sorted0_with_index = []
-    for x in sorted0:
-        for y in actualscore0:
-            if x == y:
-                print("index of sorted0")
-                print(actualscore0.index(y))
-                if x > 0.90:
-                    sorted0_with_index.append(actualscore0.index(y))
-                    print("sorted_with_index")
-                    print(sorted0_with_index)
-    print("sorted0_with_index")
-    print(sorted0_with_index)
-    sorted1_with_index = []
-    for x in sorted1:
-        for y in actualscore1:
-            if x == y:
-                print("index of sorted1")
-                print(actualscore1.index(y))
-                if y > 0.90:
-                    sorted1_with_index.append(actualscore1.index(y))
-                    print("sorted_with_index")
-                    print(sorted1_with_index)
-    print("sorted1_with_index")
-    print(sorted1_with_index)
-    sorted2_with_index = []
-    print("b4 for x in sorted2:")
-    print("resp_list2:" + str(actualscore2))
-    print("sorted:" + str(sorted2))
-    for x in sorted2:
-        for y in actualscore2:
-            if x == y:
-                print("index of sorted2")
-                print(actualscore2.index(y))
-                if y > 0.90:
-                    sorted2_with_index.append(actualscore2.index(y))
-                    print("sorted_with_index")
-                    print(sorted2_with_index)
-    print("sorted2_with_index")
-    print(sorted2_with_index)
-    sorted3_with_index = []
-    print("b4 for x in sorted3:")
-    print("resp_list3:" + str(actualscore3))
-    for x in sorted3:
-        for y in actualscore3:
-            if x == y:
-                print("index of sorted3")
-                print(actualscore3.index(y))
-                if y > 0.90:
-                    sorted3_with_index.append(actualscore3.index(y))
-                    print("sorted_with_index")
-                    print(sorted3_with_index)
-    print("sorted0-3_with_index")
-    print(sorted0_with_index)
-    print(sorted1_with_index)
-    print(sorted2_with_index)
-    print(sorted3_with_index)
-    index_sorted0 = [] ; index_sorted1 = [] ; index_sorted2 = [] ; index_sorted3 = []
-    # lines 158 onwards is about scenario when sorted0_with_index values are greater then .78
-    # then combine the top 5 values from each list to get the top 3 of 4 llm
-    varcontinue = False
-    # we will only continue if each llm has resulted with values greater than .78 & if these llm result list has at least 2
-    if ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or  ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or  ( len(sorted1_with_index) >= 2 and len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or  ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 and len(sorted0_with_index) >= 2 ):
-            # continue variable set to true
-            # indent this here  # if( check if avarage of each any3 resp_list0-3 average is 0.85 or above ) !!then only continue!!!
-        varcontinue = True
-        print("continue variable set to true")
-    if ( len(sorted0_with_index) >= 2 and len(sorted1_with_index) >= 2 ) or ( len(sorted0_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or  ( len(sorted0_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or  ( len(sorted1_with_index) >= 2 and len(sorted2_with_index) >= 2 ) or  ( len(sorted1_with_index) >= 2 and len(sorted3_with_index) >= 2 ) or  ( len(sorted2_with_index) >= 2 and len(sorted3_with_index) >= 2 ):
-            # continue variable set to true
-                 # if( check if avarage of any3 resp_list0-3 average is 0.85 or above )!!then only continue!!!
-        varcontinue = True
-        print("continue variable set to true")
-        # check if llm 1 - 3 has minimum 3
-    if varcontinue == True:
-        if len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0 :
-            print("len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0")
-            print(len(sorted0_with_index) < 5 and len(sorted0_with_index) > 0)
-            print("sorted0_with_index")
-            print(sorted0_with_index)
-            for x in sorted0_with_index :
-                index_sorted0.append(x)
-            remaining_padding = 5 - len(index_sorted0)
-            while remaining_padding > 0 :
-                remaining_padding= remaining_padding - 1
-                index_sorted0.append(index_sorted0[0])
-        if len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0 :
-            print("len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0")
-            print(len(sorted1_with_index) < 5 and len(sorted1_with_index) > 0)
-            print("sorted1_with_index")
-            print(sorted1_with_index)
-            for x in sorted1_with_index :
-                index_sorted1.append(x)
-            remaining_padding = 5 - len(index_sorted1)
-            while remaining_padding > 0 :
-                remaining_padding= remaining_padding - 1
-                index_sorted1.append(index_sorted1[0])
-        if len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0 :
-            print("len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0")
-            print(len(sorted2_with_index) < 5 and len(sorted2_with_index) > 0)
-            print("sorted2_with_index")
-            print(sorted2_with_index)
-            for x in sorted2_with_index :
-                index_sorted2.append(x)
-            remaining_padding = 5 - len(index_sorted2)
-            while remaining_padding > 0 :
-                remaining_padding= remaining_padding - 1
-                index_sorted2.append(index_sorted2[0])
-        if len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0 :
-                print("len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0")
-                print(len(sorted3_with_index) < 5 and len(sorted3_with_index) > 0)
-                print("sorted3_with_index")
-                print(sorted3_with_index)
-                for x in sorted3_with_index :
-                    index_sorted3.append(x)
-                remaining_padding = 5 - len(index_sorted3)
-                while remaining_padding > 0 :
-                    remaining_padding= remaining_padding - 1
-                    index_sorted3.append(index_sorted3[0])
-        print("index_sorted0-1")
-        print(index_sorted0)
-        print(index_sorted1)
-        print(index_sorted2)
-        print(index_sorted3)
-    else:
-        print("No reliable similarity found by 4 llms")
-    return str( index_sorted0 ) + "," + str( index_sorted1 ) + "," + str( index_sorted2 ) + "," + str( index_sorted3 )
 if __name__ == '__main__':
-  app.run(host='0.0.0.0', port=8080)

 from flask import Flask, render_template
 import threading
 import time
+from pydantic.v1.utils import unique_list
 import requests
 from langchain_core.tools import Tool
 from langchain_community.tools import DuckDuckGoSearchResults
 from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
 API_URL0 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
 API_URL3 = "https://api-inference.huggingface.co/models/Snowflake/snowflake-arctic-embed-l-v2.0"
 # API_URL4 = "https://api-inference.huggingface.co/models/sentence-transformers/multi-qa-MiniLM-L6-cos-v1"
 search = GoogleSearchAPIWrapper()
 @app.route('/')
 async def server_1():
+    # TODO :: check html first then check similarity
+    # TODO :: check parts of snipp to pass in the processing func
+    query_sentence = "capital city of the Philippines"
     duck_results = []
     all_results = []
     try:
+        searchduck = DuckDuckGoSearchResults(output_format="list", num_results=20)
         duck_results = searchduck.invoke(query_sentence)
+        print("type of duck")
+        print(type(duck_results))
     except:
         print("An exception occurred")
         duck_results = []
+    if type(duck_results) == list and len(duck_results) > 0 :
+        all_results = duck_results
     tool = Tool(
         name="google_search",
         description="Search Google for recent results.",
         func=search.run,
     )
     try:
         google_results = search.results( query_sentence , 10 )
         print("type(duck_results)")
         print(type(duck_results))
         print(type(all_results))
     except:
         print("An exception occurred")
+    if type(google_results) == list  and len(google_results) > 0:
+        all_results = all_results + google_results
+        print("len of google and duck")
+        print(len(all_results))
+        print(len(google_results))
+        print(len(duck_results))
+    print("type of google")
+    print(type(google_results))
+    print(all_results)
+    all_snipps = []
+    new_results = []
     # get the snippet put into list
+    split_query_words = query_sentence.split(); important_keywords = []; uppercased_keywords = [];
     for x in split_query_words:
+        print(" x.isupper()  ")
+        print(x)
+        print( x[0].isupper()  )
+        if x[0].isupper() == True :
+            uppercased_keywords.append(x)
+        if ( len(x) > 3 ) & ( x[0].isupper() == False ):
             important_keywords.append(x)
+    print("what is important and upper")
+    print(important_keywords)
+    print(uppercased_keywords)
+    snipp_score = 0
+    capitalized_score = 0
+    for x in all_results:
+        snipp_score = 0
+        capitalized_score = 0
+        for words in important_keywords:
+            print("The important words " )
+            print(words)
+            print("x[snippet].find(words)")
+            print(x["snippet"].find(words))
+            if x["snippet"].find(words) != -1 :
+                print("Found word")
+                snipp_score = snipp_score + 1
+        for words in uppercased_keywords:
+            print("The important words capitalized" )
+            print(words)
+            if x["snippet"].find(words) != -1 :
+                snipp_score = snipp_score + 1
+                capitalized_score = capitalized_score + 1
+        if ( snipp_score >= len(important_keywords) ) and ( ( capitalized_score <=  len(uppercased_keywords) and capitalized_score > 0 ) or ( len(uppercased_keywords) == 0 )  ):
+            new_results.append(x)
+            continue
+        if ( (snipp_score <= len(important_keywords) and snipp_score >= 2 ) and (len(important_keywords) <= 4) )  and ( (capitalized_score <= len(uppercased_keywords) and capitalized_score >= 1) or ( len(uppercased_keywords) == 0 )  ):
+            new_results.append(x)
+            continue
+        if ( ( snipp_score <= len(important_keywords) and snipp_score >= 4  ) and ( len(important_keywords) >= 5 and len(important_keywords) <= 7 )  ) and ( ( capitalized_score <=  len(uppercased_keywords) and capitalized_score > 0 ) or ( len(uppercased_keywords) == 0 )  ) :
+            new_results.append(x)
+            continue
+        else :
+            # skip the result
+            print("This is not added")
+            print(x["snippet"])
+            print("important keywords")
+            print(important_keywords)
+            print("capitalized_score")
+            print(capitalized_score)
+            print("snipp_score")
+            print(snipp_score)
+    print("these are new_results")
+    print("===============================")
+    print(new_results)
+    print("these are new_results")
+    print("===============================")
+    print( " len( new_results)  ")
+    print( len( new_results)  )
+    print("type of all_results")
+    # TODO :: check html first then check similarity
+    # TODO :: check parts of snipp to pass in the processing func
+    # TODO :: pull pages and split each html and count occurance of important keywords here & check snipp if snipp occurs between . and <p> its good not img
+    n_results = {}
+    iter_x = 0
+    for x in new_results:
+        n_results[iter_x] = []
+        print("x[snippet]")
+        print(x["snippet"])
+        for y in (x["snippet"]).split('.') :
+            score = 0 ; cap_score  = 0 ;
+            for words in important_keywords :
+                if y.find(words) != -1 :
+                    print(y)
+                    print(score)
+                    score = score + 1
+            for words in uppercased_keywords :
+                if y.find(words) != -1 :
+                    print(y)
+                    print(cap_score)
+                    cap_score = cap_score + 1
+            if ( score == ( len(important_keywords) )  ) and ( cap_score >= ( len(uppercased_keywords) ) ):
+                n_results[iter_x].append(y)
+            if ( score >= ( len(important_keywords)-1  )  ) or ( cap_score >=  len(uppercased_keywords) and (len(uppercased_keywords) > 0)  ):
+                n_results[iter_x].append(y)
+        iter_x = iter_x + 1
+        print("iterator")
+        print(iter_x)
+    print("n_results")
+    print(n_results)
+    print(len(n_results))
+    print("nresults")
+    print(n_results[1])
+#     nresults={}
+#     new_results loop
+#        sentences loop
+#           score = 0 ; cap_score  = 0
+#           words loop
+#              if found score ++
+#
+#          capitalized loop
+#              if found cap_score ++
+#           if cap_score >= len words &&  if score >= len words
+#
+#
+#                 nresults[i].append(x)
+    # TODO :: check parts of snipp
+    # TODO :: check parts of snipp
+    # TODO :: check parts of snipp
     payload = {  "inputs": {  "source_sentence": "Manila is the capital city of the Philippines",  "sentences": ["The current capital city, Manila, has been the countrys capital throughout most","Manila officially the City of Manila (Filipino: Lungsod ng Maynila),","Dis 4, 2024 — Manila, capital and chief city of the Philippines. The city is the centre ","Quezon City is the capital of the Philippines","Manila is the capital of the philippines","For sometime Manila has been the capital of of the Philippines" ,"What is the capital of Philippines","Manila is not the capital of the Phillipines","Quezon city was the capital of the Philippines, until President Ferdinand "] } , }
     response0 =  requests.post(API_URL0, headers=headers, json=payload)
     if varcontinue_similarity == 1 :
         # call processing with 10 google search result or 15 search results
         if len(all_results) == 10 :
+            result_processed = process_similarity_15(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json()  )
         if len(all_results) > 10 :
             result_processed = process_similarity_15(sorted0, sorted1, sorted2, sorted3,response0.json(), response1.json(), response2.json(), response3.json()  )
     # return all_results
     print("length")
     # print(len(similarity_scores))
     key_index = 0
+    # copy + loop to get index
+    print("actual scores")
+    print("actual scores")
+    print(actualscore0)
+    print(actualscore1)
+    print(actualscore2)
+    print(actualscore3)
+    print("the sorted0-3")
     print("the sorted0-3")
     print(sorted0)
     print(sorted1)
                     print("sorted_with_index")
                     print(sorted3_with_index)
+    print("sorted0-3_with_index")
     print("sorted0-3_with_index")
     print(sorted0_with_index)
+    print(sorted1_with_index)
     print(sorted2_with_index)
     print(sorted3_with_index)
+    print("sorted0-3_with_index")
+    # At this point the scores have been sorted also indexes are stored in lists
+    # At this point the scores have been sorted also indexes are stored in lists
+    this_unique_list = set( sorted0_with_index + sorted1_with_index + sorted2_with_index + sorted3_with_index )
+    webgraph_list = []
+    iterator_x = 0
+    for x in sorted0_with_index:
+        print(x)
+        if ( x in sorted3_with_index and x in sorted1_with_index and x in sorted2_with_index ) :
+            webgraph_list.append(x)
+        if ( x in sorted1_with_index and x in sorted2_with_index ) or ( x in sorted3_with_index and x in sorted2_with_index ) or ( x in sorted1_with_index and x in sorted3_with_index ):
+            webgraph_list.append(x)
+        if (x in sorted1_with_index or x in sorted2_with_index or x in sorted3_with_index ) and actualscore0[iterator_x] > 0.96 :
+            webgraph_list.append(x)
+        iterator_x = iterator_x + 1
+    print("webgraph_list0")
+    print("webgraph_list0")
+    print(webgraph_list)
+    iterator_x = 0
+    for x in sorted1_with_index:
+        print(x)
+        if x in sorted3_with_index and x in sorted0_with_index and x in sorted2_with_index :
+            webgraph_list.append(x)
+        if ( x in sorted0_with_index and x in sorted2_with_index ) or ( x in sorted3_with_index and x in sorted2_with_index ) or ( x in sorted0_with_index and x in sorted3_with_index ):
+            webgraph_list.append(x)
+        if (x in sorted0_with_index or x in sorted2_with_index or x in sorted3_with_index ) and actualscore1[iterator_x] > 0.96 :
+            webgraph_list.append(x)
+        iterator_x = iterator_x + 1
+    print("webgraph_list1")
+    print("webgraph_list1")
+    print(webgraph_list)
+    iterator_x = 0
+    for x in sorted2_with_index:
+        print(x)
+        if x in sorted3_with_index and x in sorted0_with_index and x in sorted1_with_index :
+            webgraph_list.append(x)
+        if ( x in sorted0_with_index and x in sorted1_with_index ) or ( x in sorted3_with_index and x in sorted1_with_index ) or ( x in sorted0_with_index and x in sorted3_with_index ):
+            webgraph_list.append(x)
+        if (x in sorted0_with_index or x in sorted1_with_index or x in sorted3_with_index ) and actualscore2[iterator_x] > 0.96 :
+            webgraph_list.append(x)
+        iterator_x = iterator_x + 1
+    print("webgraph_list2")
+    print("webgraph_list2")
+    print(webgraph_list)
+    iterator_x = 0
+    for x in sorted3_with_index:
+        print(x)
+        if x in sorted1_with_index and x in sorted0_with_index and x in sorted2_with_index :
+            webgraph_list.append(x)
+        if ( x in sorted0_with_index and x in sorted2_with_index ) or ( x in sorted1_with_index and x in sorted2_with_index ) or ( x in sorted0_with_index and x in sorted1_with_index ):
+            webgraph_list.append(x)
+        if (x in sorted0_with_index or x in sorted2_with_index or x in sorted1_with_index ) and actualscore3[iterator_x] > 0.96 :
+            webgraph_list.append(x)
+        iterator_x = iterator_x + 1
+    print("webgraph_list3")
+    print("webgraph_list3")
+    print(webgraph_list)
+    print("webgraph_list")
+    print(webgraph_list)
+    return str( list(set(webgraph_list ) ) )
 if __name__ == '__main__':
+  app.run(host='0.0.0.0', port=8081)