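"""Gradio chatbot "Chätti": a German study-advice assistant.

Retrieves matching text snippets from a persistent ChromaDB collection,
lets Mixtral-8x7B-Instruct (via the Hugging Face Inference API) answer in German,
streams the answer to the user, and logs every exchange to a Google Sheet.
"""
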
import os
import json
from datetime import datetime

import chromadb
import gradio as gr
import gspread
import requests
from chromadb.utils import embedding_functions
from huggingface_hub import InferenceClient
from oauth2client.service_account import ServiceAccountCredentials

# Google Sheets setup: scopes for the Sheets and Drive APIs
scope = ["https://spreadsheets.google.com/feeds", "https://www.googleapis.com/auth/drive"]

# Hugging Face token stored as the Space secret "key"
api_key = os.getenv("key")
print("HF API key present:", api_key is not None)  # avoid printing the secret itself
# The Google service-account JSON is stored in a private Hugging Face dataset repo;
# download it with the token above.

# Step 1: Specify the Hugging Face username, dataset name, and file name
username = 'Thiloid'
space_name = 'envapi'
file_name = 'nestolechatbot-5fe2aa26cb52.json'

# Step 2: Build the download URL (use /resolve/ for the raw file; /blob/ returns an HTML page)
download_url = f'https://huggingface.co/datasets/{username}/{space_name}/resolve/main/{file_name}'

# Step 3: Download the file content
try:
    headers = {'Authorization': f'Bearer {api_key}'}
    response = requests.get(download_url, headers=headers)
    response.raise_for_status()
except requests.exceptions.HTTPError as e:
    print(f"HTTP error occurred: {e}")
    exit()

# Step 4: Load the JSON data
try:
    json_data = json.loads(response.content)
    print("JSON data loaded successfully.")
except json.JSONDecodeError as e:
    print(f"JSON decoding error occurred: {e}")
    exit()

# json_data is a dict, so build the credentials from the dict rather than from a file path
creds = ServiceAccountCredentials.from_json_keyfile_dict(json_data, scope)
client = gspread.authorize(creds)
sheet = client.open("nestolechatbot").sheet1  # open the logging spreadsheet

def save_to_sheet(date, name, message):
    # Append one row (timestamp, prompt, answer) to the Google Sheet
    sheet.append_row([date, name, message])
    return f"Thanks {name}, your message has been saved!"


# Use the local development path if it exists, otherwise the path inside the Space
path = '/Users/thiloid/Desktop/LSKI/ole_nest/Chatbot/LLM/chromaTS'
if not os.path.exists(path):
    path = "/home/user/app/chromaTS"

print(path)
# Open the persistent ChromaDB store and the collection embedded with a German/English sentence transformer
chroma_client = chromadb.PersistentClient(path=path)
print(chroma_client.heartbeat())
print(chroma_client.get_version())
print(chroma_client.list_collections())
sentence_transformer_ef = embedding_functions.SentenceTransformerEmbeddingFunction(
    model_name="T-Systems-onsite/cross-en-de-roberta-sentence-transformer"
)
collection = chroma_client.get_collection(name="chromaTS", embedding_function=sentence_transformer_ef)

# Inference client for the hosted Mixtral model that generates the answers
llm_client = InferenceClient("mistralai/Mixtral-8x7B-Instruct-v0.1")


def format_prompt(message, history):
    # Wrap the user message in Mixtral [INST] tags; chat history is intentionally not included
    prompt = f"[INST] {message} [/INST]"
    return prompt

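# Main chat handler: retrieves relevant snippets from ChromaDB, builds the German
# system prompt, streams the Mixtral answer, and logs the exchange to the sheet.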
def response(
    prompt, history, temperature=0.9, max_new_tokens=500, top_p=0.95, repetition_penalty=1.0,
):
    # Clamp sampling parameters to sane values
    temperature = float(temperature)
    if temperature < 1e-2:
        temperature = 1e-2
    top_p = float(top_p)
    generate_kwargs = dict(
        temperature=temperature,
        max_new_tokens=max_new_tokens,
        top_p=top_p,
        repetition_penalty=repetition_penalty,
        do_sample=True,
        seed=42,
    )
    addon=""
    results=collection.query(
      query_texts=[prompt],
      n_results=60,
      #where={"source": "google-docs"}
      #where_document={"$contains":"search_string"}
    )
    #print("REsults")
    #print(results)
    #print("_____")
    dists=["<br><small>(relevance: "+str(round((1-d)*100)/100)+";" for d in results['distances'][0]]
  
    #sources=["source: "+s["source"]+")</small>" for s in results['metadatas'][0]]
    results=results['documents'][0]
    print("TEst")
    print(results)
    print("_____")
    combination = zip(results,dists)
    combination = [' '.join(triplets) for triplets in combination]
    if len(results) > 1:
        # German instruction: answer concisely and use only the retrieved database excerpts,
        # silently ignoring excerpts that do not fit the question
        addon = " Bitte berücksichtige bei deiner Antwort ausschließlich folgende Auszüge aus unserer Datenbank, sofern sie für die Antwort erforderlich sind. Beantworte die Frage knapp und präzise. Ignoriere unpassende Datenbank-Auszüge OHNE sie zu kommentieren, zu erwähnen oder aufzulisten:\n" + "\n".join(results)
    # German system prompt: the assistant is a study advisor that recommends suitable study information
    system = "Du bist ein deutschsprachiges KI-basiertes Studienberater-Assistenzsystem, das zu jedem Anliegen möglichst geeignete Studieninformationen empfiehlt." + addon + "\n\nUser-Anliegen:"
    formatted_prompt = format_prompt(system + "\n" + prompt, history)
    stream = llm_client.text_generation(formatted_prompt, **generate_kwargs, stream=True, details=True, return_full_text=False)
    output = ""
    for chunk in stream:
        output += chunk.token.text
        yield output
    # Optional sources footer:
    #output = output + "\n\n<br><details open><summary><strong>Sources</strong></summary><br><ul>" + "".join(["<li>" + s + "</li>" for s in combination]) + "</ul></details>"
    # Log the exchange (timestamp, prompt, answer) to the Google Sheet
    now = str(datetime.now())
    save_to_sheet(now, prompt, output)
    yield output

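# Build the Gradio chat UI with a German welcome message and launch the app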
gr.ChatInterface(
    response,
    chatbot=gr.Chatbot(
        value=[[None, "Herzlich willkommen! Ich bin Chätti, ein KI-basiertes Studienassistenzsystem, das für jede Anfrage die am besten geeigneten Studieninformationen empfiehlt.<br>Erzähle mir, was du gerne tust!"]],
        render_markdown=True,
    ),
    title="German Studyhelper Chätti",
).queue().launch(share=True)  # alternatively: share=False, server_name="0.0.0.0", server_port=7864
print("Interface up and running!")