Spaces:

georgeek
/

hf-llm-bill-chat

Sleeping

App Files Files Community

georgeek committed on Jan 16

Commit

b4d9fb1

1 Parent(s): 0ea4bb9

two models

Browse files

Files changed (4) hide show

bill.py → One_model.py +0 -0
billPdf.py +0 -216
logs/conversation_logs.json +48 -0
pages/Two_models.py +306 -0

bill.py → One_model.py RENAMED Viewed

File without changes

billPdf.py DELETED Viewed

@@ -1,216 +0,0 @@
-#!/usr/bin/env -S poetry run python
-import os
-import json
-#import pdfplumber
-import streamlit as st
-from openai import OpenAI
-client = OpenAI()
-def load_user_data(user_id):
-    """Return the JSON content stored for ``user_id``.
-
-    The file is ``data/user_data/user_data_<user_id>.json``, resolved
-    relative to the current working directory. Returns an empty dict when
-    the file does not exist; a file with invalid JSON makes ``json.load``
-    raise.
-    """
-    file_path = os.path.join("data", "user_data", f"user_data_{user_id}.json")
-    if not os.path.exists(file_path):
-        return {}
-    with open(file_path, "r") as file:
-        return json.load(file)
-def parse_pdf_to_json(pdf_path):
-    """Extract cost figures from a PDF bill by scanning its text line by line.
-
-    ``pdf_path`` is handed to ``pdfplumber.open``; ``main`` in this file
-    passes the Streamlit uploaded-file object.
-
-    Only the ``costuri`` dictionary is returned. The customer account, bill
-    number and bill date are collected into local dictionaries but are never
-    returned. Some amounts are stored as strings and others as floats,
-    depending on the branch that matched.
-
-    NOTE(review): ``import pdfplumber`` is commented out at the top of this
-    file, so ``pdfplumber.open`` below raises ``NameError`` unless the name
-    is made available some other way.
-    """
-    user_id = {}
-    serie_factura = {}
-    data_factura = {}
-    costuri = {}
-    with pdfplumber.open(pdf_path, ) as pdf:
-        for page in pdf.pages:
-            text = page.extract_text()
-            if text:
-                lines = text.split('\n')
-                # Process each line and look for specific categories
-                for line in lines:
-                    # Bill date: last whitespace-separated token of the line
-                    if 'Data facturii' in line:
-                        date = line.split()[-1]
-                        data_factura['Data factura'] = date
-                    # Bill series/number: last token of the line
-                    if 'rul facturii:' in line:
-                        serie = line.split()[-1]
-                        serie_factura['Serie numar'] = serie
-                    # Customer account: last token of the line
-                    if 'Cont client' in line:
-                        cont = line.split()[-1]
-                        user_id['Cont client'] = cont
-                    # From here on a single if/elif chain: at most one cost
-                    # key is set per line, and the first matching test wins.
-                    # Every branch reads the second-to-last token; presumably
-                    # the last token is a currency/unit label (TODO confirm).
-                    if 'Sold precedent' in line:
-                        value = line.split()[-2].replace(',', '.')  # decimal comma -> dot, kept as a string
-                        costuri['Sold precedent'] = value
-                    # Amount paid from the previous balance
-                    elif 'din sold precedent' in line:
-                        value = line.split()[-2].replace(',', '.')  # decimal comma -> dot, kept as a string
-                        costuri['Total platit din sold precedent'] = value
-                    # Any line mentioning both 'TVA' and '%'
-                    elif 'TVA' in line and '%' in line:
-                        value = line.split()[-2].replace(',', '.')  # decimal comma -> dot, kept as a string
-                        costuri['TVA'] = value
-                    # Subscriptions and extra options
-                    elif 'Abonamente' in line:
-                        value = line.split()[-2].replace(',', '.')  # decimal comma -> dot, kept as a string
-                        costuri['Abonamente si extraopiuni'] = value
-                    # Current bill total without VAT
-                    elif 'Total factura curenta fara TVA' in line:
-                        value = float(line.split()[-2].replace(',', '.'))  # parsed as float
-                        costuri['Total factura curenta fara TVA'] = value
-                    # Services used
-                    elif 'Servicii utilizate' in line:
-                        value = line.split()[-2].replace(',', '.')  # decimal comma -> dot, kept as a string
-                        costuri['Servicii utilizate'] = value
-                    # Device instalments
-                    elif 'Rate terminal' in line:
-                        value = float(line.split()[-2].replace(',', '.'))  # parsed as float
-                        costuri['Rate terminal'] = value
-                    # NOTE(review): unreachable. A line containing
-                    # 'TVA 19,00%' already satisfies the earlier
-                    # "'TVA' in line and '%' in line" test in this chain.
-                    elif 'TVA 19,00%' in line:
-                        value = float(line.split()[-2].replace(',', '.'))  # parsed as float
-                        costuri['TVA'] = value
-                    # Current bill total; lines that also contain 'fara TVA'
-                    # were consumed by the earlier branch.
-                    elif 'Total factura curenta' in line:
-                        value = float(line.split()[-2].replace(',', '.'))  # parsed as float
-                        costuri['Total factura curenta'] = value
-    return costuri
-def check_related_keys(question, user_id):
-    """Return the bill keys whose name occurs in ``question``.
-
-    The keys are gathered from every entry of the user's ``"bills"`` list and
-    matched as case-insensitive substrings of the question. Because they are
-    collected in a set, the order of the returned list is not defined.
-    """
-    user_data = load_user_data(user_id)
-    bill_keys = set()
-    for bill in user_data.get("bills", []):
-        bill_keys.update(bill.keys())
-    return [key for key in bill_keys if key.lower() in question.lower()]
-def process_query(query, user_id):
-    """Build the prompt that embeds the user's bills and the question.
-
-    Returns the prompt string, or ``None`` (after showing a warning) when the
-    prompt is longer than ``max_input_length`` characters. The prompt and its
-    size are also written to the page.
-    """
-    user_data = load_user_data(user_id)
-    bill_info = user_data.get("bills", [])
-    related_keys = check_related_keys(query, user_id)
-    # "N/A" is a sentinel meaning that no bill key was mentioned in the query.
-    related_keys_str = ", ".join(related_keys) if related_keys else "N/A"
-    if related_keys_str != "N/A":
-        # Restrict the answer to the bill keys named in the question.
-        context = (
-            f"Citeste informatiile despre costrurile in lei facturate din dictionar: {bill_info} "
-            f"si raspunde la intrebarea: '{query}' dar numai cu info legate de: {related_keys_str}"
-        )
-    else:
-        # No key matched: restrict the answer to the bill in general.
-        context = (
-            f"Citeste informatiile despre costrurile in lei facturate din dictionar: {bill_info} "
-            f"si raspunde la intrebarea: '{query}' dar numai cu info legate de factura"
-        )
-    # The limit is measured in characters of the whole prompt, not in tokens.
-    max_input_length = 550
-    st.write(f"Context:\n{context}")
-    st.write(f"Context size: {len(context)} characters")
-    if len(context) > max_input_length:
-        st.warning("Prea multe caractere în context, solicitarea nu va fi trimisă.")
-        return None
-    return context
-def main():
-    """Render the Streamlit page: user lookup, PDF bill upload and chat.
-
-    The first question of a session is wrapped by ``process_query`` so that
-    it carries the bill data; later questions are sent as typed. Each turn
-    sends the whole message history to the ``gpt-4`` model and shows the
-    token usage reported with the completion.
-    """
-    st.title("Telecom Bill Chat with LLM Agent")
-    if "user_id" not in st.session_state:
-        st.session_state.user_id = None
-    user_id = st.sidebar.text_input("Introdu numărul de telefon:")
-    # Only react when the typed ID differs from the one already in the session.
-    if user_id and user_id != st.session_state.user_id:
-        data = load_user_data(user_id)
-        if data:
-            st.session_state.user_id = user_id
-            st.success("Utilizator găsit!")
-        else:
-            st.warning("Nu am găsit date pentru acest ID. Încărcați o factură PDF la nevoie.")
-            # The ID is stored even without data so that an upload can be attached to it.
-            st.session_state.user_id = user_id
-    uploaded_file = st.file_uploader("Încarcă factura PDF", type="pdf")
-    if uploaded_file and st.session_state.user_id:
-        # Parse the upload and append it to the user's stored list of bills.
-        bill_data = parse_pdf_to_json(uploaded_file)
-        existing_data = load_user_data(st.session_state.user_id)
-        if "bills" not in existing_data:
-            existing_data["bills"] = []
-        existing_data["bills"].append(bill_data)
-        file_path = os.path.join("data", "user_data", f"user_data_{st.session_state['user_id']}.json")
-        os.makedirs(os.path.dirname(file_path), exist_ok=True)
-        with open(file_path, "w") as file:
-            json.dump(existing_data, file)
-        st.success("Factura a fost încărcată și salvată cu succes!")
-    if st.session_state.user_id:
-        data = load_user_data(st.session_state.user_id)
-        st.write(f"Phone Number: {st.session_state.user_id}")
-        st.write("Facturi existente:")
-        for bill in data.get("bills", []):
-            st.write(bill)
-    else:
-        st.info("Introduceți un ID și/sau încărcați o factură PDF pentru a continua.")
-    # Initialize conversation in the session state
-    # "context_prompt_added" indicates whether we've added the specialized "bill info" context yet.
-    if "messages" not in st.session_state:
-        st.session_state["messages"] = [
-            {"role": "assistant", "content": "Cu ce te pot ajuta?"}
-        ]
-    if "context_prompt_added" not in st.session_state:
-        st.session_state.context_prompt_added = False
-    st.write("---")
-    st.subheader("Chat")
-    # Replay the stored conversation.
-    for msg in st.session_state["messages"]:
-        st.chat_message(msg["role"]).write(msg["content"])
-    if prompt := st.chat_input("Introduceți întrebarea aici:"):
-        if not st.session_state.user_id:
-            st.error("Trebuie să introduceți un număr de telefon valid sau să încărcați date.")
-            return
-        # If the context prompt hasn't been added yet, build & inject it once;
-        # otherwise, just add the user's raw question.
-        if not st.session_state.context_prompt_added:
-            final_prompt = process_query(prompt, st.session_state["user_id"])
-            # None means the prompt exceeded the length limit; nothing is sent.
-            if final_prompt is None:
-                st.stop()
-            st.session_state["messages"].append({"role": "user", "content": final_prompt})
-            st.session_state.context_prompt_added = True
-        else:
-            st.session_state["messages"].append({"role": "user", "content": prompt})
-        # Display the latest user message in the chat
-        st.chat_message("user").write(st.session_state["messages"][-1]["content"])
-        # Now call GPT-4 with the entire conversation
-        completion = client.chat.completions.create(
-            model="gpt-4",
-            messages=st.session_state["messages"]
-        )
-        response_text = completion.choices[0].message.content.strip()
-        st.session_state["messages"].append({"role": "assistant", "content": response_text})
-        st.chat_message("assistant").write(response_text)
-        # Show the token counts reported with the completion.
-        if hasattr(completion, "usage"):
-            st.write("Prompt tokens:", completion.usage.prompt_tokens)
-            st.write("Completion tokens:", completion.usage.completion_tokens)
-            st.write("Total tokens:", completion.usage.total_tokens)
-if __name__ == "__main__":
-    main()

logs/conversation_logs.json CHANGED Viewed

@@ -58,5 +58,53 @@
             "total_tokens": 1229
         },
         "cost": 0.018435
     }
 ]

             "total_tokens": 1229
         },
         "cost": 0.018435
+    },
+    {
+        "timestamp": "2025-01-16T10:54:40.132782",
+        "user_id": "0724077190",
+        "user_query": "cat am de plata",
+        "assistant_response": "Pentru a te ajuta, am nevoie de mai multe informa\u021bii despre contextul pl\u0103\u021bii. Este vorba despre o factur\u0103, un credit, un serviciu de telecomunica\u021bii sau altceva? Te rog s\u0103-mi oferi detalii suplimentare pentru a putea oferi un r\u0103spuns corect.",
+        "tokens": {
+            "prompt_tokens": 22,
+            "completion_tokens": 69,
+            "total_tokens": 91
+        },
+        "cost": 0.001365
+    },
+    {
+        "timestamp": "2025-01-16T11:50:17.716138",
+        "user_id": "0724077190",
+        "user_query": "nu e cam mult?",
+        "assistant_response": "Analiz\u00e2nd informa\u021biile furnizate privind factura, observ\u0103m urm\u0103toarele detalii:\n\n1. **Total factur\u0103 curent\u0103:** 104,93 lei\n2. **TVA 19%:** 16,75 lei\n3. **Total rate terminal:** 5.445,53 lei\n4. **Abonamente \u0219i extraop\u021biuni:** 91,17 lei\n5. **Reduceri:** -2,99 lei\n6. **Rest plat\u0103:** -5.073,74 lei\n7. **Sold precedent:** -5.073,74 lei\n8. **Total de plat\u0103:** 476,72 lei\n\nPrincipalul factor care face suma facturii mare este valoarea considerabil\u0103 pentru \"Rate terminal\", care este 5.445,53 lei. Aceasta este o sum\u0103 semnificativ\u0103 \u00een totalul facturii, dar se pare c\u0103 a fost acoperit\u0103 par\u021bial de ajust\u0103rile \u0219i soldul precedent, rezult\u00e2nd \u00eentr-un \"Total de plat\u0103\" de 476,72 lei.\n\nDac\u0103 aceast\u0103 sum\u0103 vi se pare mare, ar putea fi util s\u0103 reevalua\u021bi planurile tarifare \u0219i ratele terminalului pentru o reducere poten\u021bial\u0103 a costurilor viitoare. De asemenea, verificarea detaliat\u0103 a sumelor pentru terminale \u0219i beneficiile sau reducerile aplicate poate oferi o perspectiv\u0103 mai clar\u0103 asupra factorilor care contribuie la totalul de plat\u0103.",
+        "tokens": {
+            "prompt_tokens": 835,
+            "completion_tokens": 313,
+            "total_tokens": 1148
+        },
+        "cost": 0.03444
+    },
+    {
+        "timestamp": "2025-01-16T11:50:17.730726",
+        "user_id": "0724077190",
+        "user_query": "nu e cam mult?",
+        "assistant_response": "Factura are un total de plata de 476.72 lei, iar costurile suplimentare se ridic\u0103 la 5445.53 lei. De asemenea, exist\u0103 o serie de reduceri \u0219i corec\u021bii la costurile utilizatorilor care afecteaz\u0103 suma total\u0103. Chiar dac\u0103 sumele par semnificative, acestea sunt influen\u021bate de diverse categorii de costuri, inclusiv ratele terminale \u0219i abonamentele. A\u0219adar, \u00een contextul totalului de plat\u0103, s-ar putea spune c\u0103 suma este relativ mare, dar trebuie analizat\u0103 \u00een func\u021bie de serviciile \u0219i produsele incluse.",
+        "tokens": {
+            "prompt_tokens": 835,
+            "completion_tokens": 133,
+            "total_tokens": 968
+        },
+        "cost": 0.014519999999999998
+    },
+    {
+        "timestamp": "2025-01-16T11:53:39.957975",
+        "user_id": "0724077190",
+        "user_query": "dar pe umere care costa mai mult?",
+        "assistant_response": "Pe baza informa\u021biilor furnizate \u00een factur\u0103:\n\n1. **Total Rate Terminal:** 5.445,53 lei\n2. **Abonamente \u0219i Extraop\u021biuni:** 91,17 lei (incluz\u00e2nd costuri \u0219i reduceri)\n\nDin aceste date, reiese c\u0103 **ratele terminale**, cu o valoare semnificativ\u0103 de 5.445,53 lei, cost\u0103 mult mai mult comparativ cu **abonamentele \u0219i extraop\u021biunile** care totalizeaz\u0103 91,17 lei (dup\u0103 aplicarea reducerilor).\n\nA\u0219adar, costul pe ratele terminale este semnificativ mai mare dec\u00e2t costurile pentru abonamente \u0219i extraop\u021biuni.",
+        "tokens": {
+            "prompt_tokens": 1300,
+            "completion_tokens": 152,
+            "total_tokens": 1452
+        },
+        "cost": 0.021779999999999997
     }
 ]

pages/Two_models.py ADDED Viewed

	@@ -0,0 +1,306 @@

+#!/usr/bin/env -S poetry run python
+import os
+import json
+import streamlit as st
+from openai import OpenAI
+from dotenv import load_dotenv
+# Load environment variables from .env file
+load_dotenv()
+# Get the OpenAI API key from environment variables
+api_key = os.getenv("OPENAI_API_KEY")
+if not api_key:
+    raise ValueError("The OPENAI_API_KEY environment variable is not set.")
+client = OpenAI()
+def load_user_data(user_id):
+    file_path = os.path.join(os.getcwd(), "data", "user_data", f"user_data_{user_id}.json")
+    #st.write(f"Loading user data from: {file_path}")
+    #st.write(f"Current working directory: {os.getcwd()}")
+    #Verify if the file exists
+    if not os.path.exists(file_path):
+        #st.write("File does not exist.")
+        return {}
+    try:
+        with open(file_path, "r") as file:
+            data = json.load(file)
+            #st.write(f"Loaded data: {data}")
+            return data
+    except json.JSONDecodeError:
+        st.write("Error decoding JSON.")
+        return {}
+    except Exception as e:
+        st.write(f"An error occurred: {e}")
+        return {}
+def save_user_data(user_id, data):
+    file_path = os.path.join("data", "user_data", f"user_data_{user_id}.json")
+    os.makedirs(os.path.dirname(file_path), exist_ok=True)
+    with open(file_path, "w") as file:
+        json.dump(data, file)
+def parseBill(data):
+    billDate = data.get("billDate")
+    billNo = data.get("billNo")
+    amountDue = data.get("amountDue")
+    extraCharge = data.get("extraCharge")
+    taxItems = data.get("taxItem", [])
+    subscribers = data.get("subscribers", [])
+    totalBillCosts = [{"categorie": t.get("cat"), "amount": t.get("amt")} for t in taxItems]
+    subscriberCosts = []
+    categories = set()
+    names = set()
+    for sub in subscribers:
+        logicalResource = sub.get("logicalResource")
+        billSummaryItems = sub.get("billSummaryItem", [])
+        for item in billSummaryItems:
+            try:
+                categories.add(item["cat"]),
+                names.add(item["name"])
+            except KeyError:
+                continue
+            subscriberCosts.append({
+                "Numar telefon": logicalResource,
+                "Categorie cost": item["cat"],
+                "Cost": item["name"],
+                "Valoare": item["amt"]
+            })
+    return {
+        "Data factura": billDate,
+        "Serie numar factura": billNo,
+        "De plata": amountDue,
+        "Costuri suplimentare": extraCharge,
+        "Total plata factura": totalBillCosts,
+        "Costuri utilizatori": subscriberCosts,
+        "Categorii": list(categories),
+        "Costuri": list(names)
+    }
+def check_related_keys(question, user_id):
+    user_data = load_user_data(user_id)
+    categories = set()
+    for bill in user_data.get("bills", []):
+        categories.update(bill.get("Categorii", []))
+    st.write(f"Categorii: {categories}")
+    return [category for category in categories if category.lower() in question.lower()]
+def process_query(query, user_id, model_name):
+    user_data = load_user_data(user_id)
+    bill_info = user_data.get("bills", [])
+    related_keys = check_related_keys(query, user_id)
+    related_keys_str = ", ".join(related_keys) if related_keys else "N/A"
+    if related_keys_str != "N/A":
+        context = (
+            f"Citeste informatiile despre costurile in lei facturate din json: {bill_info} "
+            f"si raspunde la intrebarea: '{query}' dar numai cu info legate de: {related_keys_str}"
+        )
+    else:
+        context = (
+            f"Citeste informatiile despre costrurile in lei facturate din json: {bill_info} "
+            f"si raspunde la intrebarea: '{query}' dar numai cu info legate de factura"
+        )
+    max_input_length = 5550
+    #st.write(f"Context:\n{context}")
+    st.write(f"Context size: {len(context)} characters")
+    if len(context) > max_input_length:
+        st.warning("Prea multe caractere în context, solicitarea nu va fi trimisă.")
+        return None
+    # Update this part to run the chosen model
+    if model_name == "gpt-4o-mini":
+        # Code to run model 4o mini
+        st.write("Running model GPT-4o-mini")
+    elif model_name == "gpt-4o":
+        # Code to run model 4o
+        st.write("Running model GPT-4o")
+    return context
+# import the datetime class from the datetime module
+from datetime import datetime
+def log_conversation(user_id, user_query, assistant_response, tokens, cost):
+    log_entry = {
+        "timestamp": datetime.now().isoformat(),
+        "user_id": user_id,
+        "user_query": user_query,
+        "assistant_response": assistant_response,
+        "tokens": tokens,
+        "cost": cost
+    }
+    log_file_path = os.path.join("logs", "conversation_logs.json")
+    os.makedirs(os.path.dirname(log_file_path), exist_ok=True)
+    if os.path.exists(log_file_path):
+        with open(log_file_path, "r") as log_file:
+            logs = json.load(log_file)
+    else:
+        logs = []
+    logs.append(log_entry)
+    with open(log_file_path, "w") as log_file:
+        json.dump(logs, log_file, indent=4)
+def main():
+    st.title("Bill info LLM Agent (OpenAI)")
+    st.image("https://miro.medium.com/v2/resize:fit:100/format:webp/1*NfE0G4nEj4xX7Z_8dSx83g.png")
+    # Create a sidebar menu to choose between models
+    model_name = st.sidebar.selectbox("Choose OpenAI Model", ["gpt-4o-mini", "gpt-4o"])
+    if "user_id" not in st.session_state:
+        st.session_state.user_id = None
+    user_id = st.sidebar.text_input("Introdu numărul de telefon:", placeholder="Incearca 0724077190")
+    # use a predefined user_id for testing
+    # display the user data if the user_id is set
+    #st.write(f"User ID: {user_id}")
+    st.session_state.user_data = None
+    if user_id and user_id != st.session_state.user_id:
+        data = load_user_data(user_id)
+        st.session_state.user_data = data
+        if data:
+            st.session_state.user_id = user_id
+            st.success("Utilizator găsit!")
+            st.write(f"Numar telefon: {st.session_state.user_id}")
+            st.session_state.user_data = data
+        else:
+            st.warning("Nu am găsit date pentru acest ID.")
+            st.warning("Încărcați o factură json.")
+            st.session_state.user_id = user_id
+            st.session_state.user_data = None
+    # If the user has no data yet Show the upload (st.file_uploader...) dialog ,
+    # If the user has stored data in data\user_data\"user_data{user_id}.json, display the existing bills data - st.write(bill) but compacted
+    if st.session_state.user_data:
+        st.write("Facturi existente (extras):")
+        for bill in st.session_state.user_data.get("bills", []):
+            st.write({
+                "Data factura": bill.get("Data factura"),
+                "Serie numar factura": bill.get("Serie numar factura"),
+                "De plata": bill.get("De plata"),
+                "Costuri suplimentare": bill.get("Costuri suplimentare")
+            })
+    if not st.session_state.user_data:
+        uploaded_file = st.file_uploader("Incarca factura", type="json")
+        if uploaded_file and st.session_state.user_id:
+            bill_data = json.load(uploaded_file)
+            parsed_bill = parseBill(bill_data)
+            existing_data = load_user_data(st.session_state.user_id)
+            # Check if the billNo already exists in the existing data
+            existing_bill_nos = [bill.get("Data factura") for bill in existing_data.get("bills", [])]
+            if parsed_bill.get("Data factura") in existing_bill_nos:
+                st.warning("Factură existentă.")
+            else:
+                if "bills" not in existing_data:
+                    existing_data["bills"] = []
+                existing_data["bills"].append(parsed_bill)
+                save_user_data(st.session_state.user_id, existing_data)
+                st.success("Factura a fost încărcată și salvată cu succes!")
+    # Initialize conversation in the session state
+    # "context_prompt_added" indicates whether we've added the specialized "bill info" context yet.
+    if "messages" not in st.session_state:
+        st.session_state["messages"] = [
+            {"role": "assistant", "content": "Cu ce te pot ajuta?"}
+        ]
+    if "context_prompt_added" not in st.session_state:
+        st.session_state.context_prompt_added = False
+    st.write("---")
+    st.subheader("Chat")
+    for msg in st.session_state["messages"]:
+        st.chat_message(msg["role"]).write(msg["content"])
+    if prompt := st.chat_input("Introduceți întrebarea aici:"):
+        if not st.session_state.user_id:
+            st.error("Trebuie să introduci un număr de telefon valid sau să încarci date.")
+            return
+        # If the context prompt hasn't been added yet, build & inject it once;
+        # otherwise, just add the user's raw question.
+        if not st.session_state.context_prompt_added:
+            final_prompt = process_query(prompt, st.session_state["user_id"], "gpt-4o")
+            if final_prompt is None:
+                st.stop()
+            st.session_state["messages"].append({"role": "user", "content": final_prompt})
+            st.session_state.context_prompt_added = True
+        else:
+            st.session_state["messages"].append({"role": "user", "content": prompt})
+        # Display the latest user message in the chat
+        st.chat_message("user").write(st.session_state["messages"][-1]["content"])
+        # Display the related keys
+        related_keys = check_related_keys(prompt, st.session_state["user_id"])
+        st.write("Focus pe entitatile:", related_keys)
+        # Call GPT-4o model
+        completion_4o = client.chat.completions.create(
+            model="gpt-4o",
+            messages=st.session_state["messages"]
+        )
+        response_text_4o = completion_4o.choices[0].message.content.strip()
+        # Call GPT-4o-mini model
+        completion_4o_mini = client.chat.completions.create(
+            model="gpt-4o-mini",
+            messages=st.session_state["messages"]
+        )
+        response_text_4o_mini = completion_4o_mini.choices[0].message.content.strip()
+        st.session_state["messages"].append({"role": "assistant", "content": response_text_4o})
+        st.session_state["messages"].append({"role": "assistant", "content": response_text_4o_mini})
+        col1, col2 = st.columns(2)
+        with col1:
+            st.subheader("GPT-4o")
+            st.chat_message("assistant").write(response_text_4o)
+        # Log the conversation for GPT-4o
+            if hasattr(completion_4o, "usage"):
+                prompt_tokens_4o = completion_4o.usage.prompt_tokens
+                completion_tokens_4o = completion_4o.usage.completion_tokens
+                total_tokens_4o = completion_4o.usage.total_tokens
+                st.write("GPT-4o Prompt tokens:", prompt_tokens_4o)
+                st.write("GPT-4o Completion tokens:", completion_tokens_4o)
+                st.write("GPT-4o Total tokens:", total_tokens_4o)
+        with col2:
+            st.subheader("GPT-4o-mini")
+            st.chat_message("assistant").write(response_text_4o_mini)
+            if hasattr(completion_4o_mini, "usage"):
+                prompt_tokens_4o_mini = completion_4o_mini.usage.prompt_tokens
+                completion_tokens_4o_mini = completion_4o_mini.usage.completion_tokens
+                total_tokens_4o_mini = completion_4o_mini.usage.total_tokens
+                st.write("GPT-4o-mini Prompt tokens:", prompt_tokens_4o_mini)
+                st.write("GPT-4o-mini Completion tokens:", completion_tokens_4o_mini)
+                st.write("GPT-4o-mini Total tokens:", total_tokens_4o_mini)
+if __name__ == "__main__":
+    main()