Spaces:

ajeetkumar01
/

Penal_Code_Description_Extractor

Sleeping

App Files Files Community

ajeetkumar01 committed on Sep 18, 2024

Commit

51391bc

verified ·

1 Parent(s): 9ace159

Create app.py

Browse files

Files changed (1) hide show

app.py +101 -0

app.py ADDED Viewed

	@@ -0,0 +1,101 @@

+import os
+
+import pandas as pd
+import streamlit as st
+import torch
+from huggingface_hub import login  # For authentication
+from transformers import AutoTokenizer, AutoModelForCausalLM
+# Authenticate with Hugging Face
+def authenticate_huggingface():
+    token = os.getenv("llama2_token")  # Load token from environment variable
+    if token:
+        login(token)  # This logs in using the Hugging Face token
+    else:
+        st.error("Hugging Face token not found. Please set the HF_TOKEN environment variable.")
+# Load the Llama 2 model from Hugging Face
+@st.cache_resource
+def load_llama_model():
+    authenticate_huggingface()  # Ensure authentication is done before loading
+    model_name = "meta-llama/Llama-2-7b-hf"
+    tokenizer = AutoTokenizer.from_pretrained(model_name, use_auth_token=True)
+    model = AutoModelForCausalLM.from_pretrained(model_name, use_auth_token=True)
+    return tokenizer, model
+# Function to query the Llama 2 model
+def query_llama_model(penal_code, tokenizer, model):
+    prompt = f"What is California Penal Code {penal_code}?"
+    # Tokenize the input prompt
+    inputs = tokenizer(prompt, return_tensors="pt")
+    # Generate output from the model
+    outputs = model.generate(**inputs, max_new_tokens=100)
+    # Decode the generated text
+    description = tokenizer.decode(outputs[0], skip_special_tokens=True)
+    return description
+# Function to process CSV and update descriptions
+def update_csv_with_descriptions(csv_file, tokenizer, model):
+    # Read the CSV file
+    df = pd.read_csv(csv_file)
+    # Dictionary to store penal codes and their descriptions
+    penal_code_dict = {}
+    # Iterate through each row in the CSV
+    for index, row in df.iterrows():
+        penal_code = row['Offense Number']
+        # Check if description is already present
+        if not row['Description']:
+            st.write(f"Querying description for {penal_code}...")
+            description = query_llama_model(penal_code, tokenizer, model)
+            # Update the dataframe with the description
+            df.at[index, 'Description'] = description
+            # Add to dictionary
+            penal_code_dict[penal_code] = description
+    # Save the updated CSV file
+    updated_file_path = 'updated_' + csv_file.name
+    df.to_csv(updated_file_path, index=False)
+    return penal_code_dict, updated_file_path
+# Streamlit UI
+def main():
+    st.title("Penal Code Description Extractor with Llama 2")
+    # Load the Llama 2 model and tokenizer
+    tokenizer, model = load_llama_model()
+    # Upload CSV file
+    uploaded_file = st.file_uploader("Upload a CSV file with Penal Codes", type=["csv"])
+    if uploaded_file is not None:
+        # Display uploaded file
+        st.write("Uploaded CSV File:")
+        df = pd.read_csv(uploaded_file)
+        st.dataframe(df)
+        # Process the file and update descriptions
+        if st.button("Get Penal Code Descriptions"):
+            penal_code_dict, updated_file_path = update_csv_with_descriptions(uploaded_file, tokenizer, model)
+            # Show dictionary output
+            st.write("Penal Code Descriptions:")
+            st.json(penal_code_dict)
+            # Provide a download link for the updated CSV
+            with open(updated_file_path, 'rb') as f:
+                st.download_button(
+                    label="Download Updated CSV",
+                    data=f,
+                    file_name=updated_file_path,
+                    mime='text/csv'
+                )
+# Start the app only when this file is run as a script, not when it is imported.
+if __name__ == "__main__":
+    main()