fajjos committed
Commit bddf041
1 Parent(s): 7cefe7c

Add Streamlit app and requirements

Files changed (2)
  1. app.py +39 -24
  2. requirements.txt +2 -0
app.py CHANGED
@@ -1,34 +1,49 @@
  import streamlit as st
- from transformers import AutoModelForTokenClassification, AutoTokenizer
+ from transformers import AutoTokenizer, AutoModelForCausalLM, TextStreamer
  import torch
+ import bitsandbytes as bnb  # Required for 4-bit quantization

- # Load the model and tokenizer from Hugging Face
- model_name = "fajjos/Keyword_v1"  # Replace with the actual model name
+ # Load the tokenizer and the quantized LLaMA model
+ model_name = "unsloth/Llama-3.2-1B-Instruct-bnb-4bit"
  tokenizer = AutoTokenizer.from_pretrained(model_name)
- model = AutoModelForTokenClassification.from_pretrained(model_name)
+
+ # Load the quantized LLaMA model in 4-bit precision
+ model = AutoModelForCausalLM.from_pretrained(
+     model_name,
+     load_in_4bit=True,  # Enable 4-bit quantization
+     device_map="auto"   # Automatically assigns to CPU/GPU
+ )
+
+ # Enable native 2x faster inference (if applicable, ensure this feature works)
+ # FastLanguageModel.for_inference(model)  # Uncomment this if FastLanguageModel is available for your model

  # Streamlit interface
- st.title("Keyword Extractor")
+ st.title("Keyword Extractor using LLaMA 4-bit Model")
+
+ # Text input area for user input
  user_input = st.text_area("Enter text for keyword extraction")

  if user_input:
-     # Tokenize the input
-     inputs = tokenizer(user_input, return_tensors="pt")
-
-     # Get model predictions
+     # Prepare the prompt for keyword extraction
+     prompt_template = (
+         "Extract keywords and variables from the prompt:\n"
+         "{}\n"
+     )
+     alpaca_prompt = prompt_template.format(user_input)
+
+     # Tokenize the input text
+     inputs = tokenizer([alpaca_prompt], return_tensors="pt").to("cuda")
+
+     # Set up the text streamer to display the generated text as it streams
+     text_streamer = TextStreamer(tokenizer)
+
+     # Generate keywords and extract variables
      with torch.no_grad():
-         outputs = model(**inputs)
-
-     # Process the predictions (this will depend on your specific model output)
-     tokens = tokenizer.convert_ids_to_tokens(inputs['input_ids'][0])
-     predictions = torch.argmax(outputs.logits, dim=2)
-
-     # Display extracted keywords
-     st.write("Extracted Keywords:")
-     for token, pred in zip(tokens, predictions[0]):
-         if pred == 1:  # Assuming label '1' corresponds to a keyword
-             st.write(token)
-
-     # # Add a slider for interaction (example)
-     # x = st.slider('Select a value')
-     # st.write(f"{x} squared is {x * x}")
+         output = model.generate(**inputs, streamer=text_streamer, max_new_tokens=128)
+
+     # Decode the output tokens to get the generated text
+     generated_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+     # Display the result in the Streamlit app
+     st.write("Extracted Keywords and Variables:")
+     st.write(generated_text)
requirements.txt CHANGED
@@ -1,3 +1,5 @@
  transformers
+ bitsandbytes
+ sentencepiece
  torch
  streamlit
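
One caveat on the dependency list: the new device_map="auto" call in app.py also requires the accelerate package, which is not listed here, so if model loading fails on the Space, adding accelerate to this file is the likely fix. Locally, the app runs with pip install -r requirements.txt followed by streamlit run app.py.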