root committed on
Commit · 86dad54 · 1 Parent(s): a1f97de
ss
- app.py +24 -21
- explanation_generator.py +17 -15
app.py
CHANGED
@@ -9,18 +9,36 @@ import os
 import tempfile
 import base64
 from rank_bm25 import BM25Okapi
-from transformers import AutoModel, AutoTokenizer
-from sentence_transformers import SentenceTransformer
-from nltk.tokenize import word_tokenize, sent_tokenize
-from tqdm import tqdm
 import re
 import io
 import PyPDF2
 from docx import Document
 import csv
+
+# Fix for Replicate issue - must be before model imports
+import torch
+try:
+    from einops.layers.torch import Replicate
+except ImportError:
+    # Define our own Replicate class if not available
+    class Replicate(torch.nn.Module):
+        def __init__(self, *args, **kwargs):
+            super().__init__()
+        def forward(self, x):
+            return x
+
+    # Make sure Replicate is available in the right module
+    import sys
+    import einops.layers.torch
+    einops.layers.torch.Replicate = Replicate
+    sys.modules['einops.layers.torch'].Replicate = Replicate
+
+# Now import models
+from transformers import AutoModel, AutoTokenizer
+from sentence_transformers import SentenceTransformer
+from nltk.tokenize import word_tokenize, sent_tokenize
+from tqdm import tqdm
 from explanation_generator import ExplanationGenerator
-from einops.layers.torch import Rearrange, Reduce
-from einops import rearrange, reduce, repeat
 
 # Download NLTK resources
 try:
@@ -33,21 +51,6 @@ EMBEDDING_MODEL_NAME = "nvidia/NV-Embed-v2"
 print(f"Loading embedding model {EMBEDDING_MODEL_NAME}...")
 
 try:
-    # Create a simple Replicate class since it's missing
-    class Replicate(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-
-        def forward(self, x):
-            return x
-
-    # Add to global namespace to make it available
-    import sys
-    sys.modules['__main__'].Replicate = Replicate
-
-    # Also add to globals
-    globals()['Replicate'] = Replicate
-
     # Load embedding model and tokenizer
     global_embedding_tokenizer = AutoTokenizer.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True)
     global_embedding_model = AutoModel.from_pretrained(EMBEDDING_MODEL_NAME, trust_remote_code=True, device_map="auto")
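A note on the pattern this commit installs: in Python, "import einops.layers.torch" binds the same module object that lives in sys.modules['einops.layers.torch'], so the two assignments at the end of the except block write the identical attribute twice and either one alone would make the name importable afterwards. The "must be before model imports" comment matters because the trust_remote_code path used for nvidia/NV-Embed-v2 executes model code at load time, and that code is presumably what tries to import Replicate. Below is a minimal sketch, not part of the commit, of how the patch could be verified once it has run; the tensor shape is arbitrary and the identity check simply exercises the fallback class's forward method.

import sys
import torch

# Resolves even on einops versions that ship no Replicate layer,
# because the patch injected the fallback class into the module.
from einops.layers.torch import Replicate

layer = Replicate()
x = torch.randn(2, 3)
assert torch.equal(layer(x), x)  # the fallback is a no-op identity module

# Both patch targets are one and the same module object,
# so a single attribute assignment would already have sufficed.
import einops.layers.torch
assert einops.layers.torch is sys.modules['einops.layers.torch']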
explanation_generator.py
CHANGED
@@ -6,6 +6,23 @@ using the QwQ-32B model from Hugging Face.
 """
 
 import torch
+# Fix for Replicate issue - must be before model imports
+try:
+    from einops.layers.torch import Replicate
+except ImportError:
+    # Define our own Replicate class if not available
+    class Replicate(torch.nn.Module):
+        def __init__(self, *args, **kwargs):
+            super().__init__()
+        def forward(self, x):
+            return x
+
+    # Make sure Replicate is available in the right module
+    import sys
+    import einops.layers.torch
+    einops.layers.torch.Replicate = Replicate
+    sys.modules['einops.layers.torch'].Replicate = Replicate
+
 from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig
 import os
 import re
@@ -17,21 +34,6 @@ print("Loading Qwen/QwQ-32B model with 4-bit quantization...")
 QWQ_MODEL_NAME = "Qwen/QwQ-32B"
 
 try:
-    # Create a simple Replicate class since it's missing
-    class Replicate(torch.nn.Module):
-        def __init__(self):
-            super().__init__()
-
-        def forward(self, x):
-            return x
-
-    # Add to global namespace to make it available
-    import sys
-    sys.modules['__main__'].Replicate = Replicate
-
-    # Also add to globals
-    globals()['Replicate'] = Replicate
-
     # Configure 4-bit quantization for better performance
     quantization_config = BitsAndBytesConfig(
         load_in_4bit=True,
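The tail of this hunk leaves the 4-bit setup in place. For readers of the diff, here is a minimal, self-contained sketch of loading QwQ-32B the way the surrounding code appears to, using the BitsAndBytesConfig API from transformers. Only load_in_4bit=True is visible in the diff, so the quant type and compute dtype below are assumptions, as is device_map="auto" (borrowed from the app.py hunk above).

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer, BitsAndBytesConfig

QWQ_MODEL_NAME = "Qwen/QwQ-32B"

# Only load_in_4bit=True appears in the diff; the other fields are assumptions.
quantization_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",             # assumed: NF4 is the common choice
    bnb_4bit_compute_dtype=torch.float16,  # assumed: fp16 compute for speed/memory
)

tokenizer = AutoTokenizer.from_pretrained(QWQ_MODEL_NAME)
model = AutoModelForCausalLM.from_pretrained(
    QWQ_MODEL_NAME,
    quantization_config=quantization_config,
    device_map="auto",  # shard across available devices, as app.py does for the embedder
)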