Spaces:
Runtime error
Runtime error
Update app.py
Browse files
app.py
CHANGED
@@ -1,32 +1,3 @@
|
|
1 |
-
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
|
2 |
-
from datasets import Dataset
|
3 |
-
from groq import Groq
|
4 |
-
import os
|
5 |
-
|
6 |
-
# Initialize Groq client with your API key
|
7 |
-
client = Groq(api_key="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")
|
8 |
-
|
9 |
-
# Paths to your books
|
10 |
-
book_paths = {
|
11 |
-
"DSM": "/content/Diagnostic and statistical manual of mental disorders _ DSM-5 ( PDFDrive.com ).pdf",
|
12 |
-
"Personality": "/content/b6c3v8_Theories_of_Personality_10.pdf",
|
13 |
-
"SearchForMeaning": "/content/Mans-Search-For-Meaning.pdf"
|
14 |
-
}
|
15 |
-
|
16 |
-
# Function to load and preprocess the data from books
|
17 |
-
def load_data(paths):
|
18 |
-
data = []
|
19 |
-
for title, path in paths.items():
|
20 |
-
with open(path, "r", encoding="utf-8", errors='ignore') as file:
|
21 |
-
text = file.read()
|
22 |
-
paragraphs = text.split("\n\n") # Split by paragraphs (adjust as needed)
|
23 |
-
for paragraph in paragraphs:
|
24 |
-
if paragraph.strip(): # Skip empty paragraphs
|
25 |
-
data.append({"text": paragraph.strip()})
|
26 |
-
return Dataset.from_list(data)
|
27 |
-
|
28 |
-
# Load and preprocess dataset for fine-tuning
|
29 |
-
dataset = load_data(book_paths)
|
30 |
|
31 |
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
|
32 |
from datasets import Dataset
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1 |
|
2 |
from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
|
3 |
from datasets import Dataset
|