Zeeshan42 committed on
Commit
9dfc3d0
·
verified ·
1 Parent(s): 621f579

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +0 -29
app.py CHANGED
@@ -1,32 +1,3 @@
1
- from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
2
- from datasets import Dataset
3
- from groq import Groq
4
- import os
5
-
6
- # Initialize Groq client with your API key
7
- client = Groq(api_key="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")
8
-
9
- # Paths to your books
10
- book_paths = {
11
- "DSM": "/content/Diagnostic and statistical manual of mental disorders _ DSM-5 ( PDFDrive.com ).pdf",
12
- "Personality": "/content/b6c3v8_Theories_of_Personality_10.pdf",
13
- "SearchForMeaning": "/content/Mans-Search-For-Meaning.pdf"
14
- }
15
-
16
- # Function to load and preprocess the data from books
17
- def load_data(paths):
18
- data = []
19
- for title, path in paths.items():
20
- with open(path, "r", encoding="utf-8", errors='ignore') as file:
21
- text = file.read()
22
- paragraphs = text.split("\n\n") # Split by paragraphs (adjust as needed)
23
- for paragraph in paragraphs:
24
- if paragraph.strip(): # Skip empty paragraphs
25
- data.append({"text": paragraph.strip()})
26
- return Dataset.from_list(data)
27
-
28
- # Load and preprocess dataset for fine-tuning
29
- dataset = load_data(book_paths)
30
 
31
  from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
32
  from datasets import Dataset
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
 
2
  from transformers import AutoTokenizer, AutoModelForCausalLM, Trainer, TrainingArguments
3
  from datasets import Dataset