anasmkh committed on
Commit
61b75fc
·
1 Parent(s): ae4ca72

Update main.py

Browse files
Files changed (1) hide show
  1. main.py +30 -11
main.py CHANGED
@@ -2,7 +2,7 @@ from langchain.chains import RetrievalQA, ConversationalRetrievalChain
2
  from langchain.vectorstores import Chroma
3
  from langchain.text_splitter import CharacterTextSplitter
4
  from langchain.document_loaders import DirectoryLoader, TextLoader,PyPDFLoader
5
- from transformers import pipeline
6
  from langchain.llms import HuggingFacePipeline
7
  from langchain.embeddings import HuggingFaceInstructEmbeddings
8
  import gradio as gr
@@ -10,22 +10,41 @@ from InstructorEmbedding import INSTRUCTOR
10
  import torch
11
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
12
 
13
- tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
14
 
15
- model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
16
- pipe = pipeline(
17
- "text2text-generation",
 
 
 
 
 
 
 
 
 
 
 
 
18
  model=model,
19
  tokenizer=tokenizer,
20
- max_length=512,
21
- temperature=0.5,
22
- top_p=0.95,
23
- repetition_penalty=1.15
24
  )
25
 
26
- local_llm = HuggingFacePipeline(pipeline=pipe)
27
- print(local_llm('What is the capital of Syria?'))
 
 
 
 
 
 
 
 
28
 
 
 
29
  loader = PyPDFLoader('bipolar.pdf')
30
  # loader = TextLoader('info.txt')
31
  document = loader.load()
 
2
  from langchain.vectorstores import Chroma
3
  from langchain.text_splitter import CharacterTextSplitter
4
  from langchain.document_loaders import DirectoryLoader, TextLoader,PyPDFLoader
5
+ from transformers import pipeline, AutoModelForCausalLM
6
  from langchain.llms import HuggingFacePipeline
7
  from langchain.embeddings import HuggingFaceInstructEmbeddings
8
  import gradio as gr
 
10
  import torch
11
  from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
12
 
13
+ from transformers import AutoModelForSequenceClassification, AutoTokenizer
14
 
15
+ # model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-mrpc")
16
+ # tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-mrpc")
17
+
18
+
19
+ #
20
+ # tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
21
+ #
22
+ # model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
23
+
24
+ model_id = "gpt2-medium"
25
+ tokenizer = AutoTokenizer.from_pretrained(model_id)
26
+ model = AutoModelForCausalLM.from_pretrained(model_id)
27
+
28
+ pipeline = pipeline(
29
+ "text-generation",
30
  model=model,
31
  tokenizer=tokenizer,
32
+ max_length=100
 
 
 
33
  )
34
 
35
+ # local_llm = HuggingFacePipeline(pipeline=pipeline)
36
+ # pipe = pipeline(
37
+ # "text2text-generation",
38
+ # model=model,
39
+ # tokenizer=tokenizer,
40
+ # max_length=512,
41
+ # temperature=0.5,
42
+ # top_p=0.95,
43
+ # repetition_penalty=1.15
44
+ # )
45
 
46
+ local_llm = HuggingFacePipeline(pipeline=pipeline)
47
+ # print(local_llm('What is the capital of Syria?'))
48
  loader = PyPDFLoader('bipolar.pdf')
49
  # loader = TextLoader('info.txt')
50
  document = loader.load()