anasmkh committed on
Commit
2416f1c
·
1 Parent(s): 37ddf11

update main.py

Browse files
Files changed (1) hide show
  1. main.py +11 -30
main.py CHANGED
@@ -12,43 +12,26 @@ from transformers import AutoTokenizer, AutoModelForSeq2SeqLM
12
 
13
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
14
 
15
- # model = AutoModelForSequenceClassification.from_pretrained("distilbert-base-uncased-finetuned-mrpc")
16
- # tokenizer = AutoTokenizer.from_pretrained("distilbert-base-uncased-finetuned-mrpc")
17
 
18
-
19
- #
20
  tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
21
-
22
  model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
23
 
24
- # model_id = "lamdao/lora-trained-xl-colab"
25
- # tokenizer = AutoTokenizer.from_pretrained(model_id)
26
- # model = AutoModelForCausalLM.from_pretrained(model_id)
27
-
28
- pipeline = pipeline(
29
- "text-generation",
30
  model=model,
31
  tokenizer=tokenizer,
32
- max_length=512
 
 
 
 
33
  )
34
 
35
- # local_llm = HuggingFacePipeline(pipeline=pipeline)
36
- # pipe = pipeline(
37
- # "text2text-generation",
38
- # model=model,
39
- # tokenizer=tokenizer,
40
- # max_length=512,
41
- # temperature=0.5,
42
- # top_p=0.95,
43
- # repetition_penalty=1.15
44
- # )
45
-
46
- local_llm = HuggingFacePipeline(pipeline=pipeline)
47
- # print(local_llm('What is the capital of Syria?'))
48
  loader = PyPDFLoader('bipolar.pdf')
49
  # loader = TextLoader('info.txt')
50
  document = loader.load()
51
- text_spliter = CharacterTextSplitter(chunk_size=100, chunk_overlap=0)
52
  texts = text_spliter.split_documents(document)
53
  embedding = HuggingFaceInstructEmbeddings()
54
  docsearch = Chroma.from_documents(texts, embedding, persist_directory='db')
@@ -58,15 +41,13 @@ qa_chain = RetrievalQA.from_chain_type(llm=local_llm,
58
  chain_type="stuff",
59
  retriever=retriever,
60
  return_source_documents=True)
61
- # question = input('prompt: ')
62
- # result = qa_chain({'query': question})
63
- # print('result: ', result['result'])
64
  def gradinterface(query,history):
65
  result = qa_chain({'query': query})
66
  return result['result']
67
 
68
 
69
- demo = gr.ChatInterface(fn=gradinterface, title='OUR_BOT')
70
 
71
  if __name__ == "__main__":
72
  demo.launch(share=True)
 
12
 
13
  from transformers import AutoModelForSequenceClassification, AutoTokenizer
14
 
 
 
15
 
 
 
16
  tokenizer = AutoTokenizer.from_pretrained("google/flan-t5-base")
 
17
  model = AutoModelForSeq2SeqLM.from_pretrained("google/flan-t5-base")
18
 
19
+ pipe = pipeline(
20
+ "text2text-generation",
 
 
 
 
21
  model=model,
22
  tokenizer=tokenizer,
23
+ max_length=200,
24
+ temperature=0.8,
25
+ top_p=0.95,
26
+ repetition_penalty=1.15,
27
+ do_sample=True
28
  )
29
 
30
+ local_llm = HuggingFacePipeline(pipeline=pipe)
 
 
 
 
 
 
 
 
 
 
 
 
31
  loader = PyPDFLoader('bipolar.pdf')
32
  # loader = TextLoader('info.txt')
33
  document = loader.load()
34
+ text_spliter = CharacterTextSplitter(chunk_size=1000, chunk_overlap=0)
35
  texts = text_spliter.split_documents(document)
36
  embedding = HuggingFaceInstructEmbeddings()
37
  docsearch = Chroma.from_documents(texts, embedding, persist_directory='db')
 
41
  chain_type="stuff",
42
  retriever=retriever,
43
  return_source_documents=True)
44
+
 
 
45
  def gradinterface(query,history):
46
  result = qa_chain({'query': query})
47
  return result['result']
48
 
49
 
50
+ demo = gr.ChatInterface(fn=gradinterface, title='OUR_OWN_BOT')
51
 
52
  if __name__ == "__main__":
53
  demo.launch(share=True)