ShayanP committed
Commit bd72f84 · 1 Parent(s): db50c89

Upload 5 files
Files changed (6)
  1. .env +0 -0
  2. .gitattributes +1 -0
  3. audio-to-text.py +101 -0
  4. audio.wav +0 -0
  5. recording0.wav +3 -0
  6. requirements.txt +0 -0
.env ADDED
Binary file (138 Bytes).
.gitattributes CHANGED
@@ -33,3 +33,4 @@ saved_model/**/* filter=lfs diff=lfs merge=lfs -text
  *.zip filter=lfs diff=lfs merge=lfs -text
  *.zst filter=lfs diff=lfs merge=lfs -text
  *tfevents* filter=lfs diff=lfs merge=lfs -text
+ recording0.wav filter=lfs diff=lfs merge=lfs -text
audio-to-text.py ADDED
@@ -0,0 +1,101 @@
+ import os
+ import streamlit as st
+ import whisper
+ from dotenv import load_dotenv
+ from langchain.chains import RetrievalQA
+ from audiorecorder import audiorecorder
+ from langchain.document_loaders import DirectoryLoader
+ from langchain.embeddings import OpenAIEmbeddings
+ from langchain.text_splitter import RecursiveCharacterTextSplitter
+ from langchain.vectorstores.faiss import FAISS
+ from langchain.chat_models import ChatOpenAI
+
+ load_dotenv()
+
+ api_key = os.getenv("OPENAI_API_KEY")
+
+ st.title("Avtarcoach Audio-to-text")
+
+ # audio_bytes = audio_recorder("Click to record", "Click to stop recording", neutral_color="#051082", icon_size="2x")
+ # if audio_bytes:
+ #     st.audio(audio_bytes, format="audio/wav")
+
+ audio = audiorecorder("Click to record", "Click to stop recording")
+
+ if len(audio) > 0:
+     # To play audio in frontend:
+     st.audio(audio.export().read())
+
+     # To save audio to a file, use pydub export method:
+     audio.export("audio.wav", format="wav")
+
+     # To get audio properties, use pydub AudioSegment properties:
+     st.write(
+         f"Frame rate: {audio.frame_rate}, Frame width: {audio.frame_width}, Duration: {audio.duration_seconds} seconds")
+
+     model = whisper.load_model("base")
+
+     # load audio and pad/trim it to fit 30 seconds
+     audio = whisper.load_audio(r"audio.wav")
+     audio = whisper.pad_or_trim(audio)
+
+     # make log-Mel spectrogram and move to the same device as the model
+     mel = whisper.log_mel_spectrogram(audio).to(model.device)
+
+     # detect the spoken language
+     _, probs = model.detect_language(mel)
+     st.write(f"Detected language: {max(probs, key=probs.get)}")
+
+     # decode the audio
+     options = whisper.DecodingOptions(fp16=False)
+     result = whisper.decode(model, mel, options)
+
+     # print the recognized text
+     st.write("You Said: ", result.text)
+     input_text = result.text
+
+     st.markdown("""<hr style="height:10px;border:none;color:#333;background-color:#333;" /> """, unsafe_allow_html=True)
+
+     st.write("Avtarcoach Response: ")
+
+     # Gen AI results
+
+     pdf_loader = DirectoryLoader(
+         r'C:\Users\shpe1\Downloads\tea_project_text_to_text-main\tea_project_text_to_text-main\pdf_docs', glob="**/*.pdf",
+         use_multithreading=True)
+     docs_loader = DirectoryLoader(
+         r'C:\Users\shpe1\Downloads\tea_project_text_to_text-main\tea_project_text_to_text-main\docs', glob="**/*.docx",
+         use_multithreading=True)
+     csv_loader = DirectoryLoader(
+         r'C:\Users\shpe1\Downloads\tea_project_text_to_text-main\tea_project_text_to_text-main\docs', glob="**/*.csv",
+         use_multithreading=True)
+     xlsx_loader = DirectoryLoader(
+         r'C:\Users\shpe1\Downloads\tea_project_text_to_text-main\tea_project_text_to_text-main\docs', glob="**/*.xlsx",
+         use_multithreading=True)
+     loaders = [pdf_loader, docs_loader, csv_loader, xlsx_loader]
+
+     documents = []
+     for loader in loaders:
+         documents.extend(loader.load())
+
+     text_splitters = RecursiveCharacterTextSplitter(
+         chunk_size=2000,
+         chunk_overlap=200,
+         length_function=len
+     )
+
+     chunks = text_splitters.split_documents(documents)
+     embedding = OpenAIEmbeddings()
+     # db = FAISS.from_documents(chunks, embedding)
+     faiss_db = FAISS.from_documents(chunks, embedding)
+     retriever = faiss_db.as_retriever(search_type='mmr')
+     llm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)
+     qa_chain = RetrievalQA.from_chain_type(llm=llm, chain_type="stuff", retriever=retriever)
+
+     # doc_search = faiss_db.get_relevant_documents(input_text)
+     # llm = ChatOpenAI(model="gpt-4-1106-preview", temperature=0)
+     # qa_chain = load_qa_chain(llm=llm, chain_type="stuff")
+     # qa_document_chain = AnalyzeDocumentChain(combine_docs_chain=qa_chain)
+     response = qa_chain.run(input_text)
+
+     st.write(response)
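
One caveat in the script above: whisper.pad_or_trim fixes the input at exactly 30 seconds, so longer recordings are silently truncated before decoding. A minimal sketch of an alternative, assuming the same audio.wav written by the recorder, is to let model.transcribe segment long audio internally; it also returns the detected language, replacing the manual mel-spectrogram and detect_language steps:

    import whisper

    # transcribe() chunks audio longer than 30 seconds internally,
    # so nothing is trimmed away; fp16=False matches the CPU-friendly
    # DecodingOptions used in the script above.
    model = whisper.load_model("base")
    result = model.transcribe("audio.wav", fp16=False)

    print(result["language"])  # detected language code, e.g. "en"
    print(result["text"])      # full transcription, not just the first 30 s

A related design note: the script rebuilds the FAISS index from the document folders on every recording. Since langchain's FAISS store exposes save_local and load_local, building the index once, persisting it, and reloading it per request would avoid re-embedding the whole corpus each time.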
audio.wav ADDED
Binary file (829 kB).
recording0.wav ADDED
@@ -0,0 +1,3 @@
+ version https://git-lfs.github.com/spec/v1
+ oid sha256:4ef40337ebb5b111c40947a994d0ea0ef0412e27623e8fe6353b316208db255d
+ size 5760058
requirements.txt ADDED
Binary file (6.38 kB).
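
requirements.txt is stored as a binary blob here, so its exact contents are not visible in the diff. For reference, a plausible reconstruction from the imports in audio-to-text.py might look like the following; the package names are inferred, not read from the committed file:

    # hypothetical requirements inferred from audio-to-text.py's imports;
    # not the contents of the committed (binary) requirements.txt
    streamlit
    streamlit-audiorecorder   # provides "from audiorecorder import audiorecorder"
    openai-whisper            # "import whisper"
    python-dotenv
    langchain
    openai                    # backend for OpenAIEmbeddings / ChatOpenAI
    faiss-cpu                 # backend for langchain's FAISS vector store
    unstructured              # default parser used by DirectoryLoader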