yolloo committed
Commit 784c877 · verified · 1 Parent(s): 770f214

Upload 4 files

Files changed (4)
  1. Dockerfile (2).txt +25 -0
  2. qamatcher_server.py +71 -0
  3. qgen_server.py +39 -0
  4. whisper_server.py +62 -0
Dockerfile (2).txt ADDED
@@ -0,0 +1,25 @@
+ FROM python:3.9-slim
+
+ # 1. Install system dependencies including ffmpeg for audio processing
+ RUN apt-get update && apt-get install -y --no-install-recommends ffmpeg git && \
+     apt-get clean && \
+     rm -rf /var/lib/apt/lists/*
+
+ # 2. Set up a writable Hugging Face cache directory
+ # This is the standard and correct way to avoid permission errors in Hugging Face Spaces.
+ ENV HF_HOME=/tmp/huggingface
+ ENV TRANSFORMERS_CACHE=/tmp/huggingface/transformers
+ ENV XDG_CACHE_HOME=/tmp/huggingface
+ RUN mkdir -p $HF_HOME && chmod -R 777 $HF_HOME
+
+ # 3. Install Python dependencies
+ COPY requirements.txt .
+ RUN pip install --no-cache-dir --upgrade pip && \
+     pip install --no-cache-dir -r requirements.txt
+
+ # 4. Copy the application files into the container
+ COPY . .
+
+ # 5. Set the command to run the application using Gunicorn
+ # This is a production-ready web server.
+ CMD ["gunicorn", "--bind", "0.0.0.0:7860", "app:app"]
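
The CMD above points Gunicorn at app:app, but app.py itself is not part of this commit. A minimal sketch of how that entry point might wire the three uploaded handler modules into Flask routes follows; the route paths (/transcribe, /generate-questions, /match) are assumptions made for illustration, not something defined in the upload.

# app.py -- hypothetical wiring for the handlers in this commit.
# Only the handler functions come from the uploaded files; the route names are assumed.
from flask import Flask

from whisper_server import handle_transcribe
from qgen_server import handle_generate_questions
from qamatcher_server import handle_match_question

app = Flask(__name__)

# Register each handler under an illustrative route.
app.add_url_rule("/transcribe", view_func=handle_transcribe, methods=["POST"])
app.add_url_rule("/generate-questions", view_func=handle_generate_questions, methods=["POST"])
app.add_url_rule("/match", view_func=handle_match_question, methods=["POST"])

if __name__ == "__main__":
    app.run(host="0.0.0.0", port=7860)

Importing the three modules loads the models at startup, which matches how each uploaded file builds its model at import time.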
qamatcher_server.py ADDED
@@ -0,0 +1,71 @@
+
+ import os
+ from flask import request, jsonify
+ from sentence_transformers import SentenceTransformer, util
+
+ # Define a writable directory for the model cache.
+ # This now respects the HF_HOME environment variable set in the Dockerfile.
+ cache_dir = os.environ.get("HF_HOME", "/tmp/huggingface")
+ os.makedirs(cache_dir, exist_ok=True)
+
+
+ print("Loading SentenceTransformer model (paraphrase-MiniLM-L6-v2)...")
+ matcher_model = SentenceTransformer('sentence-transformers/paraphrase-MiniLM-L6-v2', cache_folder=cache_dir)
+ print("SentenceTransformer model loaded.")
+
+ # Define a threshold for a "good" match
+ SIMILARITY_THRESHOLD = 0.6
+
+ def handle_match_question():
+     data = request.get_json()
+     if not data or 'user_question' not in data or 'documents' not in data:
+         return jsonify({'error': 'Invalid request. "user_question" and "documents" are required.'}), 400
+
+     user_question = data['user_question']
+     documents = data['documents']
+
+     if not documents:
+         return jsonify({'answer': "There are no notes to search."})
+
+     # Flatten the list of questions from all documents
+     all_questions = []
+     # Map each question to the original note text
+     question_to_note_map = {}
+
+     for doc in documents:
+         note_text = doc.get('note_text', '')
+         for q in doc.get('questions', []):
+             all_questions.append(q)
+             question_to_note_map[q] = note_text
+
+     if not all_questions:
+         return jsonify({'answer': "No questions have been generated for your notes yet."})
+
+     try:
+         # Encode the user's question and all stored questions
+         user_embedding = matcher_model.encode(user_question, convert_to_tensor=True)
+         stored_embeddings = matcher_model.encode(all_questions, convert_to_tensor=True)
+
+         # Compute cosine similarity
+         cosine_scores = util.pytorch_cos_sim(user_embedding, stored_embeddings)
+
+         # Find the best match
+         best_match_idx = cosine_scores.argmax()
+         best_score = float(cosine_scores[0][best_match_idx])
+         best_question = all_questions[best_match_idx]
+
+         print(f"User Question: '{user_question}'")
+         print(f"Best matched stored question: '{best_question}' with score: {best_score:.4f}")
+
+         # Check if the match is good enough
+         if best_score > SIMILARITY_THRESHOLD:
+             # Return the note associated with the best-matched question
+             answer = question_to_note_map[best_question]
+         else:
+             answer = "Sorry, I couldn't find a relevant note to answer your question."
+
+         return jsonify({'answer': answer})
+
+     except Exception as e:
+         print(f"Error during question matching: {e}")
+         return jsonify({'error': str(e)}), 500
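
handle_match_question expects a JSON body with a user_question string and a documents list, where each document carries its note_text and previously generated questions. A client-side sketch follows, assuming app.py exposes the handler at the hypothetical /match route from the sketch above.

# Hypothetical client call: the /match URL is an assumption; the payload shape
# mirrors what handle_match_question reads from the request body.
import requests

payload = {
    "user_question": "When is the project deadline?",
    "documents": [
        {
            "note_text": "The project deadline is next Friday.",
            "questions": ["When is the project due?", "What is the deadline?"],
        }
    ],
}

resp = requests.post("http://localhost:7860/match", json=payload)
print(resp.json())  # e.g. {'answer': 'The project deadline is next Friday.'}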
qgen_server.py ADDED
@@ -0,0 +1,39 @@
+
+ import os
+ from flask import request, jsonify
+ from transformers import pipeline
+
+ # Define a writable directory for the model cache.
+ # This now respects the HF_HOME environment variable set in the Dockerfile.
+ cache_dir = os.environ.get("HF_HOME", "/tmp/huggingface")
+ os.makedirs(cache_dir, exist_ok=True)
+
+ print("Loading Question Generation model (iarfmoose/t5-base-question-generator)...")
+ # Initialize the pipeline for text2text-generation with the specified model
+ qg_model = pipeline("text2text-generation", model="iarfmoose/t5-base-question-generator", model_kwargs={"cache_dir": cache_dir})
+ print("Question Generation model loaded.")
+
+ def handle_generate_questions():
+     data = request.get_json()
+     if not data or 'text' not in data:
+         return jsonify({'error': 'Invalid request. "text" field is required.'}), 400
+
+     text = data['text']
+
+     # Prepend the text with "generate questions: " as required by this model
+     input_text = f"generate questions: {text}"
+
+     try:
+         # Generate questions
+         results = qg_model(input_text, max_length=64, num_beams=4, early_stopping=True)
+
+         # The result is a single string with questions separated by '<sep>'
+         generated_text = results[0]['generated_text']
+         questions = [q.strip() for q in generated_text.split('<sep>') if q.strip()]
+
+         print(f"Generated questions for text: '{text[:50]}...' -> {questions}")
+
+         return jsonify({'questions': questions})
+     except Exception as e:
+         print(f"Error during question generation: {e}")
+         return jsonify({'error': str(e)}), 500
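
handle_generate_questions reads a single text field and returns a questions list split on the model's <sep> token. A request sketch, again assuming a hypothetical /generate-questions route in app.py:

# Hypothetical client call: only the request and response shapes are taken from the handler.
import requests

resp = requests.post(
    "http://localhost:7860/generate-questions",
    json={"text": "The mitochondria is the powerhouse of the cell."},
)
print(resp.json())  # e.g. {'questions': ['What is the powerhouse of the cell?']}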
whisper_server.py ADDED
@@ -0,0 +1,62 @@
+
+ import os
+ import tempfile
+ from flask import request, jsonify
+ from transformers import pipeline
+ import torch
+
+ # Define a writable directory for the model cache.
+ # This now respects the HF_HOME environment variable set in the Dockerfile.
+ cache_dir = os.environ.get("HF_HOME", "/tmp/huggingface")
+ os.makedirs(cache_dir, exist_ok=True)
+
+ print("Loading collabora/whisper-tiny-hindi model via transformers pipeline...")
+
+ # Determine device
+ device = "cuda:0" if torch.cuda.is_available() else "cpu"
+
+ # Initialize the ASR pipeline with the specified model
+ # Using the transformers pipeline is the correct way to load custom models from the Hub.
+ model = pipeline(
+     "automatic-speech-recognition",
+     model="collabora/whisper-tiny-hindi",
+     device=device,
+     model_kwargs={"cache_dir": cache_dir}
+ )
+
+ print("Whisper model loaded.")
+
+ def handle_transcribe():
+     if 'file' not in request.files:
+         return jsonify({'error': 'No file part in the request'}), 400
+
+     file = request.files['file']
+
+     if file.filename == '':
+         return jsonify({'error': 'No selected file'}), 400
+
+     if file:
+         # Use a temporary file to save the upload
+         with tempfile.NamedTemporaryFile(delete=True, suffix=".webm") as temp_audio:
+             file.save(temp_audio.name)
+
+             try:
+                 print(f"Transcribing file: {temp_audio.name} with collabora/whisper-tiny-hindi pipeline")
+
+                 # The pipeline expects a file path and handles the processing.
+                 result = model(temp_audio.name)
+
+                 transcribed_text = result.get('text', '')
+
+                 print("Transcription successful.")
+                 return jsonify({'text': transcribed_text})
+             except Exception as e:
+                 print(f"Error during transcription: {e}")
+                 # Provide a more specific error if possible
+                 error_message = f"An unexpected error occurred during transcription: {str(e)}"
+                 if "out of memory" in str(e).lower():
+                     error_message = "The model ran out of memory. Please try a smaller audio file or check server resources."
+
+                 return jsonify({'error': error_message}), 500
+
+     return jsonify({'error': 'File processing failed'}), 500
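
handle_transcribe reads the upload from the file field of a multipart form and returns the transcription under text. A client sketch, assuming app.py exposes the handler at a hypothetical /transcribe route:

# Hypothetical client call: the /transcribe URL is an assumption; the 'file' field name
# matches what handle_transcribe looks up in request.files.
import requests

with open("sample.webm", "rb") as f:
    resp = requests.post(
        "http://localhost:7860/transcribe",
        files={"file": ("sample.webm", f, "audio/webm")},
    )
print(resp.json())  # e.g. {'text': '...transcribed speech...'}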