Spaces:

Nechba
/

indepai_spech_to_text

Sleeping

App Files Files Community

indepai_spech_to_text / app.py

Nechba

Update app.py

2ec5e31 verified 10 months ago

raw

history blame contribute delete

7.33 kB

	from fastapi import FastAPI, File, UploadFile, HTTPException
	import speech_recognition as sr
	from io import BytesIO
	import os
	from pydantic import BaseModel
	import google.generativeai as genai
	import openai

	# ASGI application object; the @app.post decorators in this module register their routes on it.
	app = FastAPI()

	def audio_to_text_intern(audio_data, language='fr-FR'):
	    """Transcribe an audio source using Google's speech recognition.

	    Args:
	        audio_data: Audio source handed to ``sr.AudioFile`` (the endpoint in
	            this file passes a ``BytesIO`` of the uploaded bytes).
	        language: Language tag forwarded to the recognizer; 'fr-FR' by default.

	    Returns:
	        The recognized text on success. When the audio cannot be understood
	        or the recognition service fails, a descriptive message string is
	        returned instead of raising.
	    """
	    engine = sr.Recognizer()
	    with sr.AudioFile(audio_data) as clip:
	        captured = engine.record(clip)
	        try:
	            transcript = engine.recognize_google(captured, language=language)
	        except sr.UnknownValueError:
	            # Failures are reported as plain text so callers always get a string.
	            return "Unable to understand the audio"
	        except sr.RequestError as err:
	            return f"Service error: {err}"
	        return transcript

	def generate_job_description_gemini(input_text, key):
	    """Ask Gemini to rewrite a transcribed recruiter brief as a job offer.

	    Args:
	        input_text: Transcription text interpolated into the French prompt.
	        key: API key passed to ``genai.configure``.

	    Returns:
	        The ``text`` attribute of the model's response.
	    """
	    import google.generativeai as genai

	    request_text = f"""
	Transforme le texte ci-dessous, qui est une transcription d'un enregistrement audio où un recruteur décrit un projet, en une description d'offre d'emploi complète et structurée. La description doit suivre le format ci-dessous :

	1. Description de l'activité de l'entreprise :
	2. Description du contexte du projet :
	3. Objectifs et livrables identifiés :
	4. Compétences fonctionnelles et techniques :

	Texte :
	"{input_text}"

	La description d'offre d'emploi doit être détaillée et couvrir les points suivants :
	- L'activité de l'entreprise et ses domaines d'expertise.
	- Le contexte du projet et sa pertinence pour l'entreprise.
	- Les objectifs spécifiques du projet et les livrables attendus.
	- Les compétences techniques et fonctionnelles requises pour le poste, ainsi que les conditions de travail (par exemple, la possibilité de travail en remote et la rémunération)."""

	    # Credentials are configured on the genai module before the model is built.
	    genai.configure(api_key=key)
	    sampling = dict(temperature=1, top_p=0.95, max_output_tokens=5000000)
	    llm = genai.GenerativeModel(
	        model_name="gemini-1.0-pro-latest",
	        generation_config=sampling,
	    )
	    return llm.generate_content(request_text).text

	def generate_job_description_gpt(input_text, key):
	    """Ask an OpenAI chat model to rewrite a transcribed brief as a job offer.

	    Args:
	        input_text: Transcription text interpolated into the French prompt.
	        key: OpenAI API key; assigned to the module-level ``openai.api_key``.

	    Returns:
	        The message content of the first choice in the chat completion.
	    """
	    import openai
	    openai.api_key = key
	    prompt = f"""
	Transforme le texte ci-dessous, qui est une transcription d'un enregistrement audio où un recruteur décrit un projet, en une description d'offre d'emploi complète et structurée. La description doit suivre le format ci-dessous :

	1. Description de l'activité de l'entreprise :
	2. Description du contexte du projet :
	3. Objectifs et livrables identifiés :
	4. Compétences fonctionnelles et techniques :

	Texte :
	"{input_text}"

	La description d'offre d'emploi doit être détaillée et couvrir les points suivants :
	- L'activité de l'entreprise et ses domaines d'expertise.
	- Le contexte du projet et sa pertinence pour l'entreprise.
	- Les objectifs spécifiques du projet et les livrables attendus.
	- Les compétences techniques et fonctionnelles requises pour le poste, ainsi que les conditions de travail (par exemple, la possibilité de travail en remote et la rémunération)."""

	    # Only sampling options the Chat Completions endpoint accepts are sent.
	    # "max_output_tokens" is not one of its parameters (the output cap there
	    # is "max_tokens") and unrecognized arguments are rejected, so no cap is
	    # passed and the model's own default output limit applies.
	    generation_config = {
	        "temperature": 1,
	        "top_p": 0.95,
	    }
	    # NOTE(review): openai.ChatCompletion is the pre-1.0 SDK interface;
	    # confirm the pinned openai version still provides it.
	    response = openai.ChatCompletion.create(
	        model="gpt-4o-mini",
	        messages=[{"role": "user", "content": prompt}],
	        **generation_config,
	    )

	    return response.choices[0].message['content']


	@app.post("/audio-to-text/")
	async def audio_to_text_endpoint(file: UploadFile, language: str = 'fr-FR'):
	    """
	    Endpoint to upload an audio file and convert its content to text.

	    Args:
	        file (UploadFile): The audio file uploaded by the client. The file should be in .wav format.
	        language (str, optional): The language code to be used in speech-to-text conversion. Defaults to 'fr-FR' for French.

	    Returns:
	        dict: A dictionary containing the converted text under the key 'text'.
	        When recognition fails, the value is the descriptive message returned
	        by ``audio_to_text_intern`` rather than a transcription.

	    Raises:
	        HTTPException: An error 500 if there is any issue during the file processing.
	    """
	    # Imported locally so this handler does not depend on edits to the
	    # module's import block.
	    from fastapi.concurrency import run_in_threadpool

	    try:
	        # Read the uploaded bytes and expose them as a file-like object.
	        audio_data = await file.read()
	        audio_stream = BytesIO(audio_data)

	        # The recognition helper is synchronous; running it in a worker
	        # thread keeps the event loop free to serve other requests.
	        text = await run_in_threadpool(audio_to_text_intern, audio_stream, language)

	        return {"text": text}
	    except Exception as e:
	        # Surface any failure as HTTP 500, keeping the original cause chained.
	        raise HTTPException(status_code=500, detail=f"An error occurred while processing the file: {e}") from e

	@app.post("/audio-to-offer-with-gemini-endpoint/")
	async def audio_to_offer_endpoint(file: UploadFile, language: str, api_key: str):
	    """
	    Endpoint to convert uploaded audio content to a job offer text using Gemini.

	    Parameters:
	    - file (UploadFile): The audio file uploaded by the client. The file should be in .wav format.
	    - language (str): Language code to guide the speech recognition process (e.g. 'fr-FR'). Required; this endpoint declares no default.
	    - api_key (str): API key forwarded to the Gemini job-description generator. Required.

	    Returns:
	    - A JSON object with a key 'text' containing the generated job description text based on the audio content.

	    Raises:
	    - HTTPException: If an error occurs during the processing of the audio or the generation of the job offer.
	    """
	    # NOTE: the GPT endpoint in this module uses the same function name; the
	    # two routes are told apart by their paths.
	    try:
	        text_response = await audio_to_text_endpoint(file, language)
	        text = text_response["text"]
	        offer = generate_job_description_gemini(text, api_key)
	        return {"text": offer}
	    except HTTPException:
	        # The transcription step already produced a well-formed HTTP error;
	        # pass it through instead of nesting it inside a second message.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"Error generating job offer: {e}") from e

	@app.post("/audio-to-offer-with-gpt-endpoint/")
	async def audio_to_offer_endpoint(file: UploadFile, language: str, api_key: str):
	    """
	    Endpoint to convert uploaded audio content to a job offer text using an OpenAI chat model.

	    Parameters:
	    - file (UploadFile): The audio file uploaded by the client. The file should be in .wav format.
	    - language (str): Language code to guide the speech recognition process (e.g. 'fr-FR'). Required; this endpoint declares no default.
	    - api_key (str): API key forwarded to the GPT job-description generator. Required.

	    Returns:
	    - A JSON object with a key 'text' containing the generated job description text based on the audio content.

	    Raises:
	    - HTTPException: If an error occurs during the processing of the audio or the generation of the job offer.
	    """
	    # NOTE: the Gemini endpoint in this module uses the same function name;
	    # the two routes are told apart by their paths.
	    try:
	        text_response = await audio_to_text_endpoint(file, language)
	        text = text_response["text"]
	        offer = generate_job_description_gpt(text, api_key)
	        return {"text": offer}
	    except HTTPException:
	        # The transcription step already produced a well-formed HTTP error;
	        # pass it through instead of nesting it inside a second message.
	        raise
	    except Exception as e:
	        raise HTTPException(status_code=500, detail=f"Error generating job offer: {e}") from e