# (Hugging Face Spaces page banner captured with the file: "Spaces: Sleeping")
import os | |
import time | |
from io import BytesIO | |
from langchain_core.pydantic_v1 import BaseModel, Field | |
from fastapi import FastAPI, HTTPException, Query, Request | |
from fastapi.responses import StreamingResponse,Response | |
from fastapi.middleware.cors import CORSMiddleware | |
from langchain.chains import LLMChain | |
from langchain.prompts import PromptTemplate | |
from TextGen.suno import custom_generate_audio, get_audio_information,generate_lyrics | |
#from TextGen.diffusion import generate_image | |
#from coqui import predict | |
from langchain_google_genai import ( | |
ChatGoogleGenerativeAI, | |
HarmBlockThreshold, | |
HarmCategory, | |
) | |
from TextGen import app | |
from gradio_client import Client, handle_file | |
from typing import List | |
from elevenlabs.client import ElevenLabs | |
from elevenlabs import Voice, VoiceSettings, stream | |
# Module-wide ElevenLabs client used by generate_voice_eleven. The key is read
# with os.environ[...], so importing this module raises KeyError when
# ELEVEN_API_KEY is not set.
Eleven_client = ElevenLabs(
    api_key=os.environ["ELEVEN_API_KEY"],  # Defaults to ELEVEN_API_KEY
)
# Most recent VoiceMessage handed to generate_voice_eleven; that function
# replays it when it is called without a message. None until the first call.
Last_message=None
class PlayLastMusic(BaseModel):
    '''plays the lastest created music '''
    # Schema listed in generate_text's bind_tools([...]) call.
    # NOTE(review): the docstring is presumably what the model sees as the tool
    # description, so it is runtime text - edit its wording deliberately.
    # "Desicion" is misspelled but is the field name of the schema; renaming it
    # would change the tool's argument name.
    Desicion: str = Field(
        ..., description="Yes or No"
    )
class CreateLyrics(BaseModel):
    '''create some Lyrics for a new music'''
    # The description above must be a plain string literal: an f-string is never
    # stored as __doc__, which left this schema without any description.
    # "Desicion" is misspelled but is the field name of the schema; renaming it
    # would change the tool's argument name.
    Desicion: str = Field(
        ..., description="Yes or No"
    )
class CreateNewMusic(BaseModel):
    '''create a new music with the Lyrics previously computed'''
    # The description above must be a plain string literal: an f-string is never
    # stored as __doc__, which left this schema without any description.
    # Despite its name, the field carries style tags (see its description).
    Name: str = Field(
        ..., description="tags to describe the new music"
    )
# Payload model for a song request; both fields are optional and default to
# None. No active code in this part of the file references it.
class SongRequest(BaseModel):
    # Free-text prompt for the song.
    prompt: str | None = None
    # List of tag strings.
    tags: List[str] | None = None
# Chat payload consumed by inference(), which forwards both fields to
# generate_text().
class Message(BaseModel):
    # NPC name; generate_text uses it to choose the system prompt.
    npc: str | None = None
    # Chat history, oldest first. generate_text treats even positions as user
    # turns and odd positions as assistant turns.
    messages: List[str] | None = None
# Payload model holding an image prompt. Only the commented-out Imagen endpoint
# further down refers to it.
class ImageGen(BaseModel):
    # Text prompt for the image; optional.
    prompt: str | None = None
# Text-to-speech payload consumed by generate_voice_eleven (and by the disabled
# generate_wav implementation).
class VoiceMessage(BaseModel):
    # NPC name; selects a dedicated voice when one is configured.
    npc: str | None = None
    # Text to be spoken.
    input: str | None = None
    # Language code; only read by the commented-out TTS code paths.
    language: str | None = "en"
    # "Male" / "Female"; picks the fallback voice for NPCs without their own.
    genre:str | None = "Male"
# Both values are read with os.environ[...], so importing this module raises
# KeyError when either variable is unset. Neither name is used by the active
# code visible here; my_hf_token only appears in the disabled client below.
song_base_api=os.environ["VERCEL_API"]
my_hf_token=os.environ["HF_TOKEN"]
#tts_client = Client("Jofthomas/xtts",hf_token=my_hf_token)
# NPC name -> reference voice file, returned by determine_vocie_from_npc.
# generate_text also uses membership in this dict to decide whether an NPC has
# a dedicated system prompt, so the keys double as the "known NPC" list.
main_npcs={
    "Blacksmith":"./voices/Blacksmith.mp3",
    "Herbalist":"./voices/female.mp3",
    "Bard":"./voices/Bard_voice.mp3"
}
# NPC name -> ElevenLabs voice id, returned by
# determine_elevenLav_voice_from_npc.
# NOTE(review): "Herbalist" and "Bard" map to the same voice id - confirm this
# is intended.
main_npcs_elevenlabs={
    "Blacksmith":"yYdk7n49vTsUKiXxnosS",
    "Herbalist":"143zSsxc4O5ifS97lPCa",
    "Bard":"143zSsxc4O5ifS97lPCa"
}
# NPC name -> system prompt text sent as the first message by generate_text.
# NOTE(review): "Witch" has a prompt here but no entry in main_npcs, which is
# the dict generate_text checks before reading this table.
# NOTE(review): "Don't event too much lore" in the Witch prompt looks like a
# typo for "invent"; the string is runtime text and is left unchanged here.
main_npc_system_prompts={
    "Blacksmith":"You are a blacksmith in a video game",
    "Herbalist":"You are an herbalist in a video game",
    "Witch":"You are a witch in a video game. You are disguised as a potion seller in a small city where adventurers come to challenge the portal. You are selling some magic spells in a UI that the player only sees. Don't event too much lore and just follow the standard role of a merchant.",
    "Bard":"You are a bard in a video game"
}
# Response wrapper returned by generate_text and inference_model.
class Generate(BaseModel):
    # Content of the model's reply.
    text:str
# Map description consumed by placement() and rendered by
# generate_map_markdown().
class Rooms(BaseModel):
    # Room coordinates; generate_map_markdown evaluates each entry into an
    # (x, y) pair.
    rooms:List
    # Subset of rooms drawn with a marker; same entry format as `rooms`.
    room_of_interest:List
    # Position within `rooms` of the room drawn as the exit ("P").
    index_exit:int
    # Not read by any code visible in this file section.
    possible_entities:List
# Payload consumed by story(), which forwards both fields to inference_model().
class Invoke(BaseModel):
    # Instruction text sent as the first message.
    system_prompt:str
    # Prompt sent as the second message.
    message:str
def generate_text(messages: List[str], npc: str):
    """Ask Gemini for the next line of an NPC conversation.

    Args:
        messages: Chat history, oldest first. Entries at even positions are
            sent as ``user`` turns and entries at odd positions as
            ``assistant`` turns. ``None`` is treated as an empty history.
        npc: NPC name used to select a prompt from
            ``main_npc_system_prompts``; any other value gets a generic prompt.

    Returns:
        A ``Generate`` whose ``text`` is the content of the model's reply.
    """
    print(npc)
    # Look the prompt up in the prompt table itself. Gating on ``main_npcs``
    # (the voice table) hid prompts for NPCs that have no voice file, e.g.
    # "Witch".
    system_prompt = main_npc_system_prompts.get(
        npc, "you're a character in a video game. Play along."
    )
    print(system_prompt)

    # The system prompt is sent as a leading "user" message.
    new_messages = [{"role": "user", "content": system_prompt}]
    # ``Message.messages`` defaults to None, so tolerate a missing history.
    for index, message in enumerate(messages or []):
        role = "user" if index % 2 == 0 else "assistant"
        new_messages.append({"role": role, "content": message})
    print(new_messages)

    # Initialize the LLM
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-pro-latest",
        max_output_tokens=100,
        temperature=1,
        safety_settings={
            HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HARASSMENT: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_HATE_SPEECH: HarmBlockThreshold.BLOCK_NONE,
            HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: HarmBlockThreshold.BLOCK_NONE,
        },
    )
    # NOTE(review): the NPC tables use the capitalised key "Bard", so this
    # lowercase comparison does not match that name. It is left as-is because
    # binding tools changes what the model returns and nothing here handles
    # tool calls - confirm the intended behaviour before changing it.
    if npc == "bard":
        llm = llm.bind_tools([PlayLastMusic, CreateNewMusic, CreateLyrics])
    llm_response = llm.invoke(new_messages)
    print(llm_response)
    return Generate(text=llm_response.content)
# Install CORS handling on the shared app with every origin, method and header
# allowed.
# NOTE(review): browsers do not accept a literal "*" Access-Control-Allow-Origin
# on credentialed requests; confirm that wildcard origins combined with
# allow_credentials=True is really what is wanted.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_credentials=True,
    allow_methods=["*"],
    allow_headers=["*"],
)
def inference_model(system_messsage, prompt):
    """Run a one-shot Gemini completion and wrap the reply in ``Generate``.

    Args:
        system_messsage: Instruction text, sent as the first ``user`` message.
        prompt: The actual request, sent as the second ``user`` message.

    Returns:
        A ``Generate`` whose ``text`` is the content of the model's reply.
    """
    # Every harm category uses the same "never block" threshold.
    no_blocking = HarmBlockThreshold.BLOCK_NONE
    safety = {
        HarmCategory.HARM_CATEGORY_DANGEROUS_CONTENT: no_blocking,
        HarmCategory.HARM_CATEGORY_HARASSMENT: no_blocking,
        HarmCategory.HARM_CATEGORY_HATE_SPEECH: no_blocking,
        HarmCategory.HARM_CATEGORY_SEXUALLY_EXPLICIT: no_blocking,
    }
    llm = ChatGoogleGenerativeAI(
        model="gemini-1.5-pro-latest",
        max_output_tokens=100,
        temperature=1,
        safety_settings=safety,
    )

    conversation = [
        {"role": "user", "content": system_messsage},
        {"role": "user", "content": prompt},
    ]
    llm_response = llm.invoke(conversation)
    print(llm_response)
    return Generate(text=llm_response.content)
def api_home():
    """Return a fixed informational payload identifying the backend."""
    payload = dict(detail='Everchanging Quest backend, nothing to see here')
    return payload
def inference(message: Message):
    """Produce the NPC's next reply for the chat history in ``message``."""
    history, speaker = message.messages, message.npc
    return generate_text(messages=history, npc=speaker)
def story(prompt: Invoke):
    """Run a one-shot completion from the payload's system prompt and message."""
    instructions, request_text = prompt.system_prompt, prompt.message
    return inference_model(system_messsage=instructions, prompt=request_text)
def placement(input: Rooms):
    """Render the submitted room layout and return a placeholder answer.

    Args:
        input: Room layout; see ``Rooms``.

    Returns:
        The fixed dict ``{"key": "value"}``. The rendered map is computed but
        not yet part of the answer.
    """
    print(input)
    # generate_map_markdown indexes its argument by key (data["rooms"]); a
    # pydantic model is not subscriptable, so hand it a plain dict.
    markdown_map = generate_map_markdown(input.dict())  # not used in the answer yet
    answer = {
        "key": "value"
    }
    return answer
#Dummy function for now | |
def determine_vocie_from_npc(npc, genre):
    """Return the reference voice file for an NPC.

    Args:
        npc: NPC name; names present in ``main_npcs`` use their own voice file.
        genre: "Male" or "Female", used only for NPCs without their own file.

    Returns:
        Path of the voice sample: the NPC's own file, the default male or
        female sample, or the narrator sample for any other ``genre``.
    """
    if npc in main_npcs:
        return main_npcs[npc]
    if genre == "Male":
        # This branch previously lacked ``return``, so male NPCs fell through
        # to the narrator voice.
        return "./voices/default_male.mp3"
    if genre == "Female":
        return "./voices/default_female.mp3"
    return "./voices/narator_out.wav"
#Dummy function for now | |
def determine_elevenLav_voice_from_npc(npc, genre):
    """Return the ElevenLabs voice id for an NPC.

    Args:
        npc: NPC name; names present in ``main_npcs_elevenlabs`` use their own
            voice id.
        genre: "Male" or "Female", used only for NPCs without their own voice.

    Returns:
        The NPC's voice id, the default male or female id, or the fallback id
        for any other ``genre``.
    """
    if npc in main_npcs_elevenlabs:
        return main_npcs_elevenlabs[npc]
    if genre == "Male":
        # This branch previously lacked ``return``, so male NPCs fell through
        # to the fallback voice.
        return "bIHbv24MWmeRgasZH58o"
    if genre == "Female":
        return "pFZP5JQG7iQjIQuC4Bku"
    return "TX3LPaxmHKxFdv7VOQHJ"
async def generate_wav(message: VoiceMessage):
    """Placeholder for voice generation: ignores ``message`` and returns 200.

    The commented-out code below is the disabled implementation, which streamed
    audio chunks obtained from ``tts_client`` (itself commented out at module
    level).
    """
    # try:
    #     voice = determine_vocie_from_npc(message.npc, message.genre)
    #     audio_file_pth = handle_file(voice)
    #
    #     Generator function to yield audio chunks
    #     async def audio_stream():
    #         result = tts_client.predict(
    #             prompt=message.input,
    #             language=message.language,
    #             audio_file_pth=audio_file_pth,
    #             mic_file_path=None,
    #             use_mic=False,
    #             voice_cleanup=False,
    #             no_lang_auto_detect=False,
    #             agree=True,
    #             api_name="/predict"
    #         )
    #         for sampling_rate, audio_chunk in result:
    #             yield audio_chunk.tobytes()
    #             await asyncio.sleep(0)  # Yield control to the event loop
    #     Return the generated audio as a streaming response
    #     return StreamingResponse(audio_stream(), media_type="audio/wav")
    # except Exception as e:
    #     raise HTTPException(status_code=500, detail=str(e))
    return 200
def generate_voice_eleven(message: VoiceMessage = None):
    """Stream ElevenLabs speech for ``message`` as ``audio/mpeg``.

    Args:
        message: Text and voice selection. When omitted, the message from the
            previous call (cached in the module-level ``Last_message``) is
            replayed; when given, it replaces the cached message.

    Returns:
        A ``StreamingResponse`` yielding the audio chunks as they arrive.

    Raises:
        HTTPException: 400 when no message is supplied and none is cached.
    """
    global Last_message  # the cache is rebound below
    if message is None:
        message = Last_message
    else:
        Last_message = message

    if message is None:
        # Without this guard the failure surfaced as an AttributeError inside
        # the generator, i.e. only after the response had already started.
        raise HTTPException(
            status_code=400,
            detail="No voice message supplied and none cached from a previous call",
        )

    this_voice_id = determine_elevenLav_voice_from_npc(message.npc, message.genre)

    def audio_stream():
        # Generate the audio stream from ElevenLabs
        for chunk in Eleven_client.generate(
            text=message.input,
            voice=Voice(
                voice_id=this_voice_id,
                settings=VoiceSettings(
                    stability=0.71,
                    similarity_boost=0.5,
                    style=0.0,
                    use_speaker_boost=True,
                ),
            ),
            stream=True,
        ):
            yield chunk

    return StreamingResponse(audio_stream(), media_type="audio/mpeg")
#@app.get("/generate_voice_coqui", response_class=StreamingResponse) | |
#@app.post("/generate_voice_coqui", response_class=StreamingResponse) | |
#def generate_voice_coqui(message: VoiceMessage = None): | |
# global Last_message | |
# if message is None: | |
# message = Last_message | |
# else: | |
# Last_message = message | |
# | |
# def audio_stream(): | |
# voice = determine_vocie_from_npc(message.npc, message.genre) | |
# result = predict( | |
# prompt=message.input, | |
# language=message.language, | |
# audio_file_pth=voice, | |
# mic_file_path=None, | |
# use_mic=False, | |
# voice_cleanup=False, | |
# no_lang_auto_detect=False, | |
# agree=True, | |
# ) | |
# # Generate the audio stream from ElevenLabs | |
# for chunk in result: | |
# print("received : ",chunk) | |
# yield chunk# | |
# | |
# return StreamingResponse(audio_stream(),media_type="audio/mpeg") | |
async def generate_song():
    """Generate lyrics for a fixed bard prompt, render them to audio and return
    the audio information for the two resulting clips.
    """
    text = """You are a bard in a video game singing the tales of a little girl in red hood."""

    # Step 1: lyrics from the fixed prompt.
    lyrics_request = {"prompt": text}
    song_lyrics = generate_lyrics(lyrics_request)

    # Step 2: audio from the lyrics.
    audio_request = {
        "prompt": song_lyrics['text'],
        "tags": "male bard",
        "title": "Everchangin_Quest_song",
        "wait_audio": True,
    }
    data = custom_generate_audio(audio_request)

    # Step 3: look up both generated clips by their comma-separated ids.
    first_id = data[0]['id']
    second_id = data[1]['id']
    infos = get_audio_information(f"{first_id},{second_id}")
    return infos
#@app.post('/generate_image') | |
#def Imagen(image:ImageGen=None): | |
# pil_image =generate_image(image.prompt) | |
# | |
# | |
# # Convert the PIL Image to bytes | |
# img_byte_arr = BytesIO() | |
# pil_image.save(img_byte_arr, format='PNG') | |
# img_byte_arr = img_byte_arr.getvalue() | |
# | |
# Return the image as a PNG response | |
# return Response(content=img_byte_arr, media_type="image/png") | |
def generate_map_markdown(data):
    """Render a room layout as a text map inside a markdown code fence.

    Every room is drawn as a 5x3 box. The room at ``(0, 0)`` is marked ``X``;
    rooms of interest are marked ``?``, except the one whose position in
    ``rooms`` equals ``index_exit``, which is marked ``P``. Larger ``y`` values
    are drawn higher up.

    Args:
        data: Mapping (or an object exposing ``.dict()``) with the keys
            ``rooms`` and ``room_of_interest`` - lists of coordinates given
            either as literal strings such as ``"(1, 0)"`` or as two-item
            sequences - and ``index_exit``, an index into ``rooms``.

    Returns:
        The map wrapped in triple backticks. An empty ``rooms`` list yields an
        empty fenced block.

    Raises:
        ValueError: If a coordinate is not a two-item literal.
    """
    import ast  # local import: ast is not among the file's top-level imports

    cell_width, cell_height = 5, 3

    # Define the room structure with walls and markers
    def create_room(room_char):
        # Box-drawing glyphs; the stored source had them mis-encoded as "β".
        return [
            "┌───┐",
            f"│ {room_char} │",
            "└───┘",
        ]

    def parse_coord(raw):
        # Coordinates are caller-supplied text, so they are parsed with
        # literal_eval instead of eval: only Python literals are accepted and
        # no code can run.
        try:
            value = ast.literal_eval(raw) if isinstance(raw, str) else raw
            x, y = value
        except (ValueError, SyntaxError, TypeError) as err:
            raise ValueError(f"invalid room coordinate: {raw!r}") from err
        # Always a tuple, so the (0, 0) and membership checks below also work
        # for coordinates supplied as lists.
        return (x, y)

    # Accept a model object as well as a plain mapping.
    if not isinstance(data, dict) and callable(getattr(data, "dict", None)):
        data = data.dict()

    # Extract rooms and rooms of interest
    rooms = [parse_coord(room) for room in data["rooms"]]
    rooms_of_interest = {parse_coord(room) for room in data["room_of_interest"]}
    if not rooms:
        return "```\n\n```"

    # Determine grid size
    xs = [x for x, _ in rooms]
    ys = [y for _, y in rooms]
    min_x, max_x = min(xs), max(xs)
    min_y, max_y = min(ys), max(ys)

    # Create a grid of blanks; cells without a room stay empty
    map_height = (max_y - min_y + 1) * cell_height
    map_width = (max_x - min_x + 1) * cell_width
    grid = [[" "] * map_width for _ in range(map_height)]

    # Populate grid with rooms and their characteristics
    for i, room in enumerate(rooms):
        x, y = room
        x_offset = (x - min_x) * cell_width
        y_offset = (max_y - y) * cell_height  # flip so larger y is drawn first
        if room == (0, 0):
            room_char = "X"
        elif room in rooms_of_interest:
            room_char = "P" if i == data["index_exit"] else "?"
        else:
            room_char = " "
        for j, row in enumerate(create_room(room_char)):
            grid[y_offset + j][x_offset:x_offset + cell_width] = list(row)

    # Convert grid to a string format suitable for display
    markdown_map = "\n".join("".join(row) for row in grid)
    # Return the map wrapped in triple backticks for proper display in markdown
    return f"```\n{markdown_map}\n```"