"""Small wrapper around the Groq chat-completions API with API-key rotation.

Keys are read from the environment variables ``GROQ_API_KEY_1`` ...
``GROQ_API_KEY_255`` (optionally supplied through a ``.env`` file). When a
request fails, the module-level ``API_KEY`` index moves on to the next
configured key so that the following call uses a different credential.
"""

from groq import Groq
from dotenv import load_dotenv
import os
import time


def get_groq_api_keys():
    """Load ``.env`` and return the values of ``GROQ_API_KEY_1..255``.

    Returns:
        A list of 255 entries, one per numbered environment variable, in
        order. An entry is ``None`` when the corresponding variable is unset.
    """
    load_dotenv()
    return [os.getenv(f"GROQ_API_KEY_{i}") for i in range(1, 256)]


# Loaded once at import time; ``API_KEY`` is the index of the key in use.
Groq_Api_Keys = get_groq_api_keys()
API_KEY = 0

# Short family name -> concrete model id sent to the API. Order matters: the
# first family whose name occurs in the requested model wins.
_MODEL_ALIASES = (
    ("mistral", "mixtral-8x7b-32768"),
    ("gemma", "gemma-7b-it"),
    ("llama", "llama2-70b-4096"),
)


def _resolve_model(model):
    """Map a model family name (any letter case) to its concrete model id.

    Matching is by substring, so any name containing a family keyword is
    replaced by that family's id; names without a keyword pass through as-is.
    """
    lowered = model.lower()
    for family, model_id in _MODEL_ALIASES:
        if family in lowered:
            return model_id
    return model


def _find_configured_key(start):
    """Return the index of the first set key at or after ``start``, wrapping.

    Returns ``None`` when no key is configured at all (or the list is empty).
    """
    total = len(Groq_Api_Keys)
    for offset in range(total):
        index = (start + offset) % total
        if Groq_Api_Keys[index]:
            return index
    return None


def Groq_Inference(query, model="mistral", system="Be Helpful and Friendly", assistant="", temp=0.7, max_tokens=300):
    """Send one chat-completion request and return the reply text.

    Args:
        query: Content of the user message.
        model: Model id, or a family name ("mistral", "gemma", "llama") that
            is mapped to a concrete id, case-insensitively.
        system: Content of the system message.
        assistant: Content of the trailing assistant message.
        temp: Sampling temperature forwarded to the API.
        max_tokens: Completion token limit forwarded to the API.

    Returns:
        The content of the first choice's message, which is also printed.
        ``None`` when no API key is configured or the request fails; on
        failure the error is printed and ``API_KEY`` advances to the next
        configured key (wrapping around) so the caller can simply retry.
    """
    global API_KEY

    model = _resolve_model(model)

    # Skip unset slots instead of sending the literal string "None" as a key.
    key_index = _find_configured_key(API_KEY)
    if key_index is None:
        print("No Groq API key configured. Set GROQ_API_KEY_1 (and following) in the environment or .env file")
        return None
    API_KEY = key_index

    try:
        client = Groq(api_key=str(Groq_Api_Keys[API_KEY]))
        completion = client.chat.completions.create(
            model=model,
            messages=[
                {"role": "system", "content": system},
                {"role": "user", "content": query},
                {"role": "assistant", "content": assistant},
            ],
            temperature=temp,
            max_tokens=max_tokens,
            top_p=1,
            stream=False,
            stop=None,
        )
        reply = completion.choices[0].message.content
        # Print the completion returned by the LLM.
        print(reply)
        return reply
    except Exception as e:
        # Best-effort wrapper: never raise to the caller, report and rotate.
        print(e)
        # Compared by class name so no SDK exception import is required.
        if type(e).__name__ == "AuthenticationError":
            print("Wrong API Key. Appending API key")
        else:
            # Any other failure is treated as the current key being exhausted.
            print("Upgrading API Key. Limit Reached..\nKindly Request Again in 3 Seconds")
        next_index = _find_configured_key(API_KEY + 1)
        if next_index is not None:
            API_KEY = next_index
        return None


if __name__ == "__main__":
    # Example usage: issue the same request repeatedly and time each call.
    # Runs until interrupted.
    while True:
        start = time.time()
        completion = Groq_Inference("hi, how are you")
        print(time.time() - start, "Seconds")