Spaces:

yash009
/

textgeneration

Runtime error

textgeneration / question_paper.py

Yash Sachdeva

llm cpp

e48a0c0 over 1 year ago

891 Bytes

	import os
	import threading

	import torch
	import transformers
	from fastapi import FastAPI
	from llama_cpp import Llama
	from transformers import AutoTokenizer

	# Create the FastAPI application object; the route decorator further down
	# registers its handler on it. No model is loaded at this point.

	app = FastAPI()
	# Location of the GGUF weights, relative to the server's working directory.
	_MODEL_PATH = "./models/7B/llama-model.gguf"

	# Process-wide model handle, created lazily on the first request so that
	# the weights are read from disk once instead of on every GET.
	_llm = None

	# FastAPI executes plain ``def`` handlers in a worker thread pool, so two
	# requests can reach the shared model at the same time. A single Llama
	# instance is not documented as safe for concurrent calls, so both the
	# lazy load and the inference call are serialised behind this lock.
	_llm_lock = threading.Lock()


	def _get_llm():
	    """Return the shared ``Llama`` instance, loading it on first use.

	    The caller must hold ``_llm_lock``.
	    """
	    global _llm
	    if _llm is None:
	        _llm = Llama(
	            model_path=_MODEL_PATH,
	            # n_gpu_layers=-1, # Uncomment to use GPU acceleration
	            # seed=1337, # Uncomment to set a specific seed
	            # n_ctx=2048, # Uncomment to increase the context window
	        )
	    return _llm


	@app.get("/")
	def llama():
	    """Run a fixed question prompt through the model and return the result.

	    Returns:
	        A dict with a single key ``"output"`` holding whatever the model
	        call returns for the completion (prompt included, since
	        ``echo=True``).
	    """
	    with _llm_lock:
	        output = _get_llm()(
	            "Q: Name the planets in the solar system? A: ",  # Prompt
	            max_tokens=32,  # Generate up to 32 tokens, set to None to generate up to the end of the context window
	            stop=["Q:", "\n"],  # Stop generating just before the model would generate a new question
	            echo=True,  # Echo the prompt back in the output
	        )  # Generate a completion, can also call create_completion

	    return {"output": output}