dharmendra committed
Commit 5601c60 · 1 Parent(s): e9ea2d4
Initial Docker Space setup with direct build

Files changed:
- Dockerfile       +13 -0
- README.md        +12 -0
- app.py           +84 -0
- requirements.txt +69 -0
Dockerfile
ADDED
@@ -0,0 +1,13 @@
+FROM python:3.10-slim-buster
+
+RUN useradd -m -u 1000 appuser
+USER appuser
+ENV PATH="/home/appuser/.local/bin:$PATH"
+WORKDIR /app
+
+COPY --chown=appuser ./requirements.txt requirements.txt
+RUN pip install --no-cache-dir -r requirements.txt
+
+COPY --chown=appuser . .
+
+CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
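The final CMD starts uvicorn against `app:app` on port 7860, the same port declared as `app_port` in README.md. For debugging outside Docker, a rough Python equivalent is sketched below; it assumes the packages in requirements.txt are installed and HUGGINGFACEHUB_API_TOKEN is exported in the shell, and the helper file name is hypothetical.

# run_local.py -- hypothetical helper, not part of this commit
import uvicorn

if __name__ == "__main__":
    # Same module:attribute, host, and port as the Dockerfile CMD.
    uvicorn.run("app:app", host="0.0.0.0", port=7860)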
README.md
CHANGED
@@ -6,6 +6,18 @@ colorTo: yellow
 sdk: docker
 pinned: false
 short_description: new Gen AI using open LLM
+app_port: 7860 # This MUST match the port your FastAPI app listens on inside the container
+hf_token_secret: HUGGINGFACEHUB_API_TOKEN
 ---

+# PY LLM DEMO (Direct Build)
+
+This Hugging Face Space hosts a FastAPI application serving a Qwen language model.
+The Docker image is built directly on Hugging Face Spaces from the provided Dockerfile.
+
+## API Endpoint Usage:
+
+To interact with the API, send a POST request with a JSON body containing a "question" field to the `/api/generate` endpoint.
+
+
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
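For reference, a minimal client call against the endpoint described above might look like the sketch below; the Space hostname is a placeholder, and the request/response shape follows app.py's QuestionRequest model and its {"response": ...} return value.

import requests

# Placeholder URL -- substitute the real <owner>-<space>.hf.space hostname.
SPACE_URL = "https://your-space-name.hf.space"

# The API expects a JSON body with a "question" field.
payload = {"question": "What is the capital of France?"}

resp = requests.post(f"{SPACE_URL}/api/generate", json=payload, timeout=120)
resp.raise_for_status()
print(resp.json()["response"])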
app.py
ADDED
@@ -0,0 +1,84 @@
+import os
+from fastapi import FastAPI, HTTPException
+from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+import torch
+from pydantic import BaseModel
+import traceback
+from langchain.memory import ConversationBufferMemory
+from langchain.chains import ConversationChain
+from langchain_community.llms import HuggingFacePipeline
+
+app = FastAPI()
+# Get the Hugging Face API token from environment variables (BEST PRACTICE)
+HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
+
+if HUGGINGFACEHUB_API_TOKEN is None:
+    raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")
+
+
+tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
+model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct", device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True, token=HUGGINGFACEHUB_API_TOKEN)
+#print(f"Tokenizer attributes: {dir(tokenizer)}")
+
+if torch.backends.mps.is_available():
+    device = "mps"
+elif torch.cuda.is_available():
+    device = "cuda"
+else:
+    device = "cpu"
+
+model.to(device)
+memory = ConversationBufferMemory()  # for memory management
+
+# Initialize Langchain HuggingFacePipeline
+llm = HuggingFacePipeline(pipeline=pipeline("text-generation", model=model, tokenizer=tokenizer))
+
+# Initialize Langchain ConversationChain
+conversation = ConversationChain(llm=llm, memory=memory)
+
+class QuestionRequest(BaseModel):
+    question: str
+
+class ChatResponse(BaseModel):
+    response: str
+
+@app.post("/api/generate")
+async def generate_text(request: QuestionRequest):
+    try:
+        response = conversation.predict(input=request.question)
+        return {"response": response}
+    except Exception as e:
+        print("Error during generation:")
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail=str(e))
+
+
+# below when not using langchain fully
+# try:
+#     # Retrieve history
+#     history = memory.load_memory_variables({})['history']
+#     # Create prompt with history and current question
+#     prompt = f"History:\n{history}\nQuestion: {request.question}\nAnswer:"
+
+#     inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)
+
+#     with torch.no_grad():
+#         outputs = model.generate(
+#             inputs=inputs['input_ids'],  # Pass the 'input_ids' tensor
+#             attention_mask=inputs['attention_mask'],
+#             max_length=300,
+#             num_beams=5,
+#             no_repeat_ngram_size=2,
+#             temperature=0.7,
+#             top_k=50,
+#             top_p=0.95,
+#             do_sample=True,
+#             eos_token_id=tokenizer.convert_tokens_to_ids("<|endoftext|>"),
+#             pad_token_id=tokenizer.convert_tokens_to_ids("<|endoftext|>")
+#         )
+#     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+#     return {"response": response}
+# except Exception as e:
+#     print("Error during generation:")
+#     traceback.print_exc()
+#     raise HTTPException(status_code=500, detail=str(e))
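On the memory design: ConversationBufferMemory keeps the entire prior exchange and injects it into the prompt as `history` on every call, which is also what the commented-out manual path reads via load_memory_variables. A minimal sketch of that behaviour, using LangChain's FakeListLLM so it runs without downloading the Qwen model (the canned replies are placeholders):

from langchain.chains import ConversationChain
from langchain.memory import ConversationBufferMemory
from langchain_community.llms import FakeListLLM

# FakeListLLM returns canned responses in order, standing in for the real pipeline.
llm = FakeListLLM(responses=["Paris.", "Roughly 2.1 million people."])
memory = ConversationBufferMemory()
conversation = ConversationChain(llm=llm, memory=memory)

print(conversation.predict(input="What is the capital of France?"))
print(conversation.predict(input="How many people live there?"))

# Both turns are now in the buffer; this is the same "history" string the
# commented-out manual prompt would format with load_memory_variables({}).
print(memory.load_memory_variables({})["history"])

Because the memory object in app.py is created at module level, every client of the Space shares a single conversation buffer across requests.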
requirements.txt
ADDED
@@ -0,0 +1,69 @@
+accelerate==1.6.0
+aiohappyeyeballs==2.6.1
+aiohttp==3.11.16
+aiosignal==1.3.2
+annotated-types==0.7.0
+anyio==4.8.0
+async-timeout==4.0.3
+attrs==25.3.0
+certifi==2025.1.31
+charset-normalizer==3.4.1
+click==8.1.8
+dataclasses-json==0.6.7
+exceptiongroup==1.2.2
+fastapi==0.115.8
+filelock==3.17.0
+frozenlist==1.5.0
+fsspec==2025.2.0
+h11==0.14.0
+httpcore==1.0.8
+httpx==0.28.1
+httpx-sse==0.4.0
+huggingface-hub==0.28.1
+idna==3.10
+Jinja2==3.1.4
+jsonpatch==1.33
+jsonpointer==3.0.0
+langchain==0.3.23
+langchain-community==0.3.21
+langchain-core==0.3.51
+langchain-text-splitters==0.3.8
+langsmith==0.3.30
+MarkupSafe==2.1.5
+marshmallow==3.26.1
+mpmath==1.3.0
+multidict==6.4.3
+mypy-extensions==1.0.0
+networkx==3.4.2
+numpy==2.2.3
+orjson==3.10.16
+packaging==24.2
+pillow==11.0.0
+propcache==0.3.1
+psutil==7.0.0
+pydantic==2.10.6
+pydantic-settings==2.8.1
+pydantic_core==2.27.2
+python-dotenv==1.1.0
+PyYAML==6.0.2
+regex==2024.11.6
+requests==2.32.3
+requests-toolbelt==1.0.0
+safetensors==0.5.2
+sniffio==1.3.1
+SQLAlchemy==2.0.40
+starlette==0.45.3
+sympy
+tenacity==9.1.2
+tokenizers==0.21.0
+torch==2.6.0
+torchaudio==2.6.0
+torchvision==0.11.3
+tqdm==4.67.1
+transformers==4.49.0
+typing-inspect==0.9.0
+typing_extensions==4.12.2
+urllib3==2.3.0
+uvicorn==0.34.0
+yarl==1.19.0
+zstandard==0.23.0