dharmendra committed on
Commit
5601c60
·
1 Parent(s): e9ea2d4

Initial Docker Space setup with direct build

Files changed (4)
  1. Dockerfile +13 -0
  2. README.md +12 -0
  3. app.py +84 -0
  4. requirements.txt +69 -0
Dockerfile ADDED
@@ -0,0 +1,13 @@
+ FROM python:3.10-slim-buster
+
+ RUN useradd -m -u 1000 appuser
+ USER appuser
+ ENV PATH="/home/appuser/.local/bin:$PATH"
+ WORKDIR /app
+
+ COPY --chown=appuser ./requirements.txt requirements.txt
+ RUN pip install --no-cache-dir -r requirements.txt
+
+ COPY --chown=appuser . .
+
+ CMD ["uvicorn", "app:app", "--host", "0.0.0.0", "--port", "7860"]
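For local testing before pushing to Spaces, the same image can be built and run with the standard Docker CLI, e.g. `docker build -t qwen-space .` followed by `docker run -p 7860:7860 -e HUGGINGFACEHUB_API_TOKEN=hf_xxx qwen-space` (the `qwen-space` tag and the `hf_xxx` token are placeholders; app.py exits at startup when the variable is unset, and `-p 7860:7860` must expose the same port the CMD binds).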
README.md CHANGED
@@ -6,6 +6,18 @@ colorTo: yellow
 sdk: docker
 pinned: false
 short_description: new Gen AI using open LLM
+ app_port: 7860 # This MUST match the port your FastAPI app listens on inside the container
+ hf_token_secret: HUGGINGFACEHUB_API_TOKEN
 ---
 
+ # PY LLM DEMO (Direct Build)
+
+ This Hugging Face Space hosts a FastAPI application serving a Qwen language model.
+ The Docker image is built directly on Hugging Face Spaces from the provided Dockerfile.
+
+ ## API Endpoint Usage
+
+ To interact with the API, send a POST request to the `/api/generate` endpoint with a JSON body containing a "question" field.
+
+
 Check out the configuration reference at https://huggingface.co/docs/hub/spaces-config-reference
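For example, a minimal Python client for the endpoint above could look like this (a sketch: the Space URL is a placeholder to substitute with your own, while the "question" and "response" field names match the Pydantic models in app.py):

```python
import requests

# Placeholder URL: replace with your own Space's hostname
API_URL = "https://your-username-your-space.hf.space/api/generate"

# The endpoint expects a JSON body with a "question" field (QuestionRequest in app.py)
payload = {"question": "What is the capital of France?"}

resp = requests.post(API_URL, json=payload, timeout=120)
resp.raise_for_status()

# The service replies with {"response": "..."} (ChatResponse in app.py)
print(resp.json()["response"])
```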
app.py ADDED
@@ -0,0 +1,84 @@
+ import os
+ from fastapi import FastAPI, HTTPException
+ from transformers import AutoTokenizer, AutoModelForCausalLM, pipeline
+ import torch
+ from pydantic import BaseModel
+ import traceback
+ from langchain.memory import ConversationBufferMemory
+ from langchain.chains import ConversationChain
+ from langchain_community.llms import HuggingFacePipeline
+
+ app = FastAPI()
+ # Get the Hugging Face API token from environment variables (BEST PRACTICE)
+ HUGGINGFACEHUB_API_TOKEN = os.environ.get("HUGGINGFACEHUB_API_TOKEN")
+
+ if HUGGINGFACEHUB_API_TOKEN is None:
+     raise ValueError("HUGGINGFACEHUB_API_TOKEN environment variable not set.")
+
+
+ tokenizer = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct")
+ model = AutoModelForCausalLM.from_pretrained("Qwen/Qwen2.5-1.5B-Instruct", device_map="auto", torch_dtype=torch.bfloat16, trust_remote_code=True, token=HUGGINGFACEHUB_API_TOKEN)
+ # print(f"Tokenizer attributes: {dir(tokenizer)}")
+
+ if torch.backends.mps.is_available():
+     device = "mps"
+ elif torch.cuda.is_available():
+     device = "cuda"
+ else:
+     device = "cpu"
+
+ model.to(device)  # NOTE: device_map="auto" already places the weights; this extra move can fail if they were dispatched across devices
+ memory = ConversationBufferMemory()  # for memory management
+
+ # Initialize Langchain HuggingFacePipeline
+ llm = HuggingFacePipeline(pipeline=pipeline("text-generation", model=model, tokenizer=tokenizer))
+
+ # Initialize Langchain ConversationChain
+ conversation = ConversationChain(llm=llm, memory=memory)
+
+ class QuestionRequest(BaseModel):
+     question: str
+
+ class ChatResponse(BaseModel):
+     response: str
+
+ @app.post("/api/generate", response_model=ChatResponse)
+ async def generate_text(request: QuestionRequest):
+     try:
+         response = conversation.predict(input=request.question)
+         return {"response": response}
+     except Exception as e:
+         print("Error during generation:")
+         traceback.print_exc()
+         raise HTTPException(status_code=500, detail=str(e))
+
+
+ # below when not using langchain fully
+ # try:
+ #     # Retrieve history
+ #     history = memory.load_memory_variables({})['history']
+ #     # Create prompt with history and current question
+ #     prompt = f"History:\n{history}\nQuestion: {request.question}\nAnswer:"
+
+ #     inputs = tokenizer(prompt, return_tensors="pt", padding=True, truncation=True).to(device)
+
+ #     with torch.no_grad():
+ #         outputs = model.generate(
+ #             inputs=inputs['input_ids'],  # Pass the 'input_ids' tensor
+ #             attention_mask=inputs['attention_mask'],
+ #             max_length=300,
+ #             num_beams=5,
+ #             no_repeat_ngram_size=2,
+ #             temperature=0.7,
+ #             top_k=50,
+ #             top_p=0.95,
+ #             do_sample=True,
+ #             eos_token_id=tokenizer.convert_tokens_to_ids("<|endoftext|>"),
+ #             pad_token_id=tokenizer.convert_tokens_to_ids("<|endoftext|>")
+ #         )
+ #     response = tokenizer.decode(outputs[0], skip_special_tokens=True)
+ #     return {"response": response}
+ # except Exception as e:
+ #     print("Error during generation:")
+ #     traceback.print_exc()
+ #     raise HTTPException(status_code=500, detail=str(e))
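Because `memory` is a module-level ConversationBufferMemory, every request is appended to one shared history, so consecutive calls behave like turns of a single conversation (and all clients share it; per-session memory would need the chain keyed by a session identifier, which this initial setup does not do). A minimal two-turn sketch, assuming the app is reachable on localhost:7860 as set in the Dockerfile's CMD:

```python
import requests

BASE_URL = "http://localhost:7860"  # assumed local port mapping for the container

def ask(question: str) -> str:
    # POST to the /api/generate endpoint defined in app.py
    r = requests.post(f"{BASE_URL}/api/generate", json={"question": question}, timeout=120)
    r.raise_for_status()
    return r.json()["response"]

# The first turn puts a fact into the shared ConversationBufferMemory...
print(ask("My name is Ada. Please remember that."))
# ...and the second turn can use it, because ConversationChain replays the buffered history.
print(ask("What is my name?"))
```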
requirements.txt ADDED
@@ -0,0 +1,69 @@
+ accelerate==1.6.0
+ aiohappyeyeballs==2.6.1
+ aiohttp==3.11.16
+ aiosignal==1.3.2
+ annotated-types==0.7.0
+ anyio==4.8.0
+ async-timeout==4.0.3
+ attrs==25.3.0
+ certifi==2025.1.31
+ charset-normalizer==3.4.1
+ click==8.1.8
+ dataclasses-json==0.6.7
+ exceptiongroup==1.2.2
+ fastapi==0.115.8
+ filelock==3.17.0
+ frozenlist==1.5.0
+ fsspec==2025.2.0
+ h11==0.14.0
+ httpcore==1.0.8
+ httpx==0.28.1
+ httpx-sse==0.4.0
+ huggingface-hub==0.28.1
+ idna==3.10
+ Jinja2==3.1.4
+ jsonpatch==1.33
+ jsonpointer==3.0.0
+ langchain==0.3.23
+ langchain-community==0.3.21
+ langchain-core==0.3.51
+ langchain-text-splitters==0.3.8
+ langsmith==0.3.30
+ MarkupSafe==2.1.5
+ marshmallow==3.26.1
+ mpmath==1.3.0
+ multidict==6.4.3
+ mypy-extensions==1.0.0
+ networkx==3.4.2
+ numpy==2.2.3
+ orjson==3.10.16
+ packaging==24.2
+ pillow==11.0.0
+ propcache==0.3.1
+ psutil==7.0.0
+ pydantic==2.10.6
+ pydantic-settings==2.8.1
+ pydantic_core==2.27.2
+ python-dotenv==1.1.0
+ PyYAML==6.0.2
+ regex==2024.11.6
+ requests==2.32.3
+ requests-toolbelt==1.0.0
+ safetensors==0.5.2
+ sniffio==1.3.1
+ SQLAlchemy==2.0.40
+ starlette==0.45.3
+ sympy
+ tenacity==9.1.2
+ tokenizers==0.21.0
+ torch==2.6.0
+ torchaudio==2.6.0
+ torchvision==0.21.0  # corrected from 0.11.3, which is incompatible with torch 2.6.0
+ tqdm==4.67.1
+ transformers==4.49.0
+ typing-inspect==0.9.0
+ typing_extensions==4.12.2
+ urllib3==2.3.0
+ uvicorn==0.34.0
+ yarl==1.19.0
+ zstandard==0.23.0