lucas-wa committed
Commit 02d730d · 1 Parent(s): cd5f7e4

Initial commit

Files changed (7)
  1. Dockerfile +28 -0
  2. app.py +117 -0
  3. check_token.py +7 -0
  4. docker +1 -0
  5. entrypoint.sh +15 -0
  6. inference.py +61 -0
  7. requirements.txt +12 -0
Dockerfile ADDED
@@ -0,0 +1,28 @@
+ FROM debian
+ # FROM python:3.10.12
+
+ RUN apt-get update && apt-get upgrade -y
+
+ # python3.9 is not in the default Debian repos; install python3 and pip instead
+ RUN apt-get install -y python3 python3-pip
+
+ WORKDIR /code
+
+ COPY ./requirements.txt /code/requirements.txt
+
+ RUN pip3 install --no-cache-dir --upgrade -r /code/requirements.txt
+
+ # Ensure huggingface-cli is installed
+ RUN pip3 install huggingface-hub
+
+ COPY ./app.py /code/app.py
+
+ COPY ./check_token.py /code/check_token.py
+
+ COPY ./inference.py /code/inference.py
+
+ COPY entrypoint.sh /entrypoint.sh
+ RUN chmod +x /entrypoint.sh
+ # An exec-form ENTRYPOINT cannot be chained with &&; the default command goes in CMD
+ ENTRYPOINT ["/entrypoint.sh"]
+ CMD ["/bin/bash"]
app.py ADDED
@@ -0,0 +1,117 @@
+ import os
+ import shutil
+ from uuid import uuid4
+ from pydantic import BaseModel
+ from fastapi.encoders import jsonable_encoder
+ from fastapi.responses import StreamingResponse
+ from fastapi.middleware.cors import CORSMiddleware
+ from fastapi import FastAPI, UploadFile, File, HTTPException
+ from llama_index.readers import SimpleDirectoryReader
+ from llama_index import VectorStoreIndex
+
+ # Importing inference configures the global llama_index service context (LLM + embeddings)
+ import inference
+
+
+ app = FastAPI()
+
+ app.add_middleware(
+     CORSMiddleware,
+     allow_origins=["*"],
+     allow_credentials=True,
+     allow_methods=["*"],
+     allow_headers=["*"],
+ )
+
+
+ class Message(BaseModel):
+     content: str
+
+
+ if not os.path.exists("tmp"):
+     os.mkdir("tmp")
+
+ # In-memory map from session id to its vector index
+ vector_stores = {}
+
+
+ @app.post("/retrieval/ingest")
+ async def store_file(file: UploadFile = File(...)):
+     try:
+         id = str(uuid4())
+         file_location = f"tmp/{id}"
+
+         if not os.path.exists(file_location):
+             os.mkdir(file_location)
+
+         # Persist the upload, then index every document in the session folder
+         with open(f"{file_location}/{file.filename}", "wb+") as f:
+             shutil.copyfileobj(file.file, f)
+
+         pdf = SimpleDirectoryReader(file_location).load_data()
+         vector_stores[id] = VectorStoreIndex.from_documents(pdf)
+
+         return jsonable_encoder({"uuid": id})
+     except Exception as e:
+         return jsonable_encoder({"error": str(e)})
+
+
+ @app.post("/retrieval/ingest/{id}")
+ async def store_file_with_id(id: str, file: UploadFile = File(...)):
+     try:
+         if not id:
+             raise HTTPException(status_code=400, detail="Id is required")
+
+         file_location = f"tmp/{id}"
+
+         if not os.path.exists(file_location):
+             os.mkdir(file_location)
+
+         with open(f"{file_location}/{file.filename}", "wb+") as f:
+             shutil.copyfileobj(file.file, f)
+
+         # Re-index the whole session folder so earlier uploads stay searchable
+         pdf = SimpleDirectoryReader(file_location).load_data()
+         vector_stores[id] = VectorStoreIndex.from_documents(pdf)
+
+         return jsonable_encoder({"uuid": id})
+     except Exception as e:
+         return jsonable_encoder({"error": str(e)})
+
+
+ @app.delete("/session/{id}")
+ async def delete_session(id: str):
+     try:
+         shutil.rmtree(f"tmp/{id}")
+         vector_stores.pop(id, None)
+         return jsonable_encoder({"message": "ok"})
+     except Exception as e:
+         return jsonable_encoder({"error": str(e)})
+
+
+ # Renamed from `inference` so the handler no longer shadows the imported module
+ @app.post("/retrieval/{id}")
+ async def query_index(id: str, message: Message):
+     if id not in vector_stores:
+         raise HTTPException(status_code=404, detail="Unknown session id")
+
+     query_engine = vector_stores[id].as_query_engine()
+     result = query_engine.query(message.content)
+
+     return jsonable_encoder({"response": str(result)})
+
+
+ def stream_inference(gen):
+     for token in gen:
+         yield token
+
+
+ @app.post("/retrieval/stream/{id}")
+ async def query_index_stream(id: str, message: Message):
+     if id not in vector_stores:
+         raise HTTPException(status_code=404, detail="Unknown session id")
+
+     query_engine = vector_stores[id].as_query_engine(streaming=True)
+     gen = query_engine.query(message.content).response_gen
+
+     return StreamingResponse(stream_inference(gen))
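
For context, here is a minimal client sketch for these endpoints. The details not fixed by the repo are assumptions: the API is assumed to be served by uvicorn on localhost:8000, the requests package (not listed in requirements.txt) is assumed installed, and document.pdf is a placeholder path.

import requests

BASE_URL = "http://localhost:8000"  # assumed uvicorn host/port

# Ingest a PDF; the server responds with the session uuid
with open("document.pdf", "rb") as f:  # placeholder file name
    res = requests.post(f"{BASE_URL}/retrieval/ingest", files={"file": f})
session_id = res.json()["uuid"]

# Ask a question against that session's index
res = requests.post(
    f"{BASE_URL}/retrieval/{session_id}",
    json={"content": "Qual é o tema principal do documento?"},
)
print(res.json())

# Stream the answer token by token from the streaming endpoint
with requests.post(
    f"{BASE_URL}/retrieval/stream/{session_id}",
    json={"content": "Resuma o documento."},
    stream=True,
) as res:
    for chunk in res.iter_content(chunk_size=None, decode_unicode=True):
        print(chunk, end="", flush=True)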
check_token.py ADDED
@@ -0,0 +1,7 @@
+ import os
+ # Use the public login helper rather than the private huggingface_hub._login
+ from huggingface_hub import login
+
+ HF_KEY = os.environ["HF_KEY"]
+
+ login(token=HF_KEY, add_to_git_credential=False)
docker ADDED
@@ -0,0 +1 @@
+ failed to get console mode for stdout: The handle is invalid.
entrypoint.sh ADDED
@@ -0,0 +1,15 @@
+ #!/bin/bash
+
+ # Check the token (check_token.py reads it from the HF_KEY environment variable)
+ python3 /code/check_token.py
+
+ # Check the exit status
+ if [ $? -eq 0 ]; then
+     echo "Login successful!"
+ else
+     echo "Login failed."
+     exit 1
+ fi
+
+ # Start the application
+ exec "$@"
inference.py ADDED
@@ -0,0 +1,61 @@
+ import logging
+ import sys
+ import torch
+ from llama_index.llms.huggingface import HuggingFaceLLM
+ from llama_index.prompts import PromptTemplate
+ from llama_index.embeddings.huggingface import HuggingFaceEmbedding
+
+ from llama_index import (
+     ServiceContext,
+     set_global_service_context,
+ )
+
+ logging.basicConfig(stream=sys.stdout, level=logging.INFO)
+ logging.getLogger().addHandler(logging.StreamHandler(stream=sys.stdout))
+
+
+ # Model names (make sure you have access on HF)
+ LLAMA2_7B = "meta-llama/Llama-2-7b-hf"
+ LLAMA2_7B_CHAT = "meta-llama/Llama-2-7b-chat-hf"
+ LLAMA2_13B = "meta-llama/Llama-2-13b-hf"
+ LLAMA2_13B_CHAT = "meta-llama/Llama-2-13b-chat-hf"
+ LLAMA2_70B = "meta-llama/Llama-2-70b-hf"
+ LLAMA2_70B_CHAT = "meta-llama/Llama-2-70b-chat-hf"
+
+ selected_model = LLAMA2_7B_CHAT
+
+ # The system prompt stays in Brazilian Portuguese on purpose: it instructs the
+ # assistant to answer in pt-BR based only on the documents provided.
+ SYSTEM_PROMPT = """Você é um assistente de IA que responde a perguntas de maneira amigável, com base nos documentos fornecidos. Aqui estão algumas regras que você sempre segue:
+ - Gerar saídas legíveis para humanos, evitando criar texto sem sentido.
+ - Gerar apenas a saída solicitada, sem incluir qualquer outro idioma antes ou depois da saída solicitada.
+ - Nunca agradecer, expressar felicidade em ajudar, mencionar que é um agente de IA, etc. Apenas responda diretamente.
+ - Gerar linguagem profissional geralmente usada em documentos comerciais na América do Norte.
+ - Nunca gerar linguagem ofensiva ou obscena.
+ - Traduza as suas respostas sempre para Português Brasileiro. Nunca responda nada em inglês.
+ """
+
+ query_wrapper_prompt = PromptTemplate(
+     "[INST]<<SYS>>\n" + SYSTEM_PROMPT + "<</SYS>>\n\n{query_str}[/INST] "
+ )
+
+ llm = HuggingFaceLLM(
+     context_window=4096,
+     max_new_tokens=2048,
+     generate_kwargs={"temperature": 0.0, "do_sample": False},
+     query_wrapper_prompt=query_wrapper_prompt,
+     tokenizer_name=selected_model,
+     model_name=selected_model,
+     device_map="auto",
+     # change these settings below depending on your GPU
+     # model_kwargs={"torch_dtype": torch.float16, "load_in_8bit": True},
+ )
+
+
+ # Portuguese BERT embeddings, matching the pt-BR documents and prompt
+ embed_model = HuggingFaceEmbedding(model_name="neuralmind/bert-base-portuguese-cased")
+ # embed_model = FlagModel("BAAI/bge-m3")
+
+ service_context = ServiceContext.from_defaults(llm=llm, embed_model=embed_model)
+
+ set_global_service_context(service_context)
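
Because inference.py installs a global ServiceContext as an import side effect, any VectorStoreIndex built afterwards (as app.py does) automatically uses this Llama-2 LLM and the Portuguese embedding model. A minimal sketch of that behavior, assuming the llama-index 0.9.x API pinned in requirements.txt and an illustrative in-memory document:

from llama_index import Document, VectorStoreIndex

import inference  # side effect: set_global_service_context(...)

# No service_context argument is needed: the index picks up the global one, so
# embeddings come from bert-base-portuguese-cased and answers from Llama-2.
doc = Document(text="O Brasil é o maior país da América do Sul.")  # illustrative
index = VectorStoreIndex.from_documents([doc])

print(index.as_query_engine().query("Qual é o maior país da América do Sul?"))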
requirements.txt ADDED
@@ -0,0 +1,12 @@
+ langchain==0.1.7
+ fastapi==0.109.2
+ python-multipart==0.0.9
+ uvicorn==0.27.1
+ pypdf==4.0.1
+ pyngrok==7.1.2
+ ipywidgets==7.7.1
+ bitsandbytes==0.42.0
+ accelerate==0.27.2
+ llama-index==0.9.47
+ torch==2.0.1
+ transformers==4.37.2