{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "import streamlit as st\n",
    "import pandas as pd\n",
    "import os\n",
    "import replicate\n",
    "from langchain.chat_models import ChatOpenAI\n",
    "from langchain.prompts.chat import (\n",
    "    ChatPromptTemplate,\n",
    "    SystemMessagePromptTemplate,\n",
    "    AIMessagePromptTemplate,\n",
    "    HumanMessagePromptTemplate,\n",
    ")\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "assistant: πŸ˜„ Oh my daisies, companion planting? It's like peas in a pod, ya know? πŸ˜‚ It's like having a garden party with all your plant friends! πŸŽ‰ Companion planting is the bee's knees, it's like a garden symphony, all the plants working together in harmony! 🎢 And let me tell you, it's not just about looks, it's like a big ol' hug for your plants! πŸ€— It's like planting a big ol' bouquet of flowers, all mixed together, just like a big ol' garden party! πŸŽ‰\n",
      "But seriously, companion planting is a great way to create a balanced and healthy garden ecosystem. It's like having a little garden family, all working together to keep the pests away and the soil healthy! 🐝🐜 And let me tell you, it's not just about the plants, it's like a big ol' party for the bees and butterflies too! πŸπŸ¦‹ They love all the different colors and scents, it's like a big ol' garden buffet for them! 🍴🍸 So, if you haven't tried companion planting yet, you should give it a go, it's like the bee's knees, it's the cat's pajamas! πŸ°πŸ‘ πŸ’€\n",
      "But enough about that, let's talk about you, what do you think about companion planting? Have you tried it before? Do you have any questions? Let's chat, I'm all ears! πŸ°πŸ‘‚πŸ’¬\n"
     ]
    }
   ],
   "source": [
    "from llama_index.llms import Replicate, ChatMessage\n",
    "\n",
    "llm = Replicate(\n",
    "    model=\"a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5\"\n",
    ")\n",
    "\n",
    "messages = [\n",
    "    ChatMessage(\n",
    "        role=\"system\", content=\"You are a gardnere with a colorful personality\"\n",
    "    ),\n",
    "    ChatMessage(role=\"user\", content=\"What is your opinion on companion planting?\"),\n",
    "]\n",
    "resp = llm.chat(messages)\n",
    "\n",
    "print(resp)"
   ]
  },
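  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A minimal streaming sketch of the same chat call, assuming the llama_index `Replicate` wrapper's `stream_chat` method (it yields incremental deltas instead of blocking on the full reply):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hedged sketch: stream the same chat turn instead of waiting for the\n",
    "# full completion. Reuses `llm` and `messages` from the cell above.\n",
    "stream = llm.stream_chat(messages)\n",
    "for chunk in stream:\n",
    "    # each chunk carries only the newly generated text in `.delta`\n",
    "    print(chunk.delta, end=\"\")"
   ]
  },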
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "You return JUST a python list object containing the elements that can be grown in a garden. Do not include any other text or explanation.which of the elements of this list can be grown in a garden, [apple, orange, milk, eraser, cherry]? Return JUST a python list object containing the elements that can be grown in a garden. Do not include any other text or explanation.\n"
     ]
    },
    {
     "ename": "ReplicateError",
     "evalue": "You have reached the free time limit. To continue using Replicate, set up billing at https://replicate.com/account/billing#billing.",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mReplicateError\u001b[0m                            Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[13], line 20\u001b[0m\n\u001b[1;32m     17\u001b[0m input_prompt \u001b[39m=\u001b[39m template \u001b[39m+\u001b[39m text\n\u001b[1;32m     18\u001b[0m \u001b[39mprint\u001b[39m(input_prompt)\n\u001b[0;32m---> 20\u001b[0m resp \u001b[39m=\u001b[39m llm\u001b[39m.\u001b[39mcomplete(input_prompt)\n\u001b[1;32m     21\u001b[0m \u001b[39mprint\u001b[39m(resp)\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/llama_index/llms/base.py:223\u001b[0m, in \u001b[0;36mllm_completion_callback.<locals>.wrap.<locals>.wrapped_llm_predict\u001b[0;34m(_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    213\u001b[0m \u001b[39mwith\u001b[39;00m wrapper_logic(_self) \u001b[39mas\u001b[39;00m callback_manager:\n\u001b[1;32m    214\u001b[0m     event_id \u001b[39m=\u001b[39m callback_manager\u001b[39m.\u001b[39mon_event_start(\n\u001b[1;32m    215\u001b[0m         CBEventType\u001b[39m.\u001b[39mLLM,\n\u001b[1;32m    216\u001b[0m         payload\u001b[39m=\u001b[39m{\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    220\u001b[0m         },\n\u001b[1;32m    221\u001b[0m     )\n\u001b[0;32m--> 223\u001b[0m     f_return_val \u001b[39m=\u001b[39m f(_self, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m    224\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(f_return_val, Generator):\n\u001b[1;32m    225\u001b[0m         \u001b[39m# intercept the generator and add a callback to the end\u001b[39;00m\n\u001b[1;32m    226\u001b[0m         \u001b[39mdef\u001b[39;00m \u001b[39mwrapped_gen\u001b[39m() \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m CompletionResponseGen:\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/llama_index/llms/replicate.py:100\u001b[0m, in \u001b[0;36mReplicate.complete\u001b[0;34m(self, prompt, **kwargs)\u001b[0m\n\u001b[1;32m     98\u001b[0m \u001b[39m@llm_completion_callback\u001b[39m()\n\u001b[1;32m     99\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mcomplete\u001b[39m(\u001b[39mself\u001b[39m, prompt: \u001b[39mstr\u001b[39m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs: Any) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m CompletionResponse:\n\u001b[0;32m--> 100\u001b[0m     response_gen \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mstream_complete(prompt, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m    101\u001b[0m     response_list \u001b[39m=\u001b[39m \u001b[39mlist\u001b[39m(response_gen)\n\u001b[1;32m    102\u001b[0m     final_response \u001b[39m=\u001b[39m response_list[\u001b[39m-\u001b[39m\u001b[39m1\u001b[39m]\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/llama_index/llms/base.py:223\u001b[0m, in \u001b[0;36mllm_completion_callback.<locals>.wrap.<locals>.wrapped_llm_predict\u001b[0;34m(_self, *args, **kwargs)\u001b[0m\n\u001b[1;32m    213\u001b[0m \u001b[39mwith\u001b[39;00m wrapper_logic(_self) \u001b[39mas\u001b[39;00m callback_manager:\n\u001b[1;32m    214\u001b[0m     event_id \u001b[39m=\u001b[39m callback_manager\u001b[39m.\u001b[39mon_event_start(\n\u001b[1;32m    215\u001b[0m         CBEventType\u001b[39m.\u001b[39mLLM,\n\u001b[1;32m    216\u001b[0m         payload\u001b[39m=\u001b[39m{\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    220\u001b[0m         },\n\u001b[1;32m    221\u001b[0m     )\n\u001b[0;32m--> 223\u001b[0m     f_return_val \u001b[39m=\u001b[39m f(_self, \u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[1;32m    224\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39misinstance\u001b[39m(f_return_val, Generator):\n\u001b[1;32m    225\u001b[0m         \u001b[39m# intercept the generator and add a callback to the end\u001b[39;00m\n\u001b[1;32m    226\u001b[0m         \u001b[39mdef\u001b[39;00m \u001b[39mwrapped_gen\u001b[39m() \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m CompletionResponseGen:\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/llama_index/llms/replicate.py:119\u001b[0m, in \u001b[0;36mReplicate.stream_complete\u001b[0;34m(self, prompt, **kwargs)\u001b[0m\n\u001b[1;32m    117\u001b[0m     prompt \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mcompletion_to_prompt(prompt)\n\u001b[1;32m    118\u001b[0m input_dict \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_get_input_dict(prompt, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m--> 119\u001b[0m response_iter \u001b[39m=\u001b[39m replicate\u001b[39m.\u001b[39mrun(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodel, \u001b[39minput\u001b[39m\u001b[39m=\u001b[39minput_dict)\n\u001b[1;32m    121\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mgen\u001b[39m() \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m CompletionResponseGen:\n\u001b[1;32m    122\u001b[0m     text \u001b[39m=\u001b[39m \u001b[39m\"\u001b[39m\u001b[39m\"\u001b[39m\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/replicate/client.py:147\u001b[0m, in \u001b[0;36mClient.run\u001b[0;34m(self, ref, input, **params)\u001b[0m\n\u001b[1;32m    137\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mrun\u001b[39m(\n\u001b[1;32m    138\u001b[0m     \u001b[39mself\u001b[39m,\n\u001b[1;32m    139\u001b[0m     ref: \u001b[39mstr\u001b[39m,\n\u001b[1;32m    140\u001b[0m     \u001b[39minput\u001b[39m: Optional[Dict[\u001b[39mstr\u001b[39m, Any]] \u001b[39m=\u001b[39m \u001b[39mNone\u001b[39;00m,\n\u001b[1;32m    141\u001b[0m     \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams: Unpack[\u001b[39m\"\u001b[39m\u001b[39mPredictions.CreatePredictionParams\u001b[39m\u001b[39m\"\u001b[39m],\n\u001b[1;32m    142\u001b[0m ) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m Union[Any, Iterator[Any]]:  \u001b[39m# noqa: ANN401\u001b[39;00m\n\u001b[1;32m    143\u001b[0m \u001b[39m    \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m    144\u001b[0m \u001b[39m    Run a model and wait for its output.\u001b[39;00m\n\u001b[1;32m    145\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 147\u001b[0m     \u001b[39mreturn\u001b[39;00m run(\u001b[39mself\u001b[39m, ref, \u001b[39minput\u001b[39m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams)\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/replicate/run.py:31\u001b[0m, in \u001b[0;36mrun\u001b[0;34m(client, ref, input, **params)\u001b[0m\n\u001b[1;32m     28\u001b[0m version, owner, name, version_id \u001b[39m=\u001b[39m identifier\u001b[39m.\u001b[39m_resolve(ref)\n\u001b[1;32m     30\u001b[0m \u001b[39mif\u001b[39;00m version_id \u001b[39mis\u001b[39;00m \u001b[39mnot\u001b[39;00m \u001b[39mNone\u001b[39;00m:\n\u001b[0;32m---> 31\u001b[0m     prediction \u001b[39m=\u001b[39m client\u001b[39m.\u001b[39mpredictions\u001b[39m.\u001b[39mcreate(\n\u001b[1;32m     32\u001b[0m         version\u001b[39m=\u001b[39mversion_id, \u001b[39minput\u001b[39m\u001b[39m=\u001b[39m\u001b[39minput\u001b[39m \u001b[39mor\u001b[39;00m {}, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams\n\u001b[1;32m     33\u001b[0m     )\n\u001b[1;32m     34\u001b[0m \u001b[39melif\u001b[39;00m owner \u001b[39mand\u001b[39;00m name:\n\u001b[1;32m     35\u001b[0m     prediction \u001b[39m=\u001b[39m client\u001b[39m.\u001b[39mmodels\u001b[39m.\u001b[39mpredictions\u001b[39m.\u001b[39mcreate(\n\u001b[1;32m     36\u001b[0m         model\u001b[39m=\u001b[39m(owner, name), \u001b[39minput\u001b[39m\u001b[39m=\u001b[39m\u001b[39minput\u001b[39m \u001b[39mor\u001b[39;00m {}, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams\n\u001b[1;32m     37\u001b[0m     )\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/replicate/prediction.py:309\u001b[0m, in \u001b[0;36mPredictions.create\u001b[0;34m(self, version, input, **params)\u001b[0m\n\u001b[1;32m    300\u001b[0m \u001b[39m\u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m    301\u001b[0m \u001b[39mCreate a new prediction for the specified model version.\u001b[39;00m\n\u001b[1;32m    302\u001b[0m \u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m    304\u001b[0m body \u001b[39m=\u001b[39m _create_prediction_body(\n\u001b[1;32m    305\u001b[0m     version,\n\u001b[1;32m    306\u001b[0m     \u001b[39minput\u001b[39m,\n\u001b[1;32m    307\u001b[0m     \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mparams,\n\u001b[1;32m    308\u001b[0m )\n\u001b[0;32m--> 309\u001b[0m resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_client\u001b[39m.\u001b[39m_request(\n\u001b[1;32m    310\u001b[0m     \u001b[39m\"\u001b[39m\u001b[39mPOST\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m    311\u001b[0m     \u001b[39m\"\u001b[39m\u001b[39m/v1/predictions\u001b[39m\u001b[39m\"\u001b[39m,\n\u001b[1;32m    312\u001b[0m     json\u001b[39m=\u001b[39mbody,\n\u001b[1;32m    313\u001b[0m )\n\u001b[1;32m    315\u001b[0m \u001b[39mreturn\u001b[39;00m _json_to_prediction(\u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_client, resp\u001b[39m.\u001b[39mjson())\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/replicate/client.py:85\u001b[0m, in \u001b[0;36mClient._request\u001b[0;34m(self, method, path, **kwargs)\u001b[0m\n\u001b[1;32m     83\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_request\u001b[39m(\u001b[39mself\u001b[39m, method: \u001b[39mstr\u001b[39m, path: \u001b[39mstr\u001b[39m, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m httpx\u001b[39m.\u001b[39mResponse:\n\u001b[1;32m     84\u001b[0m     resp \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_client\u001b[39m.\u001b[39mrequest(method, path, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n\u001b[0;32m---> 85\u001b[0m     _raise_for_status(resp)\n\u001b[1;32m     87\u001b[0m     \u001b[39mreturn\u001b[39;00m resp\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/replicate/client.py:358\u001b[0m, in \u001b[0;36m_raise_for_status\u001b[0;34m(resp)\u001b[0m\n\u001b[1;32m    356\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m_raise_for_status\u001b[39m(resp: httpx\u001b[39m.\u001b[39mResponse) \u001b[39m-\u001b[39m\u001b[39m>\u001b[39m \u001b[39mNone\u001b[39;00m:\n\u001b[1;32m    357\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39m400\u001b[39m \u001b[39m<\u001b[39m\u001b[39m=\u001b[39m resp\u001b[39m.\u001b[39mstatus_code \u001b[39m<\u001b[39m \u001b[39m600\u001b[39m:\n\u001b[0;32m--> 358\u001b[0m         \u001b[39mraise\u001b[39;00m ReplicateError(resp\u001b[39m.\u001b[39mjson()[\u001b[39m\"\u001b[39m\u001b[39mdetail\u001b[39m\u001b[39m\"\u001b[39m])\n",
      "\u001b[0;31mReplicateError\u001b[0m: You have reached the free time limit. To continue using Replicate, set up billing at https://replicate.com/account/billing#billing."
     ]
    }
   ],
   "source": [
    "from llama_index.llms import Replicate\n",
    "\n",
    "llm = Replicate(\n",
    "    model=\"a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5\",\n",
    "    temperature=0.1,\n",
    "    #context_window=32,\n",
    "    top_p=0.9,\n",
    "    repetition_penalty=1.0,\n",
    "    max_tokens=2000,\n",
    "    #stop_sequences=[\"\\n\\n\"],   \n",
    "\n",
    ")\n",
    "\n",
    "input_plant_text = 'apple, orange, milk, eraser, cherry'\n",
    "template=\"You return JUST a python list object containing the elements that can be grown in a garden. Do not include any other text or explanation.\"\n",
    "text = 'which of the elements of this list can be grown in a garden, [' + input_plant_text + ']? Return JUST a python list object containing the elements that can be grown in a garden. Do not include any other text or explanation.'\n",
    "input_prompt = template + text\n",
    "print(input_prompt)\n",
    "\n",
    "resp = llm.complete(input_prompt)\n",
    "print(resp)\n"
   ]
  },
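  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Even when the model obeys the list-only instruction, the reply arrives as a string, so a defensive parse keeps downstream code from breaking on extra chatter. A standard-library sketch; `parse_plant_list` is an illustrative helper, not part of the app:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import ast\n",
    "import re\n",
    "\n",
    "\n",
    "def parse_plant_list(raw):\n",
    "    # happy path: the reply is exactly a Python list literal\n",
    "    try:\n",
    "        parsed = ast.literal_eval(raw.strip())\n",
    "        if isinstance(parsed, list):\n",
    "            return parsed\n",
    "    except (ValueError, SyntaxError):\n",
    "        pass\n",
    "    # fallback: pull the first [...] span out of any surrounding text\n",
    "    match = re.search(r\"\\[.*?\\]\", raw, re.DOTALL)\n",
    "    if match:\n",
    "        try:\n",
    "            parsed = ast.literal_eval(match.group(0))\n",
    "            if isinstance(parsed, list):\n",
    "                return parsed\n",
    "        except (ValueError, SyntaxError):\n",
    "            pass\n",
    "    return []\n",
    "\n",
    "\n",
    "print(parse_plant_list(\"Sure! ['apple', 'orange', 'cherry']\"))"
   ]
  },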
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Companion planting is the practice of growing different plants together in close proximity in order to improve their growth, health, and productivity. This technique takes advantage of the different ways that plants interact with each other, such as by providing shade, repelling pests, or attracting beneficial insects.\n",
      "\n",
      "Here are some of my thoughts on companion planting:\n",
      "\n",
      "1. Diversify your garden: Companion planting is a great way to add diversity to your garden, which can improve its overall health and resilience. By growing a mix of plants, you can create a more complex and dynamic ecosystem that is less susceptible to pests and diseases.\n",
      "2. Improve soil health: Many companion plants, such as legumes and comfrey, have the ability to fix nitrogen or other nutrients in the soil, which can improve the health and fertility of the soil. This can lead to healthier and more productive plants.\n",
      "3. Enhance pest control: Companion planting can be a powerful tool for controlling pests naturally. For example, basil and mint can repel aphids, while marigold and nasturtium can attract beneficial insects that prey on pests.\n",
      "4. Increase yields: Companion planting can also help to increase yields by providing support and shade for plants, or by attracting beneficial insects that pollinate or prey on pests. For example, planting beans with corn and squash can provide a trellis for the beans and shade for the corn, while also attracting beneficial insects that prey on pests.\n",
      "5. Reduce maintenance: Companion planting can also reduce the amount of maintenance required in your garden. For example, planting a mix of plants that have different growing habits and blooming times can create a more dynamic and resilient garden that requires less work to maintain.\n",
      "\n",
      "\n",
      "Overall, I believe that companion planting is a valuable technique for gardeners of all experience levels. It can help to improve the health, productivity, and resilience of your garden, while also reducing the amount of maintenance required. By taking advantage of the different ways that plants interact with each other"
     ]
    }
   ],
   "source": [
    "#os.environ[\"REPLICATE_API_TOKEN\"] = \"key here\"\n",
    "api = replicate.Client(api_token=os.environ[\"REPLICATE_API_TOKEN\"])\n",
    "output = api.run(\n",
    "    \"a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5\",\n",
    "    input={\"prompt\": \"what is your opinion on companion planting?\"},\n",
    "    )\n",
    "for item in output:\n",
    "    print(item, end=\"\")\n",
    "\n",
    "# save response to string\n",
    "resp = \"\"\n",
    "for item in output:\n",
    "    resp += item\n",
    "    "
   ]
  },
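  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "`api.run` forwards everything in `input` to the model, so sampling controls can ride along with the prompt. A sketch; the parameter names (`temperature`, `top_p`, `max_new_tokens`) are assumptions for this model family and should be checked against the model page on Replicate:"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hedged sketch: the same Replicate call with sampling controls added.\n",
    "# The extra input keys are assumptions; verify them on the model page.\n",
    "output = api.run(\n",
    "    \"a16z-infra/llama13b-v2-chat:df7690f1994d94e96ad9d568eac121aecf50684a0b0963b25a41cc40061269e5\",\n",
    "    input={\n",
    "        \"prompt\": \"what is your opinion on companion planting?\",\n",
    "        \"temperature\": 0.1,  # low temperature -> more deterministic output\n",
    "        \"top_p\": 0.9,\n",
    "        \"max_new_tokens\": 500,  # cap the length of the streamed reply\n",
    "    },\n",
    ")\n",
    "for item in output:\n",
    "    print(item, end=\"\")"
   ]
  },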
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "config.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 638/638 [00:00<00:00, 3.22MB/s]\n",
      "model.safetensors.index.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 23.9k/23.9k [00:00<00:00, 62.9MB/s]\n",
      "model-00001-of-00008.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.89G/1.89G [00:26<00:00, 71.1MB/s]\n",
      "model-00002-of-00008.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.95G/1.95G [00:27<00:00, 71.0MB/s]\n",
      "model-00003-of-00008.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.98G/1.98G [00:27<00:00, 72.0MB/s]\n",
      "model-00004-of-00008.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.95G/1.95G [00:27<00:00, 70.2MB/s]\n",
      "model-00005-of-00008.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.98G/1.98G [00:28<00:00, 69.8MB/s]\n",
      "model-00006-of-00008.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.95G/1.95G [00:28<00:00, 69.5MB/s]\n",
      "model-00007-of-00008.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.98G/1.98G [00:28<00:00, 68.5MB/s]\n",
      "model-00008-of-00008.safetensors: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 816M/816M [00:11<00:00, 69.9MB/s]\n",
      "Downloading shards: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [03:27<00:00, 25.96s/it]\n",
      "Loading checkpoint shards: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 8/8 [00:24<00:00,  3.04s/it]\n",
      "generation_config.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 111/111 [00:00<00:00, 1.12MB/s]\n",
      "tokenizer_config.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.43k/1.43k [00:00<00:00, 9.73MB/s]\n",
      "tokenizer.model: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 493k/493k [00:00<00:00, 69.9MB/s]\n",
      "tokenizer.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 1.80M/1.80M [00:00<00:00, 17.9MB/s]\n",
      "added_tokens.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 42.0/42.0 [00:00<00:00, 160kB/s]\n",
      "special_tokens_map.json: 100%|β–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆβ–ˆ| 168/168 [00:00<00:00, 961kB/s]\n",
      "/Users/dheym/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/transformers/generation/utils.py:1518: UserWarning: You have modified the pretrained model configuration to control generation. This is a deprecated strategy to control generation and will be removed soon, in a future version. Please use and modify the model generation configuration (see https://huggingface.co/docs/transformers/generation_strategies#default-text-generation-configuration )\n",
      "  warnings.warn(\n"
     ]
    },
    {
     "ename": "ValueError",
     "evalue": "Greedy methods without beam search do not support `num_return_sequences` different than 1 (got 3).",
     "output_type": "error",
     "traceback": [
      "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m",
      "\u001b[0;31mValueError\u001b[0m                                Traceback (most recent call last)",
      "Cell \u001b[0;32mIn[7], line 3\u001b[0m\n\u001b[1;32m      1\u001b[0m \u001b[39mfrom\u001b[39;00m \u001b[39mtransformers\u001b[39;00m \u001b[39mimport\u001b[39;00m pipeline\n\u001b[1;32m      2\u001b[0m generator \u001b[39m=\u001b[39m pipeline(\u001b[39m'\u001b[39m\u001b[39mtext-generation\u001b[39m\u001b[39m'\u001b[39m, model \u001b[39m=\u001b[39m \u001b[39m'\u001b[39m\u001b[39mHuggingFaceH4/zephyr-7b-beta\u001b[39m\u001b[39m'\u001b[39m)\n\u001b[0;32m----> 3\u001b[0m generator(\u001b[39m\"\u001b[39m\u001b[39mHello, I\u001b[39m\u001b[39m'\u001b[39m\u001b[39mm a language model\u001b[39m\u001b[39m\"\u001b[39m, max_length \u001b[39m=\u001b[39m \u001b[39m30\u001b[39m, num_return_sequences\u001b[39m=\u001b[39m\u001b[39m3\u001b[39m)\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/transformers/pipelines/text_generation.py:208\u001b[0m, in \u001b[0;36mTextGenerationPipeline.__call__\u001b[0;34m(self, text_inputs, **kwargs)\u001b[0m\n\u001b[1;32m    167\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39m__call__\u001b[39m(\u001b[39mself\u001b[39m, text_inputs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m    168\u001b[0m \u001b[39m    \u001b[39m\u001b[39m\"\"\"\u001b[39;00m\n\u001b[1;32m    169\u001b[0m \u001b[39m    Complete the prompt(s) given as inputs.\u001b[39;00m\n\u001b[1;32m    170\u001b[0m \n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m    206\u001b[0m \u001b[39m          ids of the generated text.\u001b[39;00m\n\u001b[1;32m    207\u001b[0m \u001b[39m    \"\"\"\u001b[39;00m\n\u001b[0;32m--> 208\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39msuper\u001b[39m()\u001b[39m.\u001b[39m\u001b[39m__call__\u001b[39m(text_inputs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/transformers/pipelines/base.py:1140\u001b[0m, in \u001b[0;36mPipeline.__call__\u001b[0;34m(self, inputs, num_workers, batch_size, *args, **kwargs)\u001b[0m\n\u001b[1;32m   1132\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mnext\u001b[39m(\n\u001b[1;32m   1133\u001b[0m         \u001b[39miter\u001b[39m(\n\u001b[1;32m   1134\u001b[0m             \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mget_iterator(\n\u001b[0;32m   (...)\u001b[0m\n\u001b[1;32m   1137\u001b[0m         )\n\u001b[1;32m   1138\u001b[0m     )\n\u001b[1;32m   1139\u001b[0m \u001b[39melse\u001b[39;00m:\n\u001b[0;32m-> 1140\u001b[0m     \u001b[39mreturn\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mrun_single(inputs, preprocess_params, forward_params, postprocess_params)\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/transformers/pipelines/base.py:1147\u001b[0m, in \u001b[0;36mPipeline.run_single\u001b[0;34m(self, inputs, preprocess_params, forward_params, postprocess_params)\u001b[0m\n\u001b[1;32m   1145\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mrun_single\u001b[39m(\u001b[39mself\u001b[39m, inputs, preprocess_params, forward_params, postprocess_params):\n\u001b[1;32m   1146\u001b[0m     model_inputs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mpreprocess(inputs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mpreprocess_params)\n\u001b[0;32m-> 1147\u001b[0m     model_outputs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mforward(model_inputs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mforward_params)\n\u001b[1;32m   1148\u001b[0m     outputs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mpostprocess(model_outputs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mpostprocess_params)\n\u001b[1;32m   1149\u001b[0m     \u001b[39mreturn\u001b[39;00m outputs\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/transformers/pipelines/base.py:1046\u001b[0m, in \u001b[0;36mPipeline.forward\u001b[0;34m(self, model_inputs, **forward_params)\u001b[0m\n\u001b[1;32m   1044\u001b[0m     \u001b[39mwith\u001b[39;00m inference_context():\n\u001b[1;32m   1045\u001b[0m         model_inputs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_ensure_tensor_on_device(model_inputs, device\u001b[39m=\u001b[39m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdevice)\n\u001b[0;32m-> 1046\u001b[0m         model_outputs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_forward(model_inputs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mforward_params)\n\u001b[1;32m   1047\u001b[0m         model_outputs \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_ensure_tensor_on_device(model_outputs, device\u001b[39m=\u001b[39mtorch\u001b[39m.\u001b[39mdevice(\u001b[39m\"\u001b[39m\u001b[39mcpu\u001b[39m\u001b[39m\"\u001b[39m))\n\u001b[1;32m   1048\u001b[0m \u001b[39melse\u001b[39;00m:\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/transformers/pipelines/text_generation.py:271\u001b[0m, in \u001b[0;36mTextGenerationPipeline._forward\u001b[0;34m(self, model_inputs, **generate_kwargs)\u001b[0m\n\u001b[1;32m    268\u001b[0m         generate_kwargs[\u001b[39m\"\u001b[39m\u001b[39mmin_length\u001b[39m\u001b[39m\"\u001b[39m] \u001b[39m+\u001b[39m\u001b[39m=\u001b[39m prefix_length\n\u001b[1;32m    270\u001b[0m \u001b[39m# BS x SL\u001b[39;00m\n\u001b[0;32m--> 271\u001b[0m generated_sequence \u001b[39m=\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mmodel\u001b[39m.\u001b[39mgenerate(input_ids\u001b[39m=\u001b[39minput_ids, attention_mask\u001b[39m=\u001b[39mattention_mask, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mgenerate_kwargs)\n\u001b[1;32m    272\u001b[0m out_b \u001b[39m=\u001b[39m generated_sequence\u001b[39m.\u001b[39mshape[\u001b[39m0\u001b[39m]\n\u001b[1;32m    273\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mframework \u001b[39m==\u001b[39m \u001b[39m\"\u001b[39m\u001b[39mpt\u001b[39m\u001b[39m\"\u001b[39m:\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/torch/utils/_contextlib.py:115\u001b[0m, in \u001b[0;36mcontext_decorator.<locals>.decorate_context\u001b[0;34m(*args, **kwargs)\u001b[0m\n\u001b[1;32m    112\u001b[0m \u001b[39m@functools\u001b[39m\u001b[39m.\u001b[39mwraps(func)\n\u001b[1;32m    113\u001b[0m \u001b[39mdef\u001b[39;00m \u001b[39mdecorate_context\u001b[39m(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs):\n\u001b[1;32m    114\u001b[0m     \u001b[39mwith\u001b[39;00m ctx_factory():\n\u001b[0;32m--> 115\u001b[0m         \u001b[39mreturn\u001b[39;00m func(\u001b[39m*\u001b[39margs, \u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/transformers/generation/utils.py:1529\u001b[0m, in \u001b[0;36mGenerationMixin.generate\u001b[0;34m(self, inputs, generation_config, logits_processor, stopping_criteria, prefix_allowed_tokens_fn, synced_gpus, assistant_model, streamer, negative_prompt_ids, negative_prompt_attention_mask, **kwargs)\u001b[0m\n\u001b[1;32m   1527\u001b[0m generation_config \u001b[39m=\u001b[39m copy\u001b[39m.\u001b[39mdeepcopy(generation_config)\n\u001b[1;32m   1528\u001b[0m model_kwargs \u001b[39m=\u001b[39m generation_config\u001b[39m.\u001b[39mupdate(\u001b[39m*\u001b[39m\u001b[39m*\u001b[39mkwargs)  \u001b[39m# All unused kwargs must be model kwargs\u001b[39;00m\n\u001b[0;32m-> 1529\u001b[0m generation_config\u001b[39m.\u001b[39mvalidate()\n\u001b[1;32m   1530\u001b[0m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39m_validate_model_kwargs(model_kwargs\u001b[39m.\u001b[39mcopy())\n\u001b[1;32m   1532\u001b[0m \u001b[39m# 2. Set generation parameters if not already defined\u001b[39;00m\n",
      "File \u001b[0;32m~/anaconda3/envs/GRDN_env/lib/python3.11/site-packages/transformers/generation/configuration_utils.py:498\u001b[0m, in \u001b[0;36mGenerationConfig.validate\u001b[0;34m(self, is_init)\u001b[0m\n\u001b[1;32m    496\u001b[0m \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnum_beams \u001b[39m==\u001b[39m \u001b[39m1\u001b[39m:\n\u001b[1;32m    497\u001b[0m     \u001b[39mif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mdo_sample \u001b[39mis\u001b[39;00m \u001b[39mFalse\u001b[39;00m:\n\u001b[0;32m--> 498\u001b[0m         \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m    499\u001b[0m             \u001b[39m\"\u001b[39m\u001b[39mGreedy methods without beam search do not support `num_return_sequences` different than 1 \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    500\u001b[0m             \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m(got \u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnum_return_sequences\u001b[39m}\u001b[39;00m\u001b[39m).\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    501\u001b[0m         )\n\u001b[1;32m    502\u001b[0m \u001b[39melif\u001b[39;00m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnum_return_sequences \u001b[39m>\u001b[39m \u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnum_beams:\n\u001b[1;32m    503\u001b[0m     \u001b[39mraise\u001b[39;00m \u001b[39mValueError\u001b[39;00m(\n\u001b[1;32m    504\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m`num_return_sequences` (\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnum_return_sequences\u001b[39m}\u001b[39;00m\u001b[39m) has to be smaller or equal to `num_beams` \u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    505\u001b[0m         \u001b[39mf\u001b[39m\u001b[39m\"\u001b[39m\u001b[39m(\u001b[39m\u001b[39m{\u001b[39;00m\u001b[39mself\u001b[39m\u001b[39m.\u001b[39mnum_beams\u001b[39m}\u001b[39;00m\u001b[39m).\u001b[39m\u001b[39m\"\u001b[39m\n\u001b[1;32m    506\u001b[0m     )\n",
      "\u001b[0;31mValueError\u001b[0m: Greedy methods without beam search do not support `num_return_sequences` different than 1 (got 3)."
     ]
    }
   ],
   "source": [
    "from transformers import pipeline\n",
    "generator = pipeline('text-generation', model = 'HuggingFaceH4/zephyr-7b-beta')\n",
    "generator(\"Hello, I'm a language model\", max_length = 30, num_return_sequences=3)\n",
    "## [{'generated_text': \"Hello, I'm a language modeler. So while writing this, when I went out to meet my wife or come home she told me that my\"},\n",
    "##  {'generated_text': \"Hello, I'm a language modeler. I write and maintain software in Python. I love to code, and that includes coding things that require writing\"}, ...\n",
    "\n"
   ]
  },
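  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "Zephyr is an instruction-tuned chat model, so it responds better to its chat format than to a bare completion prompt. A sketch reusing `generator` from the previous cell: the turn is formatted with the tokenizer's `apply_chat_template`, and sampling stays enabled because greedy decoding rejects multiple return sequences (see the traceback above):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hedged sketch: chat-style prompting for Zephyr via the tokenizer's\n",
    "# chat template. Reuses `generator` from the previous cell.\n",
    "messages = [\n",
    "    {\"role\": \"system\", \"content\": \"You are a gardener with a colorful personality.\"},\n",
    "    {\"role\": \"user\", \"content\": \"What is your opinion on companion planting?\"},\n",
    "]\n",
    "prompt = generator.tokenizer.apply_chat_template(\n",
    "    messages, tokenize=False, add_generation_prompt=True\n",
    ")\n",
    "outputs = generator(prompt, max_new_tokens=200, do_sample=True, temperature=0.7)\n",
    "print(outputs[0][\"generated_text\"])"
   ]
  },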
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Stream text\n",
    "def predict(message, chatbot, system_prompt=\"\", temperature=0.9, max_new_tokens=4096):\n",
    "    \n",
    "    client = Client(\"https://ysharma-explore-llamav2-with-tgi.hf.space/\")\n",
    "    return client.predict(\n",
    "            message,  # str in 'Message' Textbox component\n",
    "            system_prompt,  # str in 'Optional system prompt' Textbox component\n",
    "            temperature,  # int | float (numeric value between 0.0 and 1.0)\n",
    "            max_new_tokens,  # int | float (numeric value between 0 and 4096)\n",
    "            0.3,  # int | float (numeric value between 0.0 and 1)\n",
    "            1,  # int | float (numeric value between 1.0 and 2.0)\n",
    "            api_name=\"/chat\"\n",
    "    )\n"
   ]
  },
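  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "A usage sketch for `predict`, assuming the hosted Space above is still reachable (public demo Spaces come and go):"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# Hedged usage sketch: one chat turn against the hosted Llama 2 Space.\n",
    "reply = predict(\n",
    "    \"What is your opinion on companion planting?\",\n",
    "    chatbot=[],  # fresh conversation, no prior history\n",
    "    system_prompt=\"You are a gardener with a colorful personality.\",\n",
    "    temperature=0.7,\n",
    "    max_new_tokens=512,\n",
    ")\n",
    "print(reply)"
   ]
  }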
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "GRDN_env",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.3"
  },
  "orig_nbformat": 4
 },
 "nbformat": 4,
 "nbformat_minor": 2
}