MuhammadFarhan67 committed
Commit 34f575a · verified · 1 Parent(s): 9702d71

Upload 4 files

Files changed (4):
  1. App.py +105 -0
  2. Train.py +219 -0
  3. requirements (1).txt +5 -0
  4. requirements.txt +117 -0
App.py ADDED
@@ -0,0 +1,105 @@
+
+import streamlit as st
+import pandas as pd
+from datetime import datetime, timedelta
+from unsloth import FastLanguageModel
+import torch
+
+# Load the model and tokenizer
+model_path = "/home/roser97/MarketAI/lora_model"
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name=model_path,
+    max_seq_length=800,  # Adjust to your needs
+    load_in_4bit=True,
+)
+
+# Configure the model for inference
+FastLanguageModel.for_inference(model)
+
+def generate_marketing_content(instruction, input_context):
+    inputs = tokenizer(
+        [f"### Instruction:\n{instruction}\n### Input:\n{input_context}\n### Response:"],
+        return_tensors="pt"
+    ).to("cuda" if torch.cuda.is_available() else "cpu")
+
+    output = model.generate(**inputs, max_new_tokens=128)
+    return tokenizer.decode(output[0], skip_special_tokens=True)
+
+def main():
+    st.set_page_config(page_title="Compass AI", layout="wide")
+    st.title("Compass AI")
+
+    # Sidebar for navigation
+    page = st.sidebar.selectbox("Choose a page", ["Home", "Campaign Creation", "Strategy", "Scheduling", "Analytics"])
+
+    if page == "Home":
+        show_home()
+    elif page == "Campaign Creation":
+        show_campaign_creation()
+    elif page == "Strategy":
+        show_strategy()
+    elif page == "Scheduling":
+        show_scheduling()
+    elif page == "Analytics":
+        show_analytics()
+
+def show_home():
+    st.header("Welcome to AI Marketing Campaign Agent")
+    st.write("This tool helps you create, manage, and analyze your marketing campaigns using AI.")
+    st.write("Use the sidebar to navigate through different features.")
+
+def show_campaign_creation():
+    st.header("Campaign Creation")
+
+    # Brand Questionnaire
+    st.subheader("Brand Questionnaire")
+    brand_name = st.text_input("Brand Name")
+    industry = st.selectbox("Industry", ["Technology", "Fashion", "Food & Beverage", "Other"])
+    target_audience = st.text_area("Describe your target audience")
+    campaign_objective = st.selectbox("Campaign Objective", ["Brand Awareness", "Lead Generation", "Sales", "Other"])
+
+    # Content Generation
+    st.subheader("Content Generation")
+    content_type = st.selectbox("Content Type", ["Social Media Post", "Ad Copy", "Email"])
+    content_prompt = st.text_area("Describe the content you want to generate")
+
+    if st.button("Generate Content"):
+        with st.spinner("Generating content..."):
+            generated_content = generate_marketing_content(content_prompt, f"{brand_name}, {industry}, {target_audience}, {campaign_objective}")
+        st.text_area("Generated Content", generated_content, height=200)
+
+def show_strategy():
+    st.header("Marketing Strategy")
+
+    start_date = st.date_input("Campaign Start Date")
+    duration = st.number_input("Campaign Duration (days)", min_value=1, value=30)
+
+    if st.button("Generate Strategy"):
+        with st.spinner("Generating strategy..."):
+            strategy = generate_marketing_content("Generate a marketing strategy", f"Start Date: {start_date}, Duration: {duration} days")
+
+        st.subheader("Generated Marketing Strategy")
+        st.text(strategy)
+
+    if st.button("Generate PDF Proposal"):
+        st.write("PDF generation functionality to be implemented.")
+
+def show_scheduling():
+    st.header("Content Scheduling")
+
+    platforms = st.multiselect("Select Platforms", ["Facebook", "Instagram", "Twitter"])
+    post_content = st.text_area("Post Content")
+    post_date = st.date_input("Post Date")
+    post_time = st.time_input("Post Time")
+
+    if st.button("Schedule Post"):
+        scheduled_datetime = datetime.combine(post_date, post_time)
+        for platform in platforms:
+            st.success(f"Post scheduled for {platform} at {scheduled_datetime}")
+
+def show_analytics():
+    st.header("Campaign Analytics")
+    st.write("This feature is under development. It will show campaign performance metrics and insights.")
+
+if __name__ == "__main__":
+    main()
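
To try the dashboard locally, the standard Streamlit launcher applies; this assumes the hard-coded model_path above (or your own adapter directory) exists on the machine:

streamlit run App.py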
Train.py ADDED
@@ -0,0 +1,219 @@
+# -*- coding: utf-8 -*-
+"""train.py.ipynb
+
+Automatically generated by Colab.
+
+Original file is located at
+    https://colab.research.google.com/drive/1KS0vOkR4vSJYwCiFMW4o9c8RK1h5hpCt
+"""
+
+!pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
+!pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes ratelimit
+!pip install torch transformers datasets rich
+
+from unsloth import FastLanguageModel
+import torch
+
+# Model configuration
+max_seq_length = 800  # Adjust to your needs
+dtype = None  # None for auto-detection; use float16 or bfloat16 if you know which applies
+load_in_4bit = True  # Enable 4-bit quantization to reduce memory usage
+
+# Load the pretrained model with the optimizations
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="unsloth/Meta-Llama-3.1-8B",  # Name of the pretrained model
+    max_seq_length=max_seq_length,  # Maximum sequence length
+    dtype=dtype,  # GPU dtype (auto-detected when None)
+    load_in_4bit=load_in_4bit,  # 4-bit quantization to save memory
+)
+
+print("Model loaded successfully.")
+
+# Apply LoRA adapters to the model
+model = FastLanguageModel.get_peft_model(
+    model,
+    r=16,
+    target_modules=["q_proj", "k_proj", "v_proj", "o_proj", "gate_proj", "up_proj", "down_proj"],
+    lora_alpha=16,
+    lora_dropout=0,
+    bias="none",
+    use_gradient_checkpointing="unsloth",
+    random_state=3407,
+    use_rslora=False,
+    loftq_config=None,
+)
+
+print("LoRA adapters applied successfully.")
+
+from datasets import load_dataset
+
+# Step 8: Format the dataset for training
+# Define the prompt template
+alpaca_prompt = """Below is an instruction that describes a task, paired with an input that provides further context. Write a response that appropriately completes the request.
+
+### Instruction:
+{}
+
+### Input:
+{}
+
+### Response:
+{}"""
+EOS_TOKEN = tokenizer.eos_token
+
+# Function to format the dataset
+def formatting_prompts_func(examples):
+    instructions = examples["instruction"]
+    inputs = examples["input"]
+    outputs = examples["response"]
+    texts = []
+    for instruction, input_text, output in zip(instructions, inputs, outputs):
+        text = alpaca_prompt.format(instruction, input_text, output) + EOS_TOKEN
+        texts.append(text)
+    return {"text": texts}
+
+# Load and prepare the dataset for fine-tuning
+dataset = load_dataset('json', data_files='/content/marketing_social_media_dataset_v1.json', split='train')
+
+# Apply the formatting to the whole dataset with map
+dataset = dataset.map(formatting_prompts_func, batched=True)
+
+print("Dataset loaded and formatted successfully.")
+
+from trl import SFTTrainer
+from transformers import TrainingArguments
+
+# Training setup using SFTTrainer
+trainer = SFTTrainer(
+    model=model,
+    tokenizer=tokenizer,
+    train_dataset=dataset,
+    dataset_text_field="text",
+    max_seq_length=max_seq_length,
+    dataset_num_proc=2,
+    packing=False,
+    args=TrainingArguments(
+        per_device_train_batch_size=2,
+        gradient_accumulation_steps=4,
+        warmup_steps=5,
+        max_steps=60,
+        learning_rate=2e-4,
+        fp16=True,   # fp16 is enabled here; on Ampere or newer GPUs, prefer bf16=True with fp16=False
+        bf16=False,
+        logging_steps=1,
+        optim="adamw_8bit",
+        weight_decay=0.01,
+        lr_scheduler_type="linear",
+        seed=3407,
+        output_dir="outputs",
+    ),
+)
+
+# Start training
+trainer_stats = trainer.train()
+
+print("Training completed successfully.")
+
+from rich.console import Console
+from rich.panel import Panel
+from rich.text import Text
+from rich.markdown import Markdown as RichMarkdown
+from IPython.display import display, Markdown
+import json
+
+# Configure the model for inference
+FastLanguageModel.for_inference(model)
+
+# Generate text for a given instruction
+inputs = tokenizer(
+    [
+        alpaca_prompt.format(
+            "Best marketing post for sneaker company",  # Instruction for the model
+            "",  # Additional input (none in this case)
+            "",  # Expected response (left empty for generation)
+        )
+    ], return_tensors="pt").to("cuda")
+
+# Generate the output without using TextStreamer
+output = model.generate(**inputs, max_new_tokens=128)
+
+# Decode the output
+output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+# Parse the generated text into a dictionary
+def parse_output_to_dict(output_text):
+    result = {}
+    current_section = None
+    lines = output_text.split('\n')
+    for line in lines:
+        line = line.strip()
+        if line.startswith('###'):
+            current_section = line.strip('# ').lower().replace(' ', '_')
+            result[current_section] = {}
+        elif ':' in line and current_section:  # guard against text before the first section header
+            key, value = line.split(':', 1)
+            key = key.lower().replace(' ', '_').strip()
+            result[current_section][key] = value.strip()
+        elif line and current_section:
+            if 'content' not in result[current_section]:
+                result[current_section]['content'] = []
+            result[current_section]['content'].append(line)
+
+    return result
+
+# Parse the generated output into a dictionary
+parsed_output = parse_output_to_dict(output_text)
+
+# Show the parsed output as formatted JSON
+display(Markdown("## Parsed JSON Output\n\n```json\n" + json.dumps(parsed_output, indent=2) + "\n```"))
+
+# Save the fine-tuned model and tokenizer to a directory
+model.save_pretrained("lora_model")
+tokenizer.save_pretrained("lora_model")
+
+print("Model and tokenizer saved to 'lora_model'.")
+
+from unsloth import FastLanguageModel
+
+# Reload the model and tokenizer from the saved directory
+model, tokenizer = FastLanguageModel.from_pretrained(
+    model_name="lora_model",
+    max_seq_length=max_seq_length,
+    dtype=dtype,
+    load_in_4bit=load_in_4bit,
+)
+
+# Configure the model for inference
+FastLanguageModel.for_inference(model)
+
+print("Model and tokenizer reloaded from 'lora_model'.")
+
+# Generate text for a new prompt
+inputs = tokenizer(
+    [
+        alpaca_prompt.format(
+            "Create a marketing campaign to promote the chocolate bar",  # Instruction
+            "Company: Cadbury, target audience: adults/boomers",  # Additional input context
+            "",  # Expected response (left empty for generation)
+        )
+    ], return_tensors="pt").to("cuda")
+
+# Generate the output (if you are not using TextStreamer, the streamer argument can simply be omitted)
+output = model.generate(**inputs, max_new_tokens=128)
+
+# Decode the output
+output_text = tokenizer.decode(output[0], skip_special_tokens=True)
+
+# Show the generated output
+print("Generated model output:")
+print(output_text)
+
+from google.colab import files
+import shutil
+
+# Create a zip archive of the model
+shutil.make_archive("lora_model", 'zip', "lora_model")
+
+# Download the zip file
+files.download("lora_model.zip")
+
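
A note on the data this script assumes: formatting_prompts_func reads the "instruction", "input", and "response" fields of each record in marketing_social_media_dataset_v1.json, so a compatible record looks like the following (field names from the code above, values purely illustrative):

{"instruction": "Write a social media post", "input": "Brand: Acme Sneakers, audience: runners", "response": "Lace up: the new Acme trainers drop Friday."}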
requirements (1).txt ADDED
@@ -0,0 +1,5 @@
+pip install "unsloth[colab-new] @ git+https://github.com/unslothai/unsloth.git"
+pip install --no-deps "xformers<0.0.27" "trl<0.9.0" peft accelerate bitsandbytes ratelimit
+pip install torch transformers datasets rich
+ipython
+openai
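
Note that this file mixes shell commands with two bare package names, so it is not consumable via pip install -r as-is; the first three lines mirror the Colab installs at the top of Train.py and are meant to be run directly in a shell or notebook.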
requirements.txt ADDED
@@ -0,0 +1,117 @@
+accelerate==0.33.0
+aiohappyeyeballs==2.4.0
+aiohttp==3.10.4
+aiosignal==1.3.1
+altair==5.4.0
+annotated-types==0.7.0
+anyio==4.4.0
+asttokens==2.4.1
+async-timeout==4.0.3
+attrs==24.2.0
+bitsandbytes==0.43.3
+blinker==1.8.2
+cachetools==5.5.0
+certifi==2024.7.4
+charset-normalizer==3.3.2
+click==8.1.7
+datasets==2.21.0
+decorator==5.1.1
+dill==0.3.8
+distro==1.9.0
+docstring_parser==0.16
+exceptiongroup==1.2.2
+executing==2.0.1
+filelock==3.15.4
+frozenlist==1.4.1
+fsspec==2024.6.1
+gitdb==4.0.11
+GitPython==3.1.43
+h11==0.14.0
+hf_transfer==0.1.8
+httpcore==1.0.5
+httpx==0.27.0
+huggingface-hub==0.24.5
+idna==3.7
+ipython==8.26.0
+jedi==0.19.1
+Jinja2==3.1.4
+jiter==0.5.0
+jsonschema==4.23.0
+jsonschema-specifications==2023.12.1
+markdown-it-py==3.0.0
+MarkupSafe==2.1.5
+matplotlib-inline==0.1.7
+mdurl==0.1.2
+mpmath==1.3.0
+multidict==6.0.5
+multiprocess==0.70.16
+narwhals==1.4.2
+networkx==3.3
+numpy==2.0.1
+nvidia-cublas-cu12==12.1.3.1
+nvidia-cuda-cupti-cu12==12.1.105
+nvidia-cuda-nvrtc-cu12==12.1.105
+nvidia-cuda-runtime-cu12==12.1.105
+nvidia-cudnn-cu12==9.1.0.70
+nvidia-cufft-cu12==11.0.2.54
+nvidia-curand-cu12==10.3.2.106
+nvidia-cusolver-cu12==11.4.5.107
+nvidia-cusparse-cu12==12.1.0.106
+nvidia-nccl-cu12==2.20.5
+nvidia-nvjitlink-cu12==12.6.20
+nvidia-nvtx-cu12==12.1.105
+openai==1.41.0
+packaging==24.1
+pandas==2.2.2
+parso==0.8.4
+peft==0.12.0
+pexpect==4.9.0
+pillow==10.4.0
+prompt_toolkit==3.0.47
+protobuf==3.20.3
+psutil==6.0.0
+ptyprocess==0.7.0
+pure_eval==0.2.3
+pyarrow==17.0.0
+pydantic==2.8.2
+pydantic_core==2.20.1
+pydeck==0.9.1
+Pygments==2.18.0
+python-dateutil==2.9.0.post0
+pytz==2024.1
+PyYAML==6.0.2
+ratelimit==2.2.1
+referencing==0.35.1
+regex==2024.7.24
+requests==2.32.3
+rich==13.7.1
+rpds-py==0.20.0
+safetensors==0.4.4
+sentencepiece==0.2.0
+shtab==1.7.1
+six==1.16.0
+smmap==5.0.1
+sniffio==1.3.1
+stack-data==0.6.3
+streamlit==1.37.1
+sympy==1.13.2
+tenacity==8.5.0
+tokenizers==0.19.1
+toml==0.10.2
+torch==2.4.0
+tornado==6.4.1
+tqdm==4.66.5
+traitlets==5.14.3
+transformers==4.44.0
+triton==3.0.0
+trl==0.9
+typing_extensions==4.12.2
+tyro==0.8.8
+tzdata==2024.1
+unsloth @ git+https://github.com/unslothai/unsloth.git@52bc19d1fa4cd3557b785127fd68b5f4d1c34347
+urllib3==2.2.2
+watchdog==4.0.2
+wcwidth==0.2.13
+xformers==0.0.27
+xxhash==3.5.0
+yarl==1.9.4
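
Assuming a CUDA 12-capable Linux environment that matches these pins (torch 2.4.0 with cu12 wheels, xformers 0.0.27), the environment installs in the usual way:

pip install -r requirements.txt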