# -*- coding: utf-8 -*-
"""Mockinterview-Falcon.ipynb

Automatically generated by Colaboratory.

Original file is located at
    https://colab.research.google.com/drive/1hCKPV5U_bg7QQXPUIwwDvCdB1N8fgz0z

## Install dependencies
"""


"""## Import dependencies"""

import os
import torch
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    pipeline
)

import csv
import codecs

from langchain import PromptTemplate, LLMChain
import random

"""## Creating pipeline for Falcon-7b"""

from langchain import HuggingFacePipeline

model = "tiiuae/falcon-7b-instruct" #tiiuae/falcon-40b-instruct

tokenizer = AutoTokenizer.from_pretrained(model)

pipe = pipeline(  # named `pipe` to avoid shadowing the transformers.pipeline factory
    "text-generation",  # task
    model=model,
    tokenizer=tokenizer,
    torch_dtype=torch.bfloat16,
    trust_remote_code=True,  # Falcon ships custom modelling code
    device_map="auto",
    max_length=512,  # prompt + generated tokens
    do_sample=True,
    top_k=10,
    num_return_sequences=1,
    eos_token_id=tokenizer.eos_token_id
)

# Sampling behaviour is governed by the pipeline arguments above;
# HuggingFacePipeline's model_kwargs only takes effect when loading via from_model_id.
llm = HuggingFacePipeline(pipeline=pipe)
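
# Optional sanity check of the pipeline (illustrative prompt; uncomment to try):
# print(llm("Explain the bias-variance tradeoff briefly."))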

"""## Loading csv (attempting the program without RAG)

* RAG was reducing program efficiency.
"""

file = "/content/Combined_Data_set.csv"

fields = []
rows = []

with codecs.open(file, 'r', 'utf-8') as csvfile:
    csvreader = csv.reader(csvfile)
    fields = next(csvreader)  # header row

    for row in csvreader:
        rows.append(row)
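
# Assumed CSV layout, inferred from the indexing used below: column 0 holds the
# interview topic and column 1 a reference answer.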

"""## LLMChain for deciding next question"""

# The prompt template controls how questions are asked (for example the interviewer's tone); few-shot examples are one way to steer this, as sketched below.
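
# A minimal few-shot variant of the question prompt (an illustrative sketch,
# not wired into the chain below; the example topic and question are invented):
few_shot_template = """
You are a friendly data science interviewer. Ask one question on the given topic.

Topic: regularization
Question: How do L1 and L2 regularization differ, and when would you prefer each?

Topic: {topic}
Question:
"""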

record = random.randint(0, len(rows) - 1)  # randint is inclusive at both ends, so subtract 1 to stay in range

def get_question():
    topic = rows[record][0]  # extracting the topic from the csv

    template1 = """
    You are a data science interviewer in the middle of an interview. Ask a question on the following topic:
    Topic: {topic}
    """
    prompt1 = PromptTemplate(template=template1, input_variables=["topic"])

    llm_chain = LLMChain(prompt=prompt1, llm=llm)

    next_question = llm_chain.run(topic)
    # print(next_question)

    # Return a dict directly; hand-formatting a JSON string breaks as soon as
    # the model output contains quotes or braces.
    return {"question": next_question}

result = get_question()
print(result)

print(result['question'])

"""## LLMChain for evaluating user response"""

# Evaluation quality can likewise be improved through the prompt, e.g. with few-shot examples showing how to phrase positive and negative feedback (a sketch follows).
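
# A few-shot sketch of the evaluation prompt (illustrative; the answer/feedback
# pair is invented and the template is not wired into the chain below):
few_shot_eval_template = '''
You are a data science interviewer. Give short, constructive feedback on the answer.

Answer: Overfitting is when the model memorizes the training data.
Feedback: Correct as far as it goes; also mention poor generalization to unseen data and mitigations such as regularization.

Answer: {response}
Feedback:
'''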

# corr_ans = rows[record][1]  # reference answer from the csv (currently unused)

def get_evaluation(response):
    template2 = '''
    You are a data science interviewer conducting an interview.
    Evaluate the candidate's response: {response}
    '''
    prompt2 = PromptTemplate(template=template2, input_variables=["response"])

    llm_chain2 = LLMChain(prompt=prompt2, llm=llm)

    evaluation = llm_chain2.run(response)
    # print(evaluation)

    # As in get_question(), return the dict directly rather than round-tripping
    # through a hand-formatted JSON string.
    return {"response": evaluation}

response = input("Enter your response: ")
result = get_evaluation(response)
print(result)

print(result['response'])