-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathquestion-generation.py
46 lines (35 loc) · 1.77 KB
/
question-generation.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
from openai import OpenAI
import os
import pandas as pd
import boto3
# Goal: Use the independent lines of information to create prompts for fine-tuning job.
# Purpose: Final set of preparation for model fine-tuning.
# Steps for this script:
## - Load in OpenAI GPT 3.5 turbo model
## - Use pretrained model with few shots learning to create prompts for the responses
## - Utilise data augmentation on prompts to create a diverse set of prompts
## - Format data for finetuning (.jsonl)
## Retrieving web-scraped data from AWS S3 bucket
# Create an S3 client (credentials come from the standard boto3 provider chain).
s3 = boto3.client('s3')
bucket_name = 'tampa-ai'
# OpenAI client; requires OPENAI_API_KEY to be set in the environment.
client = OpenAI(
    api_key=os.environ.get("OPENAI_API_KEY"),
)
# Read every statement from the scraped-data file, one per line.
# BUG FIX: the original `[infile.readline() for line in infile]` mixed the
# file's line iterator with explicit readline() calls, so each iteration
# consumed TWO lines and half the data was silently dropped. readlines()
# returns all lines (trailing newlines preserved, as before).
with open("data.txt", 'r') as infile:
    responses = infile.readlines()
def generate_questions(responses):
    """Generate a question/prompt for each statement using GPT-3.5 few-shot chat.

    Args:
        responses: iterable of statement strings scraped from the data file.

    Returns:
        dict mapping each input statement to the model-generated prompt.
    """
    # Few-shot examples are constant, so build them once outside the loop.
    few_shot_messages = [
        {"role": "system", "content": "You are a helpful assistant whose goal is to create prompts/questions for statements."},
        {"role": "user", "content": "Create a prompt for the following statement: Tampa is known for its vibrant waterfront parks."},
        {"role": "assistant", "content": "What is Tampa known for?"},
        {"role": "user", "content": "Create a prompt for the following statement: The best time is during the spring."},
        {"role": "assistant", "content": "When is the best time to visit Tampa?"},
    ]
    statement_to_prompt = {}
    for statement in responses:
        # One chat completion per statement; uses the module-level `client`.
        completion = client.chat.completions.create(
            model="gpt-3.5-turbo",
            messages=few_shot_messages
            + [{"role": "user", "content": f"Create a prompt for the following statement: {statement}"}],
        )
        statement_to_prompt[statement] = completion.choices[0].message.content
    return statement_to_prompt