I want to execute a POST URLRequest, but first I need to create the request body. The body should look like this:
{
"model": "gpt-4-vision-preview",
"messages": [
{
"role": "user",
"content": [
{
"type": "text",
"text": <mutableStringValue>
},
{
"type": "image_url",
"image_url": {
"url": <mutableStringValue>
}
}
]
}
],
"max_tokens": 300
}
I cannot hardcode this as a .json file because the values of "text" (under the first "type") and "image_url" (under the second "type") are given as function parameters.
I don't know how to handle the two "type" keys.
As of now, I came up with something like this:
/// Request payload for a chat completion that carries a list of messages.
///
/// `model` and `maxTokens` are fixed constants, so only `messages` is supplied
/// by the caller through the memberwise initializer.
struct ImageInputRequestBody: Codable {
    /// A single chat message: the sender's role and the parts it contains.
    struct Message: Codable {
        let role: String
        let content: [Content]
    }

    /// One part of a message's content, tagged by `type`, with optional text.
    struct Content: Codable {
        let type: String
        let text: String?
    }

    /// Maps property names to JSON keys; only `maxTokens` is renamed.
    enum CodingKeys: String, CodingKey {
        case model
        case messages
        case maxTokens = "max_tokens"
    }

    let model = "gpt-4-vision-preview"
    let messages: [Message]
    let maxTokens = 300
}
But here I'm missing the second "type" and the "image_url" content.
You could try something simple like this:
/// One element of a message's `content` array: either a text part or an image-URL part.
///
/// `type` decides which payload is stored and encoded: `"text"` uses `text`;
/// any other value uses `imageUrl`.
struct Content: Identifiable, Codable {
    /// Local identity for `Identifiable`. It is not listed in `CodingKeys`,
    /// so it never appears in the encoded JSON.
    let id = UUID()
    let type: String
    var text: String?
    var imageUrl: ImgUrl?

    enum CodingKeys: String, CodingKey {
        case type, text
        case imageUrl = "image_url"
    }

    /// Creates a content part.
    ///
    /// - Parameters:
    ///   - type: The part's tag; `"text"` stores `value` as text, anything else
    ///     stores it as an image URL.
    ///   - value: The text or the URL string, depending on `type`
    ///     (which could be an enum).
    init(type: String, value: String? = nil) {
        self.type = type
        if type == "text" {
            text = value
        } else {
            // `map` leaves `imageUrl` nil when no value was given, without a force-unwrap.
            imageUrl = value.map { ImgUrl(url: $0) }
        }
    }

    /// Encodes `type` plus only the payload that matches it.
    ///
    /// - Throws: Any error raised by the encoder's keyed container.
    public func encode(to encoder: Encoder) throws {
        var container = encoder.container(keyedBy: CodingKeys.self)
        try container.encode(type, forKey: .type)
        // `encodeIfPresent` omits the key for a nil payload instead of writing
        // an explicit JSON `null`.
        if type == "text" {
            try container.encodeIfPresent(text, forKey: .text)
        } else {
            try container.encodeIfPresent(imageUrl, forKey: .imageUrl)
        }
    }
}
/// Request body for the chat-completions call: a model name, the messages, and a token cap.
///
/// `model` and `maxTokens` default to `"gpt-4-vision-preview"` and `300`, so
/// `ImageInputRequestBody(messages:)` works on its own, while callers may override either.
struct ImageInputRequestBody: Codable {
    /// Model name used when the caller (or decoded JSON) does not provide one.
    static let defaultModel = "gpt-4-vision-preview"
    /// Token cap used when the caller (or decoded JSON) does not provide one.
    static let defaultMaxTokens = 300

    let model: String
    let messages: [Message]
    let maxTokens: Int

    enum CodingKeys: String, CodingKey {
        case model, messages
        case maxTokens = "max_tokens"
    }

    /// Creates a request body.
    ///
    /// - Parameters:
    ///   - model: Model identifier; defaults to `defaultModel`.
    ///   - messages: The chat messages to send.
    ///   - maxTokens: Value encoded under `"max_tokens"`; defaults to `defaultMaxTokens`.
    init(
        model: String = ImageInputRequestBody.defaultModel,
        messages: [Message],
        maxTokens: Int = ImageInputRequestBody.defaultMaxTokens
    ) {
        self.model = model
        self.messages = messages
        self.maxTokens = maxTokens
    }

    /// Decodes a request body, falling back to the defaults when `"model"` or
    /// `"max_tokens"` is absent, so JSON without those keys still decodes.
    ///
    /// - Throws: A `DecodingError` if `"messages"` is missing or any value has the wrong type.
    init(from decoder: Decoder) throws {
        let container = try decoder.container(keyedBy: CodingKeys.self)
        model = try container.decodeIfPresent(String.self, forKey: .model)
            ?? ImageInputRequestBody.defaultModel
        messages = try container.decode([Message].self, forKey: .messages)
        maxTokens = try container.decodeIfPresent(Int.self, forKey: .maxTokens)
            ?? ImageInputRequestBody.defaultMaxTokens
    }
}
/// A chat message: the sender's role plus its ordered content parts.
struct Message: Codable {
    /// Sender role, e.g. "user".
    let role: String

    /// Content parts carried by this message.
    let content: [Content]
}
/// Wrapper for the value stored under a content part's `"image_url"` key;
/// encodes as an object with a single `url` field.
struct ImgUrl: Codable {
    /// Address of the image.
    let url: String

    // let detail: String? // <--- if desired later
}
Note that you need to add two Content items (or more, for multiple images) to the Message object.
For example:
/// Builds a sample request body holding one "user" message made of a text part
/// followed by an image-URL part.
///
/// - Parameters:
///   - text: Value for the `"text"` part.
///   - url: Value for the `"image_url"` part.
/// - Returns: A request body wrapping the single message.
func getTestRequestBody(text: String? = nil, url: String? = nil) -> ImageInputRequestBody {
    let parts = [
        Content(type: "text", value: text),
        Content(type: "image_url", value: url)
    ]
    let userMessage = Message(role: "user", content: parts)
    return ImageInputRequestBody(messages: [userMessage])
}
EDIT-1
Use it like this to post to OpenAI
// Prompt passed to `fetch` as `txt`.
let text = "What's in this image?"
// Image address passed to `fetch` as `urlString`.
let imgurl = "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
// ....
// NOTE(review): presumably a SwiftUI `.task` modifier attached to a view elided above — confirm.
// Its closure awaits `fetch` with the two values declared above.
.task {
await fetch(txt: text, urlString: imgurl)
}
//....
/// Posts a text prompt and an image URL to the OpenAI chat-completions endpoint
/// and prints the raw response body.
///
/// Encoding and transport errors are printed, not thrown.
///
/// - Parameters:
///   - txt: Value placed in the `"text"` content part.
///   - urlString: Value placed in the `"image_url"` content part.
private func fetch(txt: String? = nil, urlString: String? = nil) async {
    let apikey = "YOUR-APIKEY"
    // Early exit keeps the happy path un-nested.
    guard let url = URL(string: "https://api.openai.com/v1/chat/completions") else { return }

    var request = URLRequest(url: url)
    request.addValue("application/json", forHTTPHeaderField: "Content-Type")
    request.addValue("Bearer \(apikey)", forHTTPHeaderField: "Authorization")
    request.httpMethod = "POST"

    let body = ImageInputRequestBody(messages: [ // <--- here
        Message(role: "user", content: [
            Content(type: "text", value: txt),
            Content(type: "image_url", value: urlString)
        ])
    ])

    do {
        let encoded = try JSONEncoder().encode(body)
        request.httpBody = encoded
        // check the encoding
        // if let encodedString = String(data: encoded, encoding: .utf8) {
        //     print("\n----> encodedString: \n \(encodedString) \n")
        // }
        let (data, response) = try await URLSession.shared.data(for: request)
        // Report a non-2xx status instead of discarding the response.
        if let http = response as? HTTPURLResponse, !(200..<300).contains(http.statusCode) {
            print("-----> HTTP status \(http.statusCode)")
        }
        // `String(decoding:as:)` always yields a String, so no Optional needs bridging for printing.
        print("-----> \n \(String(decoding: data, as: UTF8.self)) \n")
        // let decoded = try JSONDecoder().decode(OpenAIResponse.self, from: data)
        // .....
    } catch {
        print(error)
    }
}