Tags: ios, json, swift, codable

Create JSON from given data in Swift


I want to execute a POST URLRequest, but first I need to create the request body. The body should look like this:

{
  "model": "gpt-4-vision-preview",
  "messages": [
    {
      "role": "user",
      "content": [
        {
          "type": "text",
          "text": <mutableStringValue>
        },
        {
          "type": "image_url",
          "image_url": {
            "url": <mutableStringValue>
          }
        }
      ]
    }
  ],
  "max_tokens": 300
}

I cannot hardcode this as a .json file because the values of "text" (in the first "type" entry) and "image_url" (in the second "type" entry) are passed in as function parameters. I also don't know how to handle the two entries that each carry a "type" key but different payloads.

As of now, I came up with something like this:

/// Request body for the chat-completions call (the asker's initial attempt).
///
/// `model` and `maxTokens` are fixed constants; only `messages` is supplied by the caller.
/// The nested `Content` type models just the text part (`type` + `text`), so the
/// `image_url` part of the target JSON cannot be expressed with this version.
struct ImageInputRequestBody: Codable {
        // Fixed model identifier sent with every request.
        let model: String = "gpt-4-vision-preview"
        let messages: [Message]
        // Fixed token limit; encoded under the "max_tokens" key (see CodingKeys).
        let maxTokens: Int = 300
        // Maps the camelCase property name to the snake_case JSON key.
        enum CodingKeys: String, CodingKey {
            case model, messages
            case maxTokens = "max_tokens"
        }
        
        // One chat message: a role string plus its list of content parts.
        struct Message: Codable {
            let role: String
            let content: [Content]
        }
        // A single content part; has a `type` and an optional `text` only.
        struct Content: Codable {
            let type: String
            let text: String?
        }
    }

But here I'm missing the second "type" entry and its "image_url" content.


Solution

  • You could try something simple like this:

    /// One entry of a message's `content` array: either a text part or an image-URL part.
    ///
    /// `type` decides which payload is stored and encoded: `"text"` uses the `text` key,
    /// any other value uses the `image_url` key.
    struct Content: Identifiable, Codable {
        /// Local identity for `Identifiable`. It is not listed in `CodingKeys`,
        /// so it is neither encoded nor decoded.
        let id = UUID()
        let type: String
        var text: String?
        var imageUrl: ImgUrl?

        enum CodingKeys: String, CodingKey {
            case type, text
            case imageUrl = "image_url"
        }

        /// Creates a content part.
        ///
        /// - Parameters:
        ///   - type: The part kind, e.g. `"text"` or `"image_url"` (this could become an enum).
        ///   - value: The text itself when `type` is `"text"`, otherwise the image URL string.
        init(type: String, value: String? = nil) {
            self.type = type
            if type == "text" {
                self.text = value
                self.imageUrl = nil
            } else {
                self.text = nil
                // Wrap the URL only when one was supplied; no force unwrap needed.
                self.imageUrl = value.map { ImgUrl(url: $0) }
            }
        }

        /// Encodes `type` plus the single payload key that matches it.
        ///
        /// A missing payload is omitted rather than written as an explicit JSON `null`.
        public func encode(to encoder: Encoder) throws {
            var container = encoder.container(keyedBy: CodingKeys.self)
            try container.encode(type, forKey: .type)
            if type == "text" {
                try container.encodeIfPresent(text, forKey: .text)
            } else {
                try container.encodeIfPresent(imageUrl, forKey: .imageUrl)
            }
        }
    }
    
    /// Top-level JSON body of the chat-completions request.
    ///
    /// Encodes as `{"model": …, "messages": […], "max_tokens": …}`.
    struct ImageInputRequestBody: Codable {
        /// Model used when the caller does not pick one.
        static let defaultModel = "gpt-4-vision-preview"
        /// Token limit used when the caller does not pick one.
        static let defaultMaxTokens = 300

        let model: String
        let messages: [Message]
        let maxTokens: Int

        enum CodingKeys: String, CodingKey {
            case model, messages
            case maxTokens = "max_tokens"
        }

        /// Creates a request body.
        ///
        /// - Parameters:
        ///   - model: Model identifier; defaults to `defaultModel`.
        ///   - messages: The chat messages to send.
        ///   - maxTokens: Token limit; defaults to `defaultMaxTokens`.
        init(model: String = ImageInputRequestBody.defaultModel,
             messages: [Message],
             maxTokens: Int = ImageInputRequestBody.defaultMaxTokens) {
            self.model = model
            self.messages = messages
            self.maxTokens = maxTokens
        }

        /// Decodes a request body, falling back to the defaults when `model` or
        /// `max_tokens` is absent.
        ///
        /// Written by hand because a `let` with an initial value is skipped by the
        /// synthesized decoder (and produces a compiler warning).
        init(from decoder: Decoder) throws {
            let container = try decoder.container(keyedBy: CodingKeys.self)
            model = try container.decodeIfPresent(String.self, forKey: .model)
                ?? ImageInputRequestBody.defaultModel
            messages = try container.decode([Message].self, forKey: .messages)
            maxTokens = try container.decodeIfPresent(Int.self, forKey: .maxTokens)
                ?? ImageInputRequestBody.defaultMaxTokens
        }
    }
    
    /// One chat message: a role string plus its list of content parts.
    struct Message: Codable {
        let role: String
        let content: [Content]
    }
    
    /// Wrapper for an image address; holds a single `url` string.
    struct ImgUrl: Codable {
        let url: String
        // let detail: String?  // <--- if desired later
    }
    

    Note that you need to add two `Content` values (or more, if you send several images) to the `Message` object.

    For example:

    /// Builds a sample request body containing one "user" message with a text part
    /// followed by an image-URL part.
    ///
    /// - Parameters:
    ///   - text: Value passed to the `"text"` content part.
    ///   - url: Value passed to the `"image_url"` content part.
    /// - Returns: An `ImageInputRequestBody` wrapping that single message.
    func getTestRequestBody(text: String? = nil, url: String? = nil) -> ImageInputRequestBody {
        let textPart = Content(type: "text", value: text)
        let imagePart = Content(type: "image_url", value: url)
        let userMessage = Message(role: "user", content: [textPart, imagePart])
        return ImageInputRequestBody(messages: [userMessage])
    }
    

    EDIT-1

    Use it like this to post to OpenAI

         // Prompt text and image address handed to `fetch` below.
         let text = "What's in this image?"
         let imgurl =  "https://upload.wikimedia.org/wikipedia/commons/thumb/d/dd/Gfp-wisconsin-madison-the-nature-boardwalk.jpg/2560px-Gfp-wisconsin-madison-the-nature-boardwalk.jpg"
         // ... surrounding code elided in this snippet
         // NOTE(review): `.task` is presumably attached to a SwiftUI view — confirm against the full file.
         .task {
             await fetch(txt: text, urlString: imgurl)
         }
         // ... surrounding code elided in this snippet
         
         /// Posts a text prompt and an image URL to the chat-completions endpoint
         /// and prints the raw response body.
         ///
         /// Errors from encoding or from the network call are printed, not thrown.
         ///
         /// - Parameters:
         ///   - txt: Prompt text placed in the `"text"` content part.
         ///   - urlString: Image address placed in the `"image_url"` content part.
         private func fetch(txt: String? = nil, urlString: String? = nil) async {
             // Placeholder only; avoid embedding a real key in source code.
             let apikey = "YOUR-APIKEY"

             guard let url = URL(string: "https://api.openai.com/v1/chat/completions") else {
                 return
             }

             var request = URLRequest(url: url)
             request.httpMethod = "POST"
             // `setValue` replaces any existing value; these headers must be single-valued.
             request.setValue("application/json", forHTTPHeaderField: "Content-Type")
             request.setValue("Bearer \(apikey)", forHTTPHeaderField: "Authorization")

             let body = ImageInputRequestBody(messages: [   // <--- here
                 Message(role: "user", content: [
                     Content(type: "text", value: txt),
                     Content(type: "image_url", value: urlString)
                 ])
             ])

             do {
                 let encoded = try JSONEncoder().encode(body)
                 request.httpBody = encoded
                 // To inspect the outgoing JSON while debugging:
                 // print("\n----> encodedString: \n \(String(decoding: encoded, as: UTF8.self)) \n")

                 let (data, response) = try await URLSession.shared.data(for: request)

                 // Surface non-2xx answers; the body printed below usually carries the error details.
                 if let http = response as? HTTPURLResponse, !(200..<300).contains(http.statusCode) {
                     print("-----> HTTP status \(http.statusCode)")
                 }
                 print("-----> \n \(String(decoding: data, as: UTF8.self)) \n")

                 // let decoded = try JSONDecoder().decode(OpenAIResponse.self, from: data)
                 // .....
             } catch {
                 print(error)
             }
         }