I am trying to take a .cbz
/ comic book file, read the images into a single byte array, and return it as a single image to serve in a web application. For testing purposes, a .cbz
file is just a ZIP archive with its extension changed, so you can create your own .cbz
file by compressing a list of .jpg
files into a .zip, renaming the extension, and then trying the code.
Here is my current code:
package main
import (
"archive/zip"
"fmt"
"io"
"log"
"net/http"
"os"
"path/filepath"
)
// main wires the /rendercbz route onto the default mux and serves it on
// port 8080, terminating the process if the listener fails.
func main() {
	const addr = ":8080"

	http.HandleFunc("/rendercbz", handleRenderCBZ)
	if err := http.ListenAndServe(addr, nil); err != nil {
		log.Fatal(err)
	}
}
// handleRenderCBZ opens a hard-coded .cbz archive, asks
// combineImagesFromCBZ for the combined image bytes, and writes them to the
// client with an "image/jpeg" content type.
//
// Failures that happen before any body byte is written are reported to the
// client as 500 Internal Server Error. A failure while writing the body is
// only logged, because the status line and headers have already been sent
// by then.
func handleRenderCBZ(w http.ResponseWriter, r *http.Request) {
	// Example path to .cbz file on the server filesystem
	filePath := "/home/my-home-dir/my-comic-book.cbz"

	// Open .cbz file from the filesystem
	file, err := os.Open(filePath)
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to open file: %v", err), http.StatusInternalServerError)
		return
	}
	defer file.Close()

	// Combine images from .cbz file into a single JPEG byte slice
	combinedData, err := combineImagesFromCBZ(file)
	if err != nil {
		http.Error(w, fmt.Sprintf("Failed to combine images from CBZ: %v", err), http.StatusInternalServerError)
		return
	}

	// Serve the combined image as response
	w.Header().Set("Content-Type", "image/jpeg")
	if _, err := w.Write(combinedData); err != nil {
		// Write has already committed a 200 status and the headers, so
		// calling http.Error here could not change the status and would
		// only append text to a partially sent image. Log instead.
		log.Printf("Failed to write image response: %v", err)
	}
}
// combineImagesFromCBZ reads the zip archive backed by file and concatenates
// the raw bytes of every accepted entry, in archive order, followed by one
// extra 0xff 0xd9 marker.
//
// An entry is accepted when all of the following hold:
//   - its base name is not "thumbnail.jpg";
//   - its extension is exactly ".jpg", ".jpeg", ".png" or ".gif"
//     (the comparison is case-sensitive);
//   - its data ends with the two bytes 0xff 0xd9.
//
// Entries that cannot be opened or read are logged and skipped.
//
// It returns an error if the file cannot be stat'ed, if it is not a readable
// zip archive, or if no entry was accepted. In the error case the returned
// slice is nil.
func combineImagesFromCBZ(file *os.File) ([]byte, error) {
	// Get file info to determine file size
	fileInfo, err := file.Stat()
	if err != nil {
		return nil, fmt.Errorf("failed to get file info: %w", err)
	}

	// Create a zip.Reader from the file
	reader, err := zip.NewReader(file, fileInfo.Size())
	if err != nil {
		return nil, fmt.Errorf("failed to create zip reader: %w", err)
	}

	var combinedData []byte
	var imageCount int

	// Iterate through each file in the .cbz archive
	for _, zipFile := range reader.File {
		log.Printf("Processing image: %s", zipFile.Name)

		// Skip files named "thumbnail.jpg" and non-image files
		if filepath.Base(zipFile.Name) == "thumbnail.jpg" {
			log.Printf("Skipping thumbnail file: %s", zipFile.Name)
			continue
		}
		ext := filepath.Ext(zipFile.Name)
		if ext != ".jpg" && ext != ".jpeg" && ext != ".png" && ext != ".gif" {
			log.Printf("Skipping non-image file: %s", zipFile.Name)
			continue
		}

		rc, err := zipFile.Open()
		if err != nil {
			log.Printf("Failed to open file in CBZ archive: %v", err)
			continue
		}
		fileData, err := io.ReadAll(rc)
		// Close right away instead of deferring: a defer inside the loop
		// would keep every entry open until the function returns.
		rc.Close()
		if err != nil {
			log.Printf("Failed to read file %s: %v", zipFile.Name, err)
			continue
		}

		// Validate that the image ends with 0xff, 0xd9
		n := len(fileData)
		if n < 2 || fileData[n-2] != 0xff || fileData[n-1] != 0xd9 {
			log.Printf("Invalid image ending for file: %s", zipFile.Name)
			continue
		}

		combinedData = append(combinedData, fileData...)
		imageCount++
	}

	// This check must run before the trailing marker is appended; otherwise
	// the result is never empty and an archive without usable images would
	// be reported as a success containing only the two marker bytes.
	if imageCount == 0 {
		log.Printf("Number of valid images found: %d", imageCount)
		return nil, fmt.Errorf("no valid image data found in CBZ file")
	}

	// Append a single EOF marker to the end of combinedData
	combinedData = append(combinedData, 0xff, 0xd9)

	log.Printf("Final combinedData size = %d", len(combinedData))
	log.Printf("Number of valid images found: %d", imageCount)
	return combinedData, nil
}
And here is the corresponding console log:
> go run main.go
2024/07/10 07:33:44 Processing image: 01.jpg
2024/07/10 07:33:44 Processing image: 02.jpg
2024/07/10 07:33:44 Processing image: 03.jpg
2024/07/10 07:33:44 Processing image: 04.jpg
2024/07/10 07:33:44 Processing image: 05.jpg
2024/07/10 07:33:45 Processing image: 06.jpg
2024/07/10 07:33:45 Processing image: 07.jpg
2024/07/10 07:33:45 Processing image: 08.jpg
2024/07/10 07:33:45 Processing image: 09.jpg
2024/07/10 07:33:45 Processing image: 10.jpg
2024/07/10 07:33:45 Processing image: 11.jpg
2024/07/10 07:33:45 Processing image: ComicInfo.xml
2024/07/10 07:33:45 Skipping non-image file: ComicInfo.xml
2024/07/10 07:33:45 Processing image: thumbnail.jpg
2024/07/10 07:33:45 Skipping thumbnail file: thumbnail.jpg
2024/07/10 07:33:45 Final combinedData size = 35750599
2024/07/10 07:33:45 Number of valid images found: 11
If I compare the original first image within the archive with the image downloaded from the API, the sizes differ significantly, even though the API only returned one image:
Can someone spot why the api only returns the first image in the .cbz file when going to localhost:8080/rendercbz
? It seems like there is something wrong with the built image that cuts off the remaining images from being rendered properly.
Note: I can't use jpeg.Encode since there are many large images inside the .cbz file causing this error: Failed to encode and send image: failed to encode image: jpeg: image is too large to encode
.
I have gotten single-page selection through a ?page=#
query parameter to work, but wanted to explore whether it is possible to build a single image.
I somewhat got it to work, but as expected it performs very poorly (maybe just because I'm currently on old hardware). If someone else wants to give it a try, or possibly improve upon the work, feel free to.
package main
import (
"archive/zip"
"bytes"
"fmt"
"image"
"image/draw"
"image/jpeg"
"image/png"
"io"
"log"
"net/http"
"os"
"path/filepath"
"sort"
"strings"
"golang.org/x/image/webp"
)
// port is the TCP port the HTTP server listens on.
const port = 8080

// cbzDirectory is the directory where .cbz files are stored.
const cbzDirectory = "./"
func main() {
http.HandleFunc("/webtoon", handleWebtoon)
log.Printf("Server starting on port %d...\n", port)
log.Fatal(http.ListenAndServe(fmt.Sprintf(":%d", port), nil))
}
// handleWebtoon serves GET /webtoon?file=<name>.cbz by stitching the images
// of the named archive into one strip and streaming it as a PNG.
//
// Responses:
//   - 405 if the method is not GET;
//   - 400 if the "file" parameter is missing or does not end in ".cbz";
//   - 404 if the resolved file does not exist;
//   - 500 if the strip cannot be created;
//   - 200 with an "image/png" body otherwise.
//
// The "file" parameter is untrusted. It is resolved relative to
// cbzDirectory and cannot refer to anything outside that directory.
func handleWebtoon(w http.ResponseWriter, r *http.Request) {
	if r.Method != http.MethodGet {
		http.Error(w, "Method not allowed", http.StatusMethodNotAllowed)
		return
	}

	filename := r.URL.Query().Get("file")
	if filename == "" {
		http.Error(w, "File parameter is required", http.StatusBadRequest)
		return
	}
	if filepath.Ext(filename) != ".cbz" {
		http.Error(w, "Invalid file extension. Only .cbz files are allowed", http.StatusBadRequest)
		return
	}

	// filepath.Clean alone keeps leading ".." elements, so a request such as
	// ?file=../../secret.cbz would escape cbzDirectory. Cleaning a rooted
	// path drops every ".." that would climb above the root, which confines
	// the joined result to cbzDirectory while still allowing subdirectories.
	confined := filepath.Clean(string(filepath.Separator) + filename)
	filePath := filepath.Join(cbzDirectory, confined)

	if _, err := os.Stat(filePath); os.IsNotExist(err) {
		http.Error(w, "File not found", http.StatusNotFound)
		return
	}

	img, err := CreateWebtoonStrip(filePath)
	if err != nil {
		log.Printf("Error creating webtoon strip: %v", err)
		http.Error(w, fmt.Sprintf("Error processing file: %v", err), http.StatusInternalServerError)
		return
	}

	w.Header().Set("Content-Type", "image/png")
	w.Header().Set("Content-Disposition", fmt.Sprintf("inline; filename=\"%s.png\"", filepath.Base(filename)))

	if err := streamPNG(w, img); err != nil {
		// Part of the PNG may already be on the wire with a 200 status, so
		// http.Error could not replace the response. Log instead.
		log.Printf("Error streaming PNG: %v", err)
	}
}
// CreateWebtoonStrip opens the zip archive at cbzFilePath, decodes its image
// entries in ascending name order (plain string comparison), and stacks them
// vertically into a single RGBA image.
//
// The width of the first successfully decoded image becomes the strip width;
// later images with a different width are logged and skipped, as are entries
// that fail to decode. Failing to open the archive, or to open or read an
// image entry, aborts with an error. An error is also returned when no image
// ends up in the strip.
func CreateWebtoonStrip(cbzFilePath string) (image.Image, error) {
	reader, err := zip.OpenReader(cbzFilePath)
	if err != nil {
		return nil, fmt.Errorf("error opening CBZ file: %w", err)
	}
	defer reader.Close()

	sort.Slice(reader.File, func(i, j int) bool {
		return reader.File[i].Name < reader.File[j].Name
	})

	var (
		images      []image.Image
		totalHeight int
		commonWidth int
	)

	for _, file := range reader.File {
		if !isImageFile(file.Name) {
			continue
		}

		rc, err := file.Open()
		if err != nil {
			return nil, fmt.Errorf("error opening file %s: %w", file.Name, err)
		}
		data, err := io.ReadAll(rc)
		// Close immediately; a defer here would hold every entry open
		// until the function returns.
		rc.Close()
		if err != nil {
			return nil, fmt.Errorf("error reading file %s: %w", file.Name, err)
		}

		img, format, err := decodeImage(bytes.NewReader(data))
		if err != nil {
			log.Printf("Error decoding file %s: %v", file.Name, err)
			continue // Skip this file and try the next one
		}
		log.Printf("Successfully decoded %s as %s", file.Name, format)

		bounds := img.Bounds()
		width := bounds.Dx()
		if commonWidth == 0 {
			commonWidth = width
		} else if width != commonWidth {
			log.Printf("Skipping %s: width %d doesn't match common width %d", file.Name, width, commonWidth)
			continue
		}

		images = append(images, img)
		totalHeight += bounds.Dy()
	}

	if len(images) == 0 {
		return nil, fmt.Errorf("no valid images found with matching width in the CBZ file")
	}

	finalImage := image.NewRGBA(image.Rect(0, 0, commonWidth, totalHeight))
	currentY := 0
	for _, img := range images {
		bounds := img.Bounds()
		dst := image.Rect(0, currentY, commonWidth, currentY+bounds.Dy())
		// An image's bounds need not start at (0, 0), so copy from
		// bounds.Min rather than from the zero point.
		draw.Draw(finalImage, dst, img, bounds.Min, draw.Src)
		currentY += bounds.Dy()
	}

	return finalImage, nil
}
// isImageFile reports whether filename has one of the extensions .jpg,
// .jpeg, .png or .webp, compared case-insensitively.
func isImageFile(filename string) bool {
	switch strings.ToLower(filepath.Ext(filename)) {
	case ".jpg", ".jpeg", ".png", ".webp":
		return true
	default:
		return false
	}
}
// decodeImage reads all of r and attempts to decode the bytes as JPEG, then
// PNG, then WebP, returning the image and the name of the first format that
// decodes without error. If reading fails, or none of the three decoders
// accepts the data, it returns a nil image, an empty format and an error.
func decodeImage(r io.Reader) (image.Image, string, error) {
	raw, err := io.ReadAll(r)
	if err != nil {
		return nil, "", fmt.Errorf("error reading image data: %v", err)
	}

	// Candidate decoders, in the order they are tried.
	candidates := []struct {
		format string
		decode func(io.Reader) (image.Image, error)
	}{
		{format: "jpeg", decode: jpeg.Decode},
		{format: "png", decode: png.Decode},
		{format: "webp", decode: webp.Decode},
	}

	for _, c := range candidates {
		// Each attempt gets its own reader so it starts at byte zero.
		decoded, decodeErr := c.decode(bytes.NewReader(raw))
		if decodeErr == nil {
			return decoded, c.format, nil
		}
	}

	return nil, "", fmt.Errorf("unsupported image format")
}
// streamPNG encodes img as PNG with the default compression level and
// writes the result to w, returning whatever error the encoder reports.
func streamPNG(w io.Writer, img image.Image) error {
	enc := &png.Encoder{CompressionLevel: png.DefaultCompression}
	return enc.Encode(w, img)
}
It is by no means perfect, but it does the job: it takes every image whose width matches that of the first decoded image (so there is no dead space in the final image) and renders them as one MASSIVE image.
With that said, I am sticking with lazy loading through a ?page=#
query parameter.
Note: I had to abandon using JPEG as the content type, since JPEG has a maximum dimension of 65535x65535. When each page in the webtoon is around 12000 pixels in height and 720 in width, that limit is quickly exceeded. So we decode the JPEGs and return one large PNG (which probably doesn't help with performance).
I also uploaded it here: https://github.com/alexander-bruun/go-cbz-to-png/tree/main in case people want a fast git clone
.