java hibernate wildfly hibernate-search apache-tika

HSEARCH000151: Unable to get input stream from object of type byte


I am trying to index a PDF file through Hibernate Search's @TikaBridge. According to the documentation, a field annotated with @TikaBridge must be one of the following types:

String - where the string value is interpreted as a file path
URI - where the URI is interpreted as a resource URI
byte[]
java.sql.Blob

I have a PdfFile entity whose content field is a byte array:

package models;

import java.io.Serializable;
import java.util.Objects;
import javax.persistence.Column;
import javax.persistence.Entity;
import javax.persistence.Table;
import javax.persistence.GeneratedValue;
import javax.persistence.GenerationType;
import javax.persistence.Id;
import javax.persistence.Lob;
import org.hibernate.search.annotations.Field;

import org.hibernate.search.annotations.Indexed;
import org.hibernate.search.annotations.TikaBridge;

/**
 * JPA entity mapped to the {@code PDFS} table that pairs a file name with the
 * raw bytes of the file.
 *
 * <p>The class is marked {@code @Indexed}, and its {@code content} field carries
 * {@code @Field} and {@code @TikaBridge}. Equality and hashing are based on
 * {@code id} and {@code fileName} only; {@code content} takes no part in them.
 *
 * @author sergio
 */
@Indexed
@Entity
@Table(name = "PDFS")
public class PdfFile implements Serializable {

    /** Primary key, generated with the IDENTITY strategy. */
    @Id
    @GeneratedValue(strategy = GenerationType.IDENTITY)
    private Long id;

    /** Name of the file, stored in the {@code file_name} column. */
    @Column(name = "file_name")
    private String fileName;

    /** File bytes, stored as a LOB in the {@code content} column. */
    @Lob
    @Column(name = "content")
    @Field
    @TikaBridge
    private byte[] content;

    /** No-argument constructor; leaves every field at its default value. */
    public PdfFile() {
    }

    /**
     * Creates an instance with the given name and bytes; {@code id} stays unset.
     *
     * @param fileName name of the file
     * @param content  bytes of the file (the array reference is kept, not copied)
     */
    public PdfFile(String fileName, byte[] content) {
        this.content = content;
        this.fileName = fileName;
    }

    /** @return the identifier, or {@code null} if none has been assigned */
    public Long getId() {
        return this.id;
    }

    /** @param id the identifier to store */
    public void setId(Long id) {
        this.id = id;
    }

    /** @return the file name */
    public String getFileName() {
        return this.fileName;
    }

    /** @param fileName the file name to store */
    public void setFileName(String fileName) {
        this.fileName = fileName;
    }

    /** @return the stored byte array itself (no copy is made) */
    public byte[] getContent() {
        return this.content;
    }

    /** @param content the byte array to store (the reference is kept, not copied) */
    public void setContent(byte[] content) {
        this.content = content;
    }

    /** Combines the hashes of {@code id} and {@code fileName}, seeded with 3 and multiplied by 59. */
    @Override
    public int hashCode() {
        final int seeded = 59 * 3 + Objects.hashCode(id);
        return 59 * seeded + Objects.hashCode(fileName);
    }

    /**
     * Two instances are equal when they are of exactly the same class and both
     * {@code fileName} and {@code id} are equal.
     */
    @Override
    public boolean equals(Object obj) {
        if (obj == this) {
            return true;
        }
        if (obj == null || obj.getClass() != getClass()) {
            return false;
        }
        final PdfFile that = (PdfFile) obj;
        return Objects.equals(fileName, that.fileName)
                && Objects.equals(id, that.id);
    }

    /** Renders {@code id} and {@code fileName}; the content bytes are left out. */
    @Override
    public String toString() {
        return new StringBuilder("PdfFile{")
                .append("id=").append(id)
                .append(", fileName=").append(fileName)
                .append('}')
                .toString();
    }
}

This entity is associated with the Book entity as follows:

// One-to-one association from Book to its PdfFile excerpt, joined on the
// excerpt_id column, eagerly fetched, with all operations cascaded.
// @IndexedEmbedded makes Hibernate Search process PdfFile's indexed fields as
// part of Book (queried later as "excerpt.content").
@IndexedEmbedded
    @OneToOne(cascade = CascadeType.ALL, fetch=FetchType.EAGER, targetEntity = PdfFile.class)
    @JoinColumn(name="excerpt_id")
    private PdfFile excerpt;

When I deploy the EAR on WildFly 10, I get the following error:

14:29:02,627 ERROR [org.jboss.msc.service.fail] (ServerService Thread Pool -- 631) MSC000001: Failed to start service jboss.persistenceunit."ejercicio4-ear-1.0-SNAPSHOT.ear/ejercicio4-ejb-1.0-SNAPSHOT.jar#ejercicio4PU": org.jboss.msc.service.StartException in service jboss.persistenceunit."ejercicio4-ear-1.0-SNAPSHOT.ear/ejercicio4-ejb-1.0-SNAPSHOT.jar#ejercicio4PU": javax.persistence.PersistenceException: [PersistenceUnit: ejercicio4PU] Unable to build Hibernate SessionFactory
    at org.jboss.as.jpa.service.PersistenceUnitServiceImpl$1$1.run(PersistenceUnitServiceImpl.java:179)
    at org.jboss.as.jpa.service.PersistenceUnitServiceImpl$1$1.run(PersistenceUnitServiceImpl.java:121)
    at org.wildfly.security.manager.WildFlySecurityManager.doChecked(WildFlySecurityManager.java:667)
    at org.jboss.as.jpa.service.PersistenceUnitServiceImpl$1.run(PersistenceUnitServiceImpl.java:193)
    at java.util.concurrent.ThreadPoolExecutor.runWorker(ThreadPoolExecutor.java:1142)
    at java.util.concurrent.ThreadPoolExecutor$Worker.run(ThreadPoolExecutor.java:617)
    at java.lang.Thread.run(Thread.java:745)
    at org.jboss.threads.JBossThread.run(JBossThread.java:320)
Caused by: javax.persistence.PersistenceException: [PersistenceUnit: ejercicio4PU] Unable to build Hibernate SessionFactory
    at org.hibernate.jpa.boot.internal.EntityManagerFactoryBuilderImpl.persistenceException(EntityManagerFactoryBuilderImpl.java:954)
    at org.hibernate.jpa.boot.internal.EntityManagerFactoryBuilderImpl.build(EntityManagerFactoryBuilderImpl.java:882)
    at org.jboss.as.jpa.hibernate5.TwoPhaseBootstrapImpl.build(TwoPhaseBootstrapImpl.java:44)
    at org.jboss.as.jpa.service.PersistenceUnitServiceImpl$1$1.run(PersistenceUnitServiceImpl.java:161)
    ... 7 more
Caused by: org.hibernate.search.exception.SearchException: HSEARCH000151: Unable to get input stream from object of type byte
    at org.hibernate.search.bridge.impl.TikaBridgeProvider.provideFieldBridge(TikaBridgeProvider.java:44)
    at org.hibernate.search.bridge.impl.ExtendedBridgeProvider.provideFieldBridge(ExtendedBridgeProvider.java:36)
    at org.hibernate.search.bridge.impl.BridgeFactory.getFieldBridgeFromBridgeProvider(BridgeFactory.java:271)
    at org.hibernate.search.bridge.impl.BridgeFactory.buildFieldBridge(BridgeFactory.java:193)
    at org.hibernate.search.engine.metadata.impl.AnnotationMetadataProvider.bindFieldAnnotation(AnnotationMetadataProvider.java:1092)
    at org.hibernate.search.engine.metadata.impl.AnnotationMetadataProvider.checkForField(AnnotationMetadataProvider.java:1024)
    at org.hibernate.search.engine.metadata.impl.AnnotationMetadataProvider.initializeMemberLevelAnnotations(AnnotationMetadataProvider.java:855)
    at org.hibernate.search.engine.metadata.impl.AnnotationMetadataProvider.initializeClass(AnnotationMetadataProvider.java:471)
    at org.hibernate.search.engine.metadata.impl.AnnotationMetadataProvider.checkForIndexedEmbedded(AnnotationMetadataProvider.java:1636)
    at org.hibernate.search.engine.metadata.impl.AnnotationMetadataProvider.initializeMemberLevelAnnotations(AnnotationMetadataProvider.java:863)
    at org.hibernate.search.engine.metadata.impl.AnnotationMetadataProvider.initializeClass(AnnotationMetadataProvider.java:471)
    at org.hibernate.search.engine.metadata.impl.AnnotationMetadataProvider.getTypeMetadataFor(AnnotationMetadataProvider.java:132)
    at org.hibernate.search.spi.SearchIntegratorBuilder.initDocumentBuilders(SearchIntegratorBuilder.java:373)
    at org.hibernate.search.spi.SearchIntegratorBuilder.buildNewSearchFactory(SearchIntegratorBuilder.java:199)
    at org.hibernate.search.spi.SearchIntegratorBuilder.buildSearchIntegrator(SearchIntegratorBuilder.java:117)
    at org.hibernate.search.hcore.impl.HibernateSearchSessionFactoryObserver.sessionFactoryCreated(HibernateSearchSessionFactoryObserver.java:75)
    at org.hibernate.internal.SessionFactoryObserverChain.sessionFactoryCreated(SessionFactoryObserverChain.java:35)
    at org.hibernate.internal.SessionFactoryImpl.<init>(SessionFactoryImpl.java:530)
    at org.hibernate.boot.internal.SessionFactoryBuilderImpl.build(SessionFactoryBuilderImpl.java:444)
    at org.hibernate.jpa.boot.internal.EntityManagerFactoryBuilderImpl.build(EntityManagerFactoryBuilderImpl.java:879)
    ... 9 more

14:29:02,644 ERROR [org.jboss.as.controller.management-operation] (DeploymentScanner-threads - 1) WFLYCTL0013: Operation ("full-replace-deployment") failed - address: ([]) - failure description: {
    "WFLYCTL0080: Failed services" => {"jboss.persistenceunit.\"ejercicio4-ear-1.0-SNAPSHOT.ear/ejercicio4-ejb-1.0-SNAPSHOT.jar#ejercicio4PU\"" => "org.jboss.msc.service.StartException in service jboss.persistenceunit.\"ejercicio4-ear-1.0-SNAPSHOT.ear/ejercicio4-ejb-1.0-SNAPSHOT.jar#ejercicio4PU\": javax.persistence.PersistenceException: [PersistenceUnit: ejercicio4PU] Unable to build Hibernate SessionFactory
    Caused by: javax.persistence.PersistenceException: [PersistenceUnit: ejercicio4PU] Unable to build Hibernate SessionFactory
    Caused by: org.hibernate.search.exception.SearchException: HSEARCH000151: Unable to get input stream from object of type byte"},
    "WFLYCTL0412: Required services that are not installed:" => ["jboss.persistenceunit.\"ejercicio4-ear-1.0-SNAPSHOT.ear/ejercicio4-ejb-1.0-SNAPSHOT.jar#ejercicio4PU\""],
    "WFLYCTL0180: Services with missing/unavailable dependencies" => undefined
}

I am using Hibernate Search ORM 5.5.5.Final

<dependency>
            <groupId>org.hibernate</groupId>
            <artifactId>hibernate-search-orm</artifactId>
            <version>${hibernate.search.version}</version>
            <exclusions>
                <exclusion>  <!-- declare the exclusion here -->
                  <groupId>org.apache.lucene</groupId>
                  <artifactId>lucene-backward-codecs</artifactId>
                </exclusion>
            </exclusions> 
        </dependency>

And Hibernate Entity Manager 5.0.11.Final

<dependency>
            <groupId>org.hibernate</groupId>
            <artifactId>hibernate-entitymanager</artifactId>
            <version>${hibernate.entity.manager}</version>
        </dependency>

Can somebody help me?


Solution

  • I solved this by changing the field type to java.sql.Blob:

    // Same mapping as before, but the field is now a java.sql.Blob instead of
    // byte[]; Blob is one of the types the documentation lists for @TikaBridge.
    @Lob
    @Column(name = "content")
    @Field
    @TikaBridge
    private Blob content;
    

    To convert the byte array to a Blob, I used the SerialBlob class:

    // Read the whole file into memory, then wrap the bytes in a SerialBlob so
    // they match the Blob-typed content field.
    // NOTE(review): assumes PdfFile's constructor was changed to accept a Blob — confirm.
    byte[] content = IOUtils.toByteArray(file.toURI());
    pdf = new PdfFile(fileName, new SerialBlob(content));
    

    To search this field, .ignoreFieldBridge() has to be used:

    // Keyword query over several fields, including the embedded excerpt.content.
    // ignoreFieldBridge() is required here so the search keyword is not passed
    // through the fields' bridges (excerpt.content is mapped with @TikaBridge).
    org.apache.lucene.search.Query luceneQuery = qb.keyword()
                    .onFields("title", "description", "authors.name", "excerpt.content")
                    .ignoreFieldBridge()
                    .matching(keyword)
                    .createQuery();