elasticsearch · logstash · logstash-configuration · logstash-file · logstash-jdbc

How to make a nested field an array in Logstash (Elasticsearch)


"archives_id": null,
"level_of_detail": null,
"items": \[
    {
        "barcode": "000892",
        "brn": "4188",
        "collection": "\["Books"\]",
        "updated_time": "2023-09-13 11:36:56.000000",
        "suffix": "LAI",
        "status": "Available",
        "availability": "Yes",
        "location": "Rotunda Library & Archive",
        "class_number": "759.95957"
    },
    {
        "barcode": "001445",
        "brn": "4188",
        "collection": "\["Books"\]",
        "updated_time": "2023-09-13 11:36:56.000000",
        "suffix": "LAI",
        "status": "On-Loan",
        "availability": "No",
        "location": "Rotunda Library & Archive",
        "class_number": "759.95957"
    }
\
],
"seriestitle": null,

The collection is not an array. How do I write this in filter{} in the Logstash config file? I can't really solve it; I need the collection data to be an array, placed inside each object in items: [ {} ].

I searched other pages that suggest using remove_field or split..., but that doesn't really work for me. Can anyone please help me with this?

My Expected Result should be like this:

"archives_id": null,
    "level_of_detail": null,
    "items": \[
        {
            "barcode": "000892",
            "brn": "4188",
            "collection": \[
                "Books"                     \<--- I want format like this
\
            ],
            "updated_time": "2023-09-13 11:36:56.000000",
            "suffix": "LAI",
            "status": "Available",
            "availability": "Yes",
            "location": "Rotunda Library & Archive",
            "class_number": "759.95957"
        },
        {
            "barcode": "001445",
            "brn": "4188",
            "collection": "\["Books"\]", \<--- Not like this, I do not want this
"updated_time": "2023-09-13 11:36:56.000000",
            "suffix": "LAI",
            "status": "On-Loan",
            "availability": "No",
            "location": "Rotunda Library & Archive",
            "class_number": "759.95957"
        }
\
    ],
    "seriestitle": null,

Can anyone help me solve this issue? This is the Logstash config that indexes a new index into the Elasticsearch server.

I tried a lot of approaches, like split and remove_field..., but still no luck.

Current Logstash config:

input {
  jdbc {
    clean_run => true
    jdbc_driver_library => "C:\Users\shien\Downloads\elasticsearch\logstash-conf\mysql-connector-java-8.0.30.jar" # depends on the server 
    jdbc_driver_class => "com.mysql.jdbc.Driver"
    jdbc_connection_string => ""
    jdbc_user => ""
    jdbc_password => ""
    statement => "SELECT p.id, p.category_code, p.file_path, p.image_path, p.title AS product_title, p.artist, p.product_date, p.location, p.access_level, p.umo_id, p.created_time AS products_created_time, p.updated_time AS products_updated_time, p.is_available,
c.code, c.sequence, art.id AS artwork_id, art.umo_id, art.system_id, art.accession_no, art.artist_date_birth, art.artist_date_death, art.creditline, art.medium, art.dimensions, art.artwork_on_display,
art.photo_credit, art.genre, art.copyright, art.description, arc.id AS archives_id, arc.umo_id, arc.reference_code, arc.other_title, arc.fonds_collection, arc.type_of_date, arc.language AS archive_language, arc.subject, arc.material_type, arc.extent_and_medium, arc.storage_location, arc.conditions_governing_access, arc.usage_restrictions_code, arc.conditions_governing_use_reproduction, arc.system_arrangement, arc.archival_history, arc.source_of_acquisition, arc.location_of_originals, arc.location_of_copies, arc.notes AS archive_note, arc.scope_and_content, arc.level_of_detail, arc.level_of_description, arc.status, arc.subject_personalname, l.id AS library_id ,l.brn , l.title AS library_title, l.author, l.corporate_author, l.meeting_name, l.edition_statement, l.imprint, l.collation, l.seriesTitle, l.notes AS library_notes, l.linking_notes, l.credits,
l.performers, l.system_details, l.contents, l.summary, l.isbn, l.issn, l.dewey_class, l.language AS library_language, l.variant_title, l.added_title, l.subject AS library_subject, l.published_date, l.bib_format,
l.irn, l.host_item_entry, l.created_time AS library_created_time, l.updated_time AS library_updated_time, JSON_ARRAYAGG(JSON_OBJECT('barcode', li.barcode, 'brn', li.brn, 'location', li.location, 'collection', li.collection, 'class_number', li.class_number, 'suffix', li.suffix, 'status', li.status, 'availability', li.availability, 'updated_time', li.updated_time)) AS items FROM products p LEFT JOIN artwork art ON p.id = art.product_id LEFT JOIN categories c ON p.category_code = c.code LEFT JOIN archives arc ON p.id = arc.product_id LEFT JOIN library l ON p.id = l.product_id LEFT JOIN library_items li ON li.library_id = l.id group by p.id;"
    use_column_value => true
    tracking_column => "id"
    schedule => "*/5 * * * * *"
  }
}
filter {
  mutate {
    remove_field => ["@version", "@timestamp"]
  }
  mutate {
    convert => {
      "[items][collection]" => "array"
    }
  }
  # product table 
  json { source => "product_title" target => "product_title" }
  json { source => "artist" target => "artist" }
  json { source => "location" target => "location" }

  # archive table
  json { source => "subject" target => "subject" }
  json { source => "archive_language" target => "archive_language" }
  json { source => "system_arrangement" target => "system_arrangement" }
  json { source => "archive_note" target => "archive_note" }
  json { source => "scope_and_content" target => "scope_and_content" }
  json { source => "subject_personalname" target => "subject_personalname" }

  # artwork table
  json { source => "artist_date_birth" target => "artist_date_birth" }
  json { source => "artist_date_death" target => "artist_date_death" }
  json { source => "dimensions" target => "dimensions" }
  json { source => "copyright" target => "copyright" }

  # library table
  json { source => "library_title" target => "library_title" }
  json { source => "author" target => "author" }
  json { source => "corporate_author" target => "corporate_author" }
  json { source => "meeting_name" target => "meeting_name" }
  json { source => "imprint" target => "imprint" }
  json { source => "collation" target => "collation" }
  json { source => "library_notes" target => "library_notes" }
  json { source => "contents" target => "contents" }
  json { source => "library_language" target => "library_language" }
  json { source => "library_subject" target => "library_subject" }

  # library_items table
  json { source => "isbn" target => "isbn" }
  json { source => "items" target => "items" }
}

output{
  # Localhost
  elasticsearch { 
    hosts => ["http://localhost:9200/"]
    index => ""
    ilm_rollover_alias => ""
    ilm_enabled => false
    data_stream => false
    user => "elastic"
    password => ""
    document_id => "%{id}"
  }
}

Solution

  • The problem is that you're probably storing a JSON array in the library_items.collection VARCHAR field.

    You need to parse that JSON string in your SQL query; that will most likely solve your problem.
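
    A minimal sketch of that idea, assuming MySQL 5.7.8+ (where CAST(... AS JSON) is available) and that library_items.collection always holds valid JSON text such as ["Books"]: cast the column to JSON inside JSON_OBJECT, so JSON_ARRAYAGG builds a real nested array instead of an escaped string. Only the aggregated part of the SELECT changes; the rest of the query stays the same.

        JSON_ARRAYAGG(
          JSON_OBJECT(
            'barcode', li.barcode,
            'brn', li.brn,
            'location', li.location,
            'collection', CAST(li.collection AS JSON),  -- parse the stored JSON string into a real array
            'class_number', li.class_number,
            'suffix', li.suffix,
            'status', li.status,
            'availability', li.availability,
            'updated_time', li.updated_time
          )
        ) AS items

    With the array built on the database side, the existing json { source => "items" target => "items" } filter should give you items[*].collection as a proper array, and the mutate convert on [items][collection] should no longer be needed. If some rows hold something other than valid JSON, clean those up first, since CAST(... AS JSON) will fail on them.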