Tags: python, python-3.x, zendesk, zendesk-api, zendesk-app

Getting incremental data from Zendesk using Zenpy


I have the code below, which pulls data from Zendesk. The problem is that every time I run this script it fetches the last 30 days of data. Can someone tell me what changes I need to make so that it only pulls data incrementally? Ideally this script should run two or three times a day, and with the current version each execution re-downloads the full 30 days, which is unnecessary.

from zenpy import Zenpy
import datetime
import json
import psycopg2

# Connecting DB..
DSN = "dbname='postgres' user='postgres' host='localhost' password='postgres' port='5432'"
conn = psycopg2.connect(DSN)
conn.set_client_encoding('utf-8')
cur = conn.cursor()
ins_cur = conn.cursor()


script = 'DROP TABLE IF EXISTS ticket_events; CREATE TABLE ticket_events ( ID serial NOT NULL ' \
         'PRIMARY KEY, info json NOT NULL); '
cur.execute(script)
conn.commit()
print('Table dropped and recreated')

# Zenpy accepts an API token
creds = {
    'email': 'xxxxx@xxx.com',
    'token': '*************',
    'subdomain': 'xxxxxx'
}
rday = datetime.datetime.now() - datetime.timedelta(days=30)

# Default connect
zenpy_client = Zenpy(**creds)
print('Zendesk connected via zenpy')
requests = zenpy_client.tickets.events(start_time=rday, include=None)

# loop the tickets and insert to dwh
for request in requests:
    req_json = json.dumps(request.to_dict(), sort_keys=False)
    print(req_json)
    # parameterised query: psycopg2 handles the quoting, so the JSON content cannot break the statement
    insert_query = 'INSERT INTO ticket_events (info) VALUES (%s)'
    cur.execute(insert_query, (req_json,))
    conn.commit()

conn.close()

Below is the table structure I have defined to hold the values, but I don't think it is very good. I want this table to be updated incrementally and to have any redundant data deleted, if there is any (one idea for that is sketched after the query below). Any suggestions on this, please?

drop table if exists zendesk_ticket_events;
create table zendesk_ticket_events as
    SELECT
        CAST(info ->> 'id' AS BIGINT) AS parent_id,
        CAST(info ->> 'ticket_id' AS BIGINT) AS ticket_id,
        CAST(info ->> 'updater_id' AS BIGINT) AS updater_id,
        CAST(info ->> 'via' AS VARCHAR(50)) AS via,
        CAST(info ->> 'event_type' AS VARCHAR(50)) AS parent_event_type,
        CAST(info ->> 'created_at' AS timestamp without time zone) AS created_at,
        CAST(enrolment_info ->> 'via_reference_id' AS TEXT) AS via_reference_id,
        CAST(enrolment_info ->> 'id' AS TEXT) AS child_id,
        CAST(enrolment_info ->> 'assignee_id' AS BIGINT) AS assignee_id,
        CAST(enrolment_info ->> 'subject' AS VARCHAR(50)) AS subject,
        CAST(enrolment_info ->> 'requester_id' AS TEXT) AS requester_id,
        CAST(enrolment_info ->> 'status' AS VARCHAR(50)) AS status,
        CAST(enrolment_info ->> 'priority' AS VARCHAR(50)) AS priority,
        CAST(enrolment_info ->> 'comment_public' AS VARCHAR(50)) AS comment_public,
        CAST(enrolment_info ->> 'comment_present' AS VARCHAR(50)) AS comment_present,
        CAST(enrolment_info ->> 'event_type' AS VARCHAR(50)) AS child_event_type,
        CAST(enrolment_info ->> 'previous_value' AS TEXT) AS previous_value,
        CAST(enrolment_info ->> 'group_id' AS TEXT) AS group_id
    FROM ticket_events t,
         json_array_elements(t.info -> 'child_events') AS enrolment_info;
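
For the redundant data itself, what I have in mind is deduplicating the raw ticket_events table through the same psycopg2 connection before this rebuild runs. This is only a rough sketch, and it assumes the Zendesk audit id stored in the JSON (info ->> 'id') identifies a record uniquely:

# remove duplicate audits from the raw table so the rebuild above stays clean;
# the serial ID column decides which copy to keep, the Zendesk audit id is the key
dedup_sql = """
    DELETE FROM ticket_events a
        USING ticket_events b
    WHERE (a.info ->> 'id') = (b.info ->> 'id')
      AND a.id > b.id;
"""
cur.execute(dedup_sql)
conn.commit()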

Below is the sample data. Can anyone cross-check it against the table structure above and let me know whether the structure is fine or not?

{
  "child_events": [
    {
      "id": 54334560,
      "via": "Mail",
      "via_reference_id": null,
      "comment_present": true,
      "comment_public": true,
      "event_type": "Comment"
    },
    {
      "id": 54334580,
      "via": "Mail",
      "via_reference_id": null,
      "subject": "Order 10056 on 20.03.20",
      "event_type": "Create"
    },
    {
      "id": 54334600,
      "via": "Mail",
      "via_reference_id": null,
      "requester_id": 369854,
      "event_type": "Create"
    },
    {
      "id": 54334620,
      "via": "Mail",
      "via_reference_id": null,
      "locale_id": "8",
      "event_type": "Create"
    },
    {
      "id": 543342310640,
      "via": "Mail",
      "via_reference_id": null,
      "status": "new",
      "event_type": "Create"
    },
    {
      "id": 54334660,
      "via": "Mail",
      "via_reference_id": null,
      "priority": null,
      "event_type": "Create"
    },
    {
      "id": 54334700,
      "via": "Mail",
      "via_reference_id": null,
      "type": null,
      "event_type": "Create"
    },
    {
      "id": 54334740,
      "via": "Mail",
      "via_reference_id": null,
      "tags": [
        "bestellung"
      ],
      "added_tags": [
        "Orders"
      ],
      "removed_tags": [

      ],
      "event_type": "Create"
    },
    {
      "id": 54334860,
      "via": "Rule",
      "via_reference_id": 44967,
      "group_id": 2117,
      "rel": "trigger",
      "revision_id": 1,
      "event_type": "Change",
      "previous_value": null
    }
  ],
  "id": 54334540,
  "ticket_id": 159978,
  "updater_id": 369854,
  "via": "Mail",
  "created_at": "2020-03-29T18:41:22Z",
  "event_type": "Audit",
  "timestamp": 1585507282,
  "system": {
    "client": "Microsoft Outlook 14.0",
    "ip_address": null,
    "latitude": 48.3074,
    "location": "Linz, 4, Austria",
    "longitude": 14.285
  }
}

Solution

  • From where you create the table up to where you create your rday variable, I have changed the code to this:

    create_table_sql = 'CREATE TABLE IF NOT EXISTS ticket_events ' \
                       '( ID serial NOT NULL PRIMARY KEY, info json NOT NULL );'  # create table only if not present already
    
    cur.execute(create_table_sql)
    conn.commit()
    
    # Zenpy accepts an API token
    creds = {
        'email': 'xxxxx@xxx.com',
        'token': '*************',
        'subdomain': 'xxxxxx'
    }
    
    
    # get the created_at of the newest event already stored; it lives inside the json column
    select_max_created = "SELECT MAX((info ->> 'created_at')::timestamp) FROM ticket_events;"
    cur.execute(select_max_created)
    row = cur.fetchone()  # single row, single column

    if row[0] is not None:
        # start one hour before the newest stored event so nothing is missed;
        # the small overlap is removed again when the data is deduplicated
        rday = row[0] - datetime.timedelta(hours=1)
    else:
        # first run: the table is empty, so fall back to the last 30 days
        rday = datetime.datetime.now() - datetime.timedelta(days=30)
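
  • The rest of the script can stay essentially as in the question. As a rough sketch (reusing creds, cur and the ticket_events table from above, with a parameterised insert so the JSON content cannot break the statement), the fetch-and-insert step then becomes:

    # Default connect
    zenpy_client = Zenpy(**creds)
    print('Zendesk connected via zenpy')

    # fetch only the events created after rday instead of a fixed 30-day window
    events = zenpy_client.tickets.events(start_time=rday)

    # loop the events and insert them into the warehouse table
    for event in events:
        event_json = json.dumps(event.to_dict(), sort_keys=False)
        cur.execute('INSERT INTO ticket_events (info) VALUES (%s)', (event_json,))

    conn.commit()
    conn.close()

  • Because rday starts one hour before the newest stored event, a few audits may come back twice; the deduplication step sketched in the question removes them again before the reporting table is rebuilt.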