python json tiktok

How can I make a TikTok scraper scrape data and add it to a CSV file?


I am building this scraper to scrape a TikTok hashtag, collect the usernames found under that hashtag, and then scrape the profile of each username collected. When it finishes, I want the information organized in a CSV file. This is what I came up with, but it is not working as I expected. I am a beginner trying to learn a new language. What am I missing or doing wrong?

import requests
import json
import pandas as pd

# Scrape the hashtag feed; the query string asks for count=30 and id=9261.
url = "https://api.tikapi.io/public/hashtag?count=30&id=9261"

payload={}
headers = {
  'X-API-KEY': 'xxxxxx'
}

response = requests.request("GET", url, headers=headers, data=payload)
hashtag_response = response.text
hashtag_json = json.loads (hashtag_response)

# Write the raw hashtag response body to data.json.
results_json = open("data.json", "w")
L = [response.text]
results_json.writelines(L)
results_json.close()

# Result columns; these four lists are later passed to pd.DataFrame as columns.
influencer = []

followerCount = []
bioLink = []
signature = []


# NOTE(review): this reads a top-level 'uniqueId' key from the response.
# The accepted fix reads hashtag_json["itemList"][i]['author']['uniqueId']
# instead, so the key is presumably nested per feed item — confirm against
# the saved data.json.
for uniqueId in hashtag_json ['uniqueId']:
   influencer.append(uniqueId)


# Scrape the influencer profile.
# NOTE(review): this is a plain string, not an f-string or .format() call, so
# the literal text "{influencer}" is sent as the username. `influencer` is also
# a list, so a single request cannot cover every collected name.
url = "https://api.tikapi.io/public/check?username={influencer}"

payload={}
headers = {
  'X-API-KEY': 'xxxxx'
}

influencerresponse = requests.request("GET", url, headers=headers, data=payload)
infl_response = influencerresponse.text
influencer_json = json.loads (infl_response)

# Write the raw profile response body to infl_data.json.
results_json = open("infl_data.json", "w")
I = [influencerresponse.text]
results_json.writelines(I)
results_json.close()

# NOTE(review): the subscript below is a single tuple key
# ('followerCount', 'bioLink', 'signature'), not three separate lookups.
# The loop targets also rebind followerCount and bioLink, replacing the lists
# created above, and signature.append(signature) appends the list to itself.
for followerCount, bioLink in influencer_json ['followerCount','bioLink','signature']:
    followerCount.append(followerCount)
    bioLink.append(bioLink)
    signature.append(signature)

# Create the CSV file of results.

# One DataFrame column per result list, written without the index column.
influencer_df = pd.DataFrame({  
    'Influencer' : influencer,
    'Follower Count' : followerCount,
    'Link' : bioLink,
    'Signature' : signature,   

    })

influencer_df.to_csv('/Users/john/Repos/TikTok/influencer.csv', index=False)


Solution

  • This part is wrong:

    for uniqueId in hashtag_json ['uniqueId']:
       influencer.append(uniqueId)
    

    should be

    # Reads the author of the first feed item only (index 0); iterate over
    # hashtag_json["itemList"] to collect every author.
    influencer.append(hashtag_json["itemList"][0]['author']['uniqueId'])
    

    and in this part

    for followerCount, bioLink in influencer_json ['followerCount','bioLink','signature']:
    

    should be

    # Every value is read from the nested 'userInfo' object of the profile
    # response: the follower count from 'stats', the rest from 'user'.
    # NOTE(review): presumably 'bioLink' is missing for users without a link,
    # which would raise KeyError here — verify against real responses.
    followerCount.append(influencer_json['userInfo']['stats']['followerCount'])
    bioLink.append(influencer_json['userInfo']['user']['bioLink']['link'])
    signature.append(influencer_json['userInfo']['user']['signature'])
    

    The full corrected script:

    import requests
    import json
    import pandas as pd
    
    # Scrape a TikTok hashtag feed, look up the profile of every author found
    # in it, and export one CSV row per author.

    API_BASE = "https://api.tikapi.io/public"
    HEADERS = {
        'X-API-KEY': 'xxxx'
    }
    HASHTAG_ID = "9261"
    HASHTAG_COUNT = 30
    REQUEST_TIMEOUT = 30  # seconds; requests never times out unless told to
    CSV_PATH = '/Users/john/Repos/TikTok/influencer.csv'
    CSV_COLUMNS = ['Influencer', 'Follower Count', 'Link', 'Signature']


    def _fetch_json(endpoint, params):
        """Send a GET request to an API endpoint and return the decoded JSON.

        Args:
            endpoint: Path segment under API_BASE, e.g. "hashtag" or "check".
            params: Query parameters; passed via ``params=`` so that requests
                URL-encodes the values instead of splicing them into the URL.

        Raises:
            requests.HTTPError: If the server answers with a 4xx/5xx status.
        """
        response = requests.get(
            "{}/{}".format(API_BASE, endpoint),
            headers=HEADERS,
            params=params,
            timeout=REQUEST_TIMEOUT,
        )
        # Fail loudly on an error status instead of parsing an error body.
        response.raise_for_status()
        return response.json()


    def _write_json(path, data):
        """Serialize *data* as JSON to *path*, closing the file even on error."""
        with open(path, "w", encoding="utf-8") as handle:
            json.dump(data, handle)


    def extract_usernames(hashtag_json):
        """Return the author usernames of a hashtag feed, in feed order.

        Reads ``itemList[i]['author']['uniqueId']`` for every feed item.
        Duplicates are dropped so an author with several videos in the feed is
        looked up only once; items without a username are skipped.
        """
        usernames = []
        seen = set()
        for item in hashtag_json.get("itemList") or []:
            username = (item.get("author") or {}).get("uniqueId")
            if username and username not in seen:
                seen.add(username)
                usernames.append(username)
        return usernames


    def extract_profile(influencer_json):
        """Return the CSV fields of one profile response as a dict.

        Reads ``userInfo.stats.followerCount``, ``userInfo.user.bioLink.link``
        and ``userInfo.user.signature``. Any missing level yields ``None`` for
        that field rather than raising KeyError, so one incomplete profile
        does not abort the whole export.
        """
        user_info = influencer_json.get("userInfo") or {}
        stats = user_info.get("stats") or {}
        user = user_info.get("user") or {}
        bio_link = user.get("bioLink") or {}
        return {
            'Follower Count': stats.get("followerCount"),
            'Link': bio_link.get("link"),
            'Signature': user.get("signature"),
        }


    def main():
        """Run the scrape and write data.json, infl_data.json and the CSV."""
        hashtag_json = _fetch_json(
            "hashtag", {"count": HASHTAG_COUNT, "id": HASHTAG_ID}
        )
        # Keep a copy of the hashtag response for inspection.
        _write_json("data.json", hashtag_json)

        rows = []
        raw_profiles = {}
        for username in extract_usernames(hashtag_json):
            influencer_json = _fetch_json("check", {"username": username})
            raw_profiles[username] = influencer_json
            row = {'Influencer': username}
            row.update(extract_profile(influencer_json))
            rows.append(row)

        # All profile responses go into one file, keyed by username.
        _write_json("infl_data.json", raw_profiles)

        # Building from row dicts keeps every column the same length;
        # columns= fixes the header order even when no rows were collected.
        influencer_df = pd.DataFrame(rows, columns=CSV_COLUMNS)
        influencer_df.to_csv(CSV_PATH, index=False)


    if __name__ == "__main__":
        main()