I'm trying to web scrape the first headline from the yahoo finance press release page, so in this case:
"Nutriband Inc. Signs Exclusive Manufacturing Agreement for Diocheck(TM) Visual COVID-19 Antibody Indicator Patch"
from the URL: https://finance.yahoo.com/quote/NTRB/press-releases
the formula:
=IMPORTXML("https://finance.yahoo.com/quote/NTRB/press-releases", "//*[@id="summaryPressStream-0-Stream"]/ul/li[1]/div/div/div[1]/h3/a/text()")
gives me a parsing error, which I'm guessing is from the double quotation marks around "summaryPressStream-0-Stream"
Replacing the double quotations with single quotations,
i.e. =IMPORTXML("https://finance.yahoo.com/quote/NTRB/press-releases", "//*[@id='summaryPressStream-0-Stream']/ul/li[1]/div/div/div[1]/h3/a")
results in a resource not found at URL error. Maybe I have the xpath wrong? I've tried all the xpath's near that section but just can't seem to get it working.
Does anyone know how to solve this issue?
If you want to get a quick overview of the press releases, you can try
function pressReleases(code){
var url = 'https://finance.yahoo.com/quote/'+code+'/press-releases'
var source = UrlFetchApp.fetch(url).getContentText()
var jsonString = source.match(/(?<=root.App.main = ).*(?=}}}})/g) + '}}}}'
var data = JSON.parse(jsonString)
var result=[]
eval("data.context.dispatcher.stores.StreamStore.streams['YFINANCE:"+code+".mega'].data.stream_items").forEach(function(item){
result.push([item.title,item.publisher,item.summary])
})
return (result)
}
and then, put in a cell :
=pressReleases(A1)
with A1 = NTRB
according to your requirement.