I am getting something like this
Hi, I am scraping a web page using Selenium WebDriver and I am able to get my data, but the problem is that it drives a real browser directly. I don't want to open a web browser window; I want to scrape the same data without one.
How can I achieve this?
Here is my code:
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.support.ui.Select;
/**
 * Scrapes the commodity-wise rate table from the UP Mandi Parishad page for a
 * fixed commodity and date by driving a Firefox window, then prints the table
 * text to standard output.
 */
public class GetData {

    /**
     * Loads the page, fills in the commodity and date, submits the form and
     * prints the text of the result grid.
     *
     * @param args unused
     * @throws InterruptedException if the thread is interrupted during one of
     *         the fixed pauses
     */
    public static void main(String args[]) throws InterruptedException {
        String sDate = "27/03/2014";
        String url = "http://www.upmandiparishad.in/commodityWiseAll.aspx";

        WebDriver driver = new FirefoxDriver();
        try {
            driver.get(url);
            // Fixed pause; presumably gives the page time to finish loading.
            Thread.sleep(5000);

            // Pick the commodity "Jo" in the drop-down (presumably barley - confirm).
            new Select(driver.findElement(By.id("ctl00_ContentPlaceHolder1_ddl_commodity")))
                    .selectByVisibleText("Jo");

            // Type the date into the rate-date text box.
            driver.findElement(By.id("ctl00_ContentPlaceHolder1_txt_rate")).sendKeys(sDate);
            Thread.sleep(3000);

            // Submit the form via the "show" button and pause for the result.
            driver.findElement(By.id("ctl00_ContentPlaceHolder1_btn_show")).click();
            Thread.sleep(5000);

            // Read only the text of the result grid; these are the raw table values.
            WebElement findElement = driver.findElement(By.id("ctl00_ContentPlaceHolder1_GridView1"));
            String htmlTableText = findElement.getText();
            System.out.println(htmlTableText);
        } finally {
            // Always end the session, even when an element lookup fails, so no
            // browser process is left behind. quit() alone is enough; a
            // preceding close() is redundant.
            driver.quit();
        }
    }
}
My updated new code:
import com.gargoylesoftware.htmlunit.BrowserVersion;
import org.openqa.selenium.By;
import org.openqa.selenium.WebDriver;
import org.openqa.selenium.WebElement;
import org.openqa.selenium.firefox.FirefoxDriver;
import org.openqa.selenium.htmlunit.HtmlUnitDriver;
import org.openqa.selenium.support.ui.Select;
/**
 * Headless variant of the scraper: uses {@code HtmlUnitDriver} instead of a
 * real browser window to fetch the commodity-wise rate table and print its
 * text to standard output.
 */
public class Getdata1 {

    /**
     * Loads the page, dumps its source, fills in the commodity and date,
     * submits the form and prints the text of the result grid.
     *
     * @param args unused
     * @throws InterruptedException if the thread is interrupted during one of
     *         the fixed pauses
     */
    public static void main(String args[]) throws InterruptedException {
        WebDriver driver = new HtmlUnitDriver(BrowserVersion.FIREFOX_3_6);
        try {
            driver.get("http://www.upmandiparishad.in/commodityWiseAll.aspx");
            // Dump the fetched page source for inspection.
            System.out.println(driver.getPageSource());
            // Fixed pause; presumably gives the page time to settle.
            Thread.sleep(5000);

            // Pick the commodity "Jo" in the drop-down (presumably barley - confirm).
            new Select(driver.findElement(By.id("ctl00_ContentPlaceHolder1_ddl_commodity")))
                    .selectByVisibleText("Jo");

            String sDate = "12/04/2014"; // the date to query
            driver.findElement(By.id("ctl00_ContentPlaceHolder1_txt_rate")).sendKeys(sDate);
            driver.findElement(By.id("ctl00_ContentPlaceHolder1_btn_show")).click();
            Thread.sleep(3000);

            // Read only the text of the result grid; these are the raw table values.
            WebElement findElement = driver.findElement(By.id("ctl00_ContentPlaceHolder1_GridView1"));
            String htmlTableText = findElement.getText();
            System.out.println(htmlTableText);
        } finally {
            // Always end the session, even when an element lookup fails.
            // quit() alone is enough; a preceding close() is redundant.
            driver.quit();
        }
    }
}
Thanks in advance
Use HtmlUnit, or the HtmlUnitDriver provided by Selenium:
// Headless scrape: HtmlUnitDriver is used here in place of a real browser window.
WebDriver driver = new HtmlUnitDriver(BrowserVersion.FIREFOX_17);
// Declared outside the try block so the table text stays usable afterwards.
String htmlTableText;
try {
    driver.get("http://www.upmandiparishad.in/commodityWiseAll.aspx");
    // Dump the fetched page source for inspection.
    System.out.println(driver.getPageSource());
    // Fixed pause; presumably gives the page time to settle.
    Thread.sleep(5000);

    // Pick the commodity "Jo" in the drop-down (presumably barley - confirm).
    new Select(driver.findElement(By.id("ctl00_ContentPlaceHolder1_ddl_commodity")))
            .selectByVisibleText("Jo");

    String sDate = "12/04/2014"; // the date to query
    driver.findElement(By.id("ctl00_ContentPlaceHolder1_txt_rate")).sendKeys(sDate);
    driver.findElement(By.id("ctl00_ContentPlaceHolder1_btn_show")).click();
    Thread.sleep(3000);

    // Read only the text of the result grid; these are the raw table values.
    WebElement findElement = driver.findElement(By.id("ctl00_ContentPlaceHolder1_GridView1"));
    htmlTableText = findElement.getText();
    System.out.println(htmlTableText);
} finally {
    // Always end the session, even when an element lookup fails.
    // quit() alone is enough; a preceding close() is redundant.
    driver.quit();
}
To get tabular output, you can try something like this:
// Re-flow the table text into rows: every token that parses as an integer
// starts a new output line, and every token is followed by a tab.
// NOTE(review): only single spaces are used as separators; if the table text
// also contains line breaks between rows, tokens spanning a break will not
// parse as numbers - confirm against the real output.
String[] arrCells = htmlTableText.split(" ");
for (String cell : arrCells) {
    String rowBreak;
    try {
        // Only the success or failure of the parse matters, not the value.
        Integer.parseInt(cell);
        rowBreak = "\n";
    } catch (NumberFormatException ignored) {
        // Not an integer: the token stays on the current output line.
        rowBreak = "";
    }
    System.out.print(rowBreak + cell + "\t");
}