stormcrawler

Build Failure in Stormcrawler 1.16


I am using StormCrawler 1.16, Apache Storm 1.2.3, Maven 3.6.3, and JDK 1.8.

I have created the project using the archetype command below:

mvn archetype:generate -DarchetypeGroupId=com.digitalpebble.stormcrawler -DarchetypeArtifactId=storm-crawler-elasticsearch-archetype -DarchetypeVersion=LATEST

When I run the mvn clean package command, I get this error:

   /crawler$ mvn clean package
 [INFO] Scanning for projects...
 [INFO] 
  [INFO] -------------------------< com.storm:crawler >--------------------------
 [INFO] Building crawler 1.0-SNAPSHOT  
[INFO] --------------------------------[ jar ]---------------------------------
[INFO] 
[INFO] --- maven-clean-plugin:2.5:clean (default-clean) @ crawler ---
[INFO] Deleting /home/ubuntu/crawler/target
[INFO] 
[INFO] --- maven-resources-plugin:2.6:resources (default-resources) @ crawler ---
[INFO] Using 'UTF-8' encoding to copy filtered resources.
[INFO] Copying 4 resources
[INFO] 
[INFO] --- maven-compiler-plugin:3.2:compile (default-compile) @ crawler ---
[INFO] Changes detected - recompiling the module!
[INFO] Compiling 1 source file to /home/ubuntu/crawler/target/classes
[INFO] -------------------------------------------------------------
[ERROR] COMPILATION ERROR : 
[INFO] -------------------------------------------------------------
[ERROR] /home/ubuntu/crawler/src/main/java/com/cnf/245/ESCrawlTopology.java:[19,16] ';' 
expected
[INFO] 1 error 
[INFO] -------------------------------------------------------------  
[INFO] ------------------------------------------------------------------------
[INFO] BUILD FAILURE
[INFO] ------------------------------------------------------------------------
[INFO] Total time:  2.407 s
[INFO] Finished at: 2020-06-29T20:40:46Z
[INFO] ------------------------------------------------------------------------
[ERROR] Failed to execute goal org.apache.maven.plugins:maven-compiler-plugin:3.2:compile 
 (default-compile) on project crawler: Compilation failure
 [ERROR] /home/ubuntu/crawler/src/main/java/com/cnf/245/ESCrawlTopology.java:[19,16] ';' 
 expected
 [ERROR] 
 [ERROR] -> [Help 1]
 [ERROR] 
 [ERROR] To see the full stack trace of the errors, re-run Maven with the -e switch.
 [ERROR] Re-run Maven using the -X switch to enable full debug logging.
 [ERROR] 
 [ERROR] For more information about the errors and possible solutions, please read the 
  following articles:
 [ERROR] [Help 1] http://cwiki.apache.org/confluence/display/MAVEN/MojoFailureException

I haven't edited the pom.xml file.

Here is the content of the ESCrawlTopology.java file -

    package com.cnf.245;
    import org.apache.storm.metric.LoggingMetricsConsumer;
    import org.apache.storm.topology.TopologyBuilder;     
    import org.apache.storm.tuple.Fields;

    import com.digitalpebble.stormcrawler.ConfigurableTopology;
    import com.digitalpebble.stormcrawler.Constants;
    import com.digitalpebble.stormcrawler.bolt.FetcherBolt;
    import com.digitalpebble.stormcrawler.bolt.JSoupParserBolt;
    import com.digitalpebble.stormcrawler.bolt.SiteMapParserBolt;
    import com.digitalpebble.stormcrawler.bolt.URLFilterBolt;
    import com.digitalpebble.stormcrawler.bolt.URLPartitionerBolt;
    import 
    com.digitalpebble.stormcrawler.elasticsearch.bolt.DeletionBolt;
    import  
    com.digitalpebble.stormcrawler.elasticsearch.bolt.IndexerBolt;
    import 


 com.digitalpebble.stormcrawler.elasticsearch.metrics.MetricsConsumer;
        import 
 

com.digitalpebble.stormcrawler.elasticsearch.metrics.StatusMetricsBolt;
        import 
 

com.digitalpebble.stormcrawler.elasticsearch.persistence.AggregationSpout;
       import com.digitalpebble.stormcrawler.elasticsearch.persistence.StatusUpdaterBolt;
       import com.digitalpebble.stormcrawler.spout.FileSpout;
       import com.digitalpebble.stormcrawler.util.ConfUtils;
        import com.digitalpebble.stormcrawler.util.URLStreamGrouping;

     /**
 * Dummy topology to play with the spouts and bolts on ElasticSearch
 */
public class ESCrawlTopology extends ConfigurableTopology {

    public static void main(String[] args) throws Exception {
        ConfigurableTopology.start(new ESCrawlTopology(), args);
    }

    @Override
    protected int run(String[] args) {
        TopologyBuilder builder = new TopologyBuilder();

        int numWorkers = ConfUtils.getInt(getConf(), "topology.workers", 1);

        if (args.length == 0) {
            System.err.println("ESCrawlTopology seed_dir file_filter");
            return -1;
        }

        // set to the real number of shards ONLY if es.status.routing is set to
        // true in the configuration
        int numShards = 1;

        builder.setSpout("filespout", new FileSpout(args[0], args[1], true));

        Fields key = new Fields("url");

        builder.setBolt("filter", new URLFilterBolt())
                .fieldsGrouping("filespout", Constants.StatusStreamName, key);

        builder.setSpout("spout", new AggregationSpout(), numShards);

        builder.setBolt("status_metrics", new StatusMetricsBolt())
                .shuffleGrouping("spout");

        builder.setBolt("partitioner", new URLPartitionerBolt(), numWorkers)
                .shuffleGrouping("spout");

        builder.setBolt("fetch", new FetcherBolt(), numWorkers)
                .fieldsGrouping("partitioner", new Fields("key"));

        builder.setBolt("sitemap", new SiteMapParserBolt(), numWorkers)
                .localOrShuffleGrouping("fetch");

        builder.setBolt("parse", new JSoupParserBolt(), numWorkers)
                .localOrShuffleGrouping("sitemap");

        builder.setBolt("indexer", new IndexerBolt(), numWorkers)
                .localOrShuffleGrouping("parse");

        builder.setBolt("status", new StatusUpdaterBolt(), numWorkers)
                .fieldsGrouping("fetch", Constants.StatusStreamName, 
         key)
                .fieldsGrouping("sitemap", Constants.StatusStreamName, 
         key)
                .fieldsGrouping("parse", Constants.StatusStreamName, 
        key)
                .fieldsGrouping("indexer", Constants.StatusStreamName, 
        key)
                .customGrouping("filter", Constants.StatusStreamName,
                        new URLStreamGrouping());

        builder.setBolt("deleter", new DeletionBolt(), numWorkers)
                .localOrShuffleGrouping("status",
                        Constants.DELETION_STREAM_NAME);

        conf.registerMetricsConsumer(MetricsConsumer.class);
        conf.registerMetricsConsumer(LoggingMetricsConsumer.class);

        return submit("crawl", conf, builder);
          }
       }

I put com.cnf.245 as the groupId and crawler as the artifactId.

Can someone please explain what causes this error?


Solution

  • Can you please paste the content of ESCrawlTopology.java? Did you set com.cnf.245 as package name?

    The template class gets rewritten during the execution of the archetype with the package name substituted, it could be that the value you set broke the template.

    EDIT: in Java, a package name segment cannot start with a digit — `245` is not a valid identifier, which is why the compiler fails on the `package com.cnf.245;` line. See Using numbers as package names in java

    Use a different package name and groupID.