我的 Spark 调试 Java 代码

来源:互联网 发布:数学不好能学编程吗 编辑:程序博客网 时间:2024/06/11 21:53

仅供自己参考,其他人可能看不懂。

import org.apache.spark.SparkConf;import org.apache.spark.api.java.JavaRDD;import org.apache.spark.api.java.JavaSparkContext;import org.apache.spark.mllib.rdd.RandomRDD;import org.apache.spark.rdd.RDD;import org.apache.spark.sql.*;import org.apache.spark.sql.hive.HiveContext;import org.apache.spark.sql.types.IntegerType;import org.apache.spark.sql.types.StructType;import org.bouncycastle.asn1.dvcs.Data;import java.util.ArrayList;import java.util.Arrays;import java.util.List;/** * Created by hadoop on 15-12-11. */public class ceshi {    public static void main(String[] args){        SparkConf conf=new SparkConf().setAppName("alongloc6").setMaster("spark://192.168.3.31:7077");        JavaSparkContext sc=new JavaSparkContext(conf);        JavaRDD<String> distFile=sc.textFile("hdfs://192.168.3.31:9000/ceshi/wai.csv");        //System.out.println("along3:"+distFile.first());        List<Integer> listint= Arrays.asList(1,2,3,4,5);        JavaRDD<Integer> distData=sc.parallelize(listint);        //int zong=distFile.map(s->s.length()).reduce((a,b)->a+b);        //int zong= distData.reduce((a, b)->a+b);        //listint.forEach(n-> {if(n>3) System.out.println(n.toString());  });        //System.out.println(distFile.filter(s -> s.length()>20).count());       /* System.out.println( distFile.filter(                new Function<String, Boolean>() {                    public Boolean call(String s){                        return s.contains("石膏线");                    }                }        ).count() );*/        //distFile.foreach(n->System.out.println("ah"));       // long z=distData.filter(a-> a>3).count();       // long z2=distData.reduce((a,b)->a+b);        //JavaRDD<Integer> ge=distFile.map(p->p.length());        //System.out.println(distFile.count() +" ge : "+ge.first());        SQLContext sqlcont=new SQLContext(sc);        //DataFrame df=sqlcont.read().json("hdfs://192.168.3.31:9000/ceshi/jia2.json");        //df.show();        
System.out.println("aaaaaaaaaaaaaaaaaaaaaaaa");        //df.select("dw","xm").show();        //df.filter(df.col("age").gt(21)).show();        //df.orderBy("age").show();        //df.write().parquet("hdfs://192.168.3.31:9000/ceshi/xmage2.parquet");        //DataFrame df2=sqlcont.read().parquet("hdfs://192.168.3.31:9000/ceshi/xmage2.parquet");        //df2.where("age>11").show();        /* HiveContext hivecon=new HiveContext(sc.sc());        hivecon.sql("create table mytab(key INT,xm STRING)");        hivecon.sql("insert into mytab values(11,'jingjing')");        hivecon.sql("from mytab select key,xm").show(); */        /*HiveContext hivecon=new HiveContext(sc.sc());        DataFrame df3=hivecon.read().json("hdfs://192.168.3.31:9000/ceshi/jia2.json");        //df3.write().orc("hdfs://192.168.3.31:9000/ceshi/xmage2.orc");        //hivecon.read().orc("hdfs://192.168.3.31:9000/ceshi/xmage2.orc").show();        DataFrame df4=hivecon.read().orc("hdfs://192.168.3.31:9000/ceshi/xmage2.orc");        df4.save("hdfs://192.168.3.31:9000/ceshi/xmage2.orc","orc", SaveMode.Append);        hivecon.read().orc("hdfs://192.168.3.31:9000/ceshi/xmage2.orc").show();*/        //System.out.println(df4.filter(df4.col("age").gt(12)).first().toString());        /* 很好的数据导入接口,用户可以json方式送数据        List<String> jsonData=Arrays.asList(                "[{\"dw\":\"公安厅\",\"age\":40,\"xm\":\"贵\"},{\"dw\":\"学校\",\"age\":20,\"xm\":\"小雪\"}]"        );        JavaRDD<String> rdd2=sc.parallelize(jsonData);        DataFrame df6 =sqlcont.read().json(rdd2);        df6.write().mode(SaveMode.Append).parquet("hdfs://192.168.3.31:9000/ceshi/xmage2.parquet");        sqlcont.read().parquet("hdfs://192.168.3.31:9000/ceshi/xmage2.parquet").show();        */        DataFrame df7=sqlcont.read().parquet("hdfs://192.168.3.31:9000/ceshi/xmage2.parquet");        df7.filter(df7.col("age").gt(18)).show();        df7.registerTempTable("mytab");        sqlcont.sql("select xm,age,dw from mytab where age>19 and age<40").show();  
      //long md=distFile.map(s->s.length()).reduce((a,b)->{if(a>b) return a; else return b;});        //System.out.println(md);    }}


0 0
原创粉丝点击