Java vs Scala

A comparison of Java with Scala

Java vs Scala

Comparison

1: Your code gets shorter in Scala

In Scala your code gets consistently shorter: you don't need getters and setters, case classes provide a lot of functionality for free (see the example below), and collections are much more compact to use than the ones in Java 7 and even the ones in Java 8.
There are reports of code getting 7x shorter than in Java:
In Java:

/**
 * Mutable value object holding a person's first name, last name, e-mail and
 * password.
 *
 * <p>{@link #equals(Object)}, {@link #hashCode()} and {@link #toString()} are
 * all derived from those four fields, each of which may be {@code null}.
 */
public class Person implements Serializable {

    private String firstName;
    private String lastName;
    private String email;
    private String password;

    /** Creates a person; the arguments are stored exactly as given. */
    public Person(
            String firstName, String lastName,
            String email, String password) {
        this.firstName = firstName;
        this.lastName = lastName;
        this.email = email;
        this.password = password;
    }

    // ---- accessors --------------------------------------------------------

    public String getFirstName() {
        return firstName;
    }

    public void setFirstName(String firstName) {
        this.firstName = firstName;
    }

    public String getLastName() {
        return lastName;
    }

    public void setLastName(String lastName) {
        this.lastName = lastName;
    }

    public String getEmail() {
        return email;
    }

    public void setEmail(String email) {
        this.email = email;
    }

    public String getPassword() {
        return password;
    }

    public void setPassword(String password) {
        this.password = password;
    }

    // ---- value semantics --------------------------------------------------

    /**
     * Two persons are equal when they have exactly the same runtime class and
     * all four fields are pairwise equal (two {@code null}s count as equal).
     */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final Person that = (Person) o;
        return sameText(firstName, that.firstName)
                && sameText(lastName, that.lastName)
                && sameText(email, that.email)
                && sameText(password, that.password);
    }

    /**
     * Combines the field hashes with multiplier 31, in declaration order;
     * a {@code null} field contributes 0.
     */
    @Override
    public int hashCode() {
        int hash = 0;
        for (String field : new String[] {firstName, lastName, email, password}) {
            hash = 31 * hash + (field == null ? 0 : field.hashCode());
        }
        return hash;
    }

    /** Renders the fields as {@code Person(first,last,email,password)}. */
    @Override
    public String toString() {
        return new StringBuilder("Person(")
                .append(firstName).append(',')
                .append(lastName).append(',')
                .append(email).append(',')
                .append(password).append(')')
                .toString();
    }

    /** Null-safe string equality: two nulls are equal, null never equals non-null. */
    private static boolean sameText(String left, String right) {
        return left == null ? right == null : left.equals(right);
    }
}
            
In Scala:

/**
 * Scala counterpart of the Java `Person` class: four mutable `String` fields.
 *
 * The fields are declared `var`, so each one can be reassigned after
 * construction.
 */
case class Person(
                  var firstName: String,
                  var lastName: String,
                  var email: String,
                  var password: String)
            

2: Scala's powerful pattern matching

In Java, a switch statement can only match a handful of types: the primitives byte, short, char and int (and their wrapper classes), Strings, and enums:

/** Minimal demonstration of a Java {@code switch} over an {@code int} value. */
public class Main {
    public static void main(String[] args) {
        final int value = 123;

        // Only the literal 1 is handled; every other value (123 included)
        // leaves the switch without printing anything.
        switch (value) {
            case 1:
                System.out.println("Matched 1");
                break;
            default:
                break;
        }
    }
}
            
In Scala you can match pretty much everything, with very powerful additions:

/**
 * Demonstrates the kinds of patterns a Scala `match` supports: literals,
 * alternatives, type tests, case-class extractors and guards.
 *
 * NOTE(review): this is declared as a `class`; it would have to be an
 * `object` to be launched directly as a program.
 */
class Main extends App {

  abstract class Animal
  case class Cat(name: String) extends Animal
  case class Dog(age: Int) extends Animal

  // Typed as Any rather than Object: an Int literal or an `i: Int` pattern
  // cannot be matched against a scrutinee whose static type is a reference type.
  val something: Any = Dog(3)

  // Cases are tried from top to bottom, so the most specific ones come first.
  something match {
    // Match a number:
    case 123 => println("Found integer 123")
    // Match one of several possible values:
    case -10 | 0 | 10 => println("Found -10, 0, or 10")
    // Match a number with any value:
    case i: Int => println("Found an int")
    // Match a Dog whose age is exactly 10:
    case Dog(10) => println("Found a 10 year's old Dog")
    // Match any Dog with age > 3, and extract the age:
    case Dog(age) if age > 3 => println("Found a Dog with age > 3")
    // Match any other Dog, and extract the age:
    case Dog(age) => println("Found a Dog with age: " + age)
    // Match any Dog by type only. Shown for completeness: the extractor case
    // above already catches every Dog, so in this ordering it is never reached.
    case dog: Dog => println("Found a Dog")
    // Match any Animal:
    case animal: Animal => println("Found an animal")
  }
}
            

3: Multiple classes per file

In Java your classes carry more boilerplate which, combined with the rule of one public class per file, makes the code harder to navigate and maintain.

/*
 * About 430 lines of code:
 */

/**
 * Common base for the model classes that expose an id and a name.
 * Both accessors are abstract; subclasses supply the storage.
 */
abstract class Person {

    /** @return the id of this person, as supplied by the subclass */
    public abstract Long getId();

    /** @return the name of this person, as supplied by the subclass */
    public abstract String getName();
}

/**
 * A {@link Person} with a salary. All three fields are mutable and may be
 * {@code null}; equality, hashing and the string form cover all of them.
 */
class Employee extends Person {

    private Long id;
    private String name;
    private Double salary;

    public Employee(Long id, String name, Double salary) {
        this.id = id;
        this.name = name;
        this.salary = salary;
    }

    @Override
    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    @Override
    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public Double getSalary() {
        return salary;
    }

    public void setSalary(Double salary) {
        this.salary = salary;
    }

    /** Equal only to an object of the very same class with pairwise-equal fields. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final Employee other = (Employee) o;
        return (id == null ? other.id == null : id.equals(other.id))
                && (name == null ? other.name == null : name.equals(other.name))
                && (salary == null ? other.salary == null : salary.equals(other.salary));
    }

    /** Multiplier-31 combination of the field hashes; a null field counts as 0. */
    @Override
    public int hashCode() {
        int hash = (id == null) ? 0 : id.hashCode();
        hash = hash * 31 + ((name == null) ? 0 : name.hashCode());
        hash = hash * 31 + ((salary == null) ? 0 : salary.hashCode());
        return hash;
    }

    @Override
    public String toString() {
        return new StringBuilder("Employee{")
                .append("id=").append(id)
                .append(", name='").append(name).append('\'')
                .append(", salary=").append(salary)
                .append('}')
                .toString();
    }
}

/**
 * A {@link Person} with an e-mail address. All three fields are mutable and
 * may be {@code null}; equality, hashing and the string form cover all of them.
 */
class Customer extends Person {

    private Long id;
    private String name;
    private String email;

    public Customer(Long id, String name, String email) {
        this.id = id;
        this.name = name;
        this.email = email;
    }

    @Override
    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    @Override
    public String getName() {
        return name;
    }

    public void setName(String name) {
        this.name = name;
    }

    public String getEmail() {
        return email;
    }

    public void setEmail(String email) {
        this.email = email;
    }

    /** Equal only to an object of the very same class with pairwise-equal fields. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final Customer other = (Customer) o;
        return (id == null ? other.id == null : id.equals(other.id))
                && (name == null ? other.name == null : name.equals(other.name))
                && (email == null ? other.email == null : email.equals(other.email));
    }

    /** Multiplier-31 combination of the field hashes; a null field counts as 0. */
    @Override
    public int hashCode() {
        int hash = (id == null) ? 0 : id.hashCode();
        hash = hash * 31 + ((name == null) ? 0 : name.hashCode());
        hash = hash * 31 + ((email == null) ? 0 : email.hashCode());
        return hash;
    }

    @Override
    public String toString() {
        return new StringBuilder("Customer{")
                .append("id=").append(id)
                .append(", name='").append(name).append('\'')
                .append(", email='").append(email).append('\'')
                .append('}')
                .toString();
    }
}

/**
 * A payment made by a {@link Customer} and handled by a responsible
 * {@link Employee}, with its submission and last-modification dates and its
 * value. Every field is mutable and may be {@code null}.
 */
class Payment {

    private Long id;
    private Customer customer;
    private Employee responsible;
    private Date submittedDate;
    private Date lastModifiedDate;
    private Double value;

    public Payment(Long id, Customer customer, Employee responsible, Date submittedDate, Date lastModifiedDate, Double value) {
        this.id = id;
        this.customer = customer;
        this.responsible = responsible;
        this.submittedDate = submittedDate;
        this.lastModifiedDate = lastModifiedDate;
        this.value = value;
    }

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public Customer getCustomer() {
        return customer;
    }

    public void setCustomer(Customer customer) {
        this.customer = customer;
    }

    public Employee getResponsible() {
        return responsible;
    }

    public void setResponsible(Employee responsible) {
        this.responsible = responsible;
    }

    public Date getSubmittedDate() {
        return submittedDate;
    }

    public void setSubmittedDate(Date submittedDate) {
        this.submittedDate = submittedDate;
    }

    public Date getLastModifiedDate() {
        return lastModifiedDate;
    }

    public void setLastModifiedDate(Date lastModifiedDate) {
        this.lastModifiedDate = lastModifiedDate;
    }

    public Double getValue() {
        return value;
    }

    public void setValue(Double value) {
        this.value = value;
    }

    /** Equal only to an object of the very same class with pairwise-equal fields. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final Payment other = (Payment) o;
        return same(id, other.id)
                && same(customer, other.customer)
                && same(responsible, other.responsible)
                && same(submittedDate, other.submittedDate)
                && same(lastModifiedDate, other.lastModifiedDate)
                && same(value, other.value);
    }

    /**
     * Multiplier-31 combination of the field hashes, in declaration order;
     * a null field counts as 0.
     */
    @Override
    public int hashCode() {
        final Object[] parts = {id, customer, responsible, submittedDate, lastModifiedDate, value};
        int hash = 0;
        for (Object part : parts) {
            hash = 31 * hash + (part == null ? 0 : part.hashCode());
        }
        return hash;
    }

    @Override
    public String toString() {
        return new StringBuilder("Payment{")
                .append("id=").append(id)
                .append(", customer=").append(customer)
                .append(", responsible=").append(responsible)
                .append(", submittedDate=").append(submittedDate)
                .append(", lastModifiedDate=").append(lastModifiedDate)
                .append(", value=").append(value)
                .append('}')
                .toString();
    }

    /** Null-safe equality: two nulls are equal, null never equals non-null. */
    private static boolean same(Object left, Object right) {
        return left == null ? right == null : left.equals(right);
    }
}

/**
 * A project with a budget, the {@link Customer} it is for, the managing
 * {@link Employee} and the list of employees on the team. Every field is
 * mutable and may be {@code null}.
 */
class Project {

    private Long id;
    private Double budget;
    private Customer customer;
    private Employee manager;
    private ArrayList<Employee> team;

    public Project(Long id, Double budget, Customer customer, Employee manager, ArrayList<Employee> team) {
        this.id = id;
        this.budget = budget;
        this.customer = customer;
        this.manager = manager;
        this.team = team;
    }

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public Double getBudget() {
        return budget;
    }

    public void setBudget(Double budget) {
        this.budget = budget;
    }

    public Customer getCustomer() {
        return customer;
    }

    public void setCustomer(Customer customer) {
        this.customer = customer;
    }

    public Employee getManager() {
        return manager;
    }

    public void setManager(Employee manager) {
        this.manager = manager;
    }

    public ArrayList<Employee> getTeam() {
        return team;
    }

    public void setTeam(ArrayList<Employee> team) {
        this.team = team;
    }

    /** Equal only to an object of the very same class with pairwise-equal fields. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final Project other = (Project) o;
        return same(id, other.id)
                && same(budget, other.budget)
                && same(customer, other.customer)
                && same(manager, other.manager)
                && same(team, other.team);
    }

    /**
     * Multiplier-31 combination of the field hashes, in declaration order;
     * a null field counts as 0.
     */
    @Override
    public int hashCode() {
        final Object[] parts = {id, budget, customer, manager, team};
        int hash = 0;
        for (Object part : parts) {
            hash = 31 * hash + (part == null ? 0 : part.hashCode());
        }
        return hash;
    }

    @Override
    public String toString() {
        return new StringBuilder("Project{")
                .append("id=").append(id)
                .append(", budget=").append(budget)
                .append(", customer=").append(customer)
                .append(", manager=").append(manager)
                .append(", team=").append(team)
                .append('}')
                .toString();
    }

    /** Null-safe equality: two nulls are equal, null never equals non-null. */
    private static boolean same(Object left, Object right) {
        return left == null ? right == null : left.equals(right);
    }
}

/**
 * A file attached to a project, referenced by the project's id, with its
 * file name and size. Every field is mutable and may be {@code null}.
 */
class ProjectFile {

    private Long id;
    private Long projectId;
    private String filename;
    private Long size;

    public ProjectFile(Long id, Long projectId, String filename, Long size) {
        this.id = id;
        this.projectId = projectId;
        this.filename = filename;
        this.size = size;
    }

    public Long getId() {
        return id;
    }

    public void setId(Long id) {
        this.id = id;
    }

    public Long getProjectId() {
        return projectId;
    }

    public void setProjectId(Long projectId) {
        this.projectId = projectId;
    }

    public String getFilename() {
        return filename;
    }

    public void setFilename(String filename) {
        this.filename = filename;
    }

    public Long getSize() {
        return size;
    }

    public void setSize(Long size) {
        this.size = size;
    }

    /** Equal only to an object of the very same class with pairwise-equal fields. */
    @Override
    public boolean equals(Object o) {
        if (this == o) {
            return true;
        }
        if (o == null || getClass() != o.getClass()) {
            return false;
        }
        final ProjectFile other = (ProjectFile) o;
        return same(id, other.id)
                && same(projectId, other.projectId)
                && same(filename, other.filename)
                && same(size, other.size);
    }

    /**
     * Multiplier-31 combination of the field hashes, in declaration order;
     * a null field counts as 0.
     */
    @Override
    public int hashCode() {
        final Object[] parts = {id, projectId, filename, size};
        int hash = 0;
        for (Object part : parts) {
            hash = 31 * hash + (part == null ? 0 : part.hashCode());
        }
        return hash;
    }

    @Override
    public String toString() {
        return new StringBuilder("ProjectFile{")
                .append("id=").append(id)
                .append(", projectId=").append(projectId)
                .append(", filename='").append(filename).append('\'')
                .append(", size=").append(size)
                .append('}')
                .toString();
    }

    /** Null-safe equality: two nulls are equal, null never equals non-null. */
    private static boolean same(Object left, Object right) {
        return left == null ? right == null : left.equals(right);
    }
}
            
Combined with much shorter syntax, having several classes per file is really useful. Consider how many files and lines of code this modeling would take in Java:

/** Base type for the model classes that have an `id` and a `name`; both members are abstract. */
abstract class Person {
  val id: Long
  val name: String
}

/** A [[Person]] with a salary; `id` and `name` implement the abstract members of `Person`. */
case class Employee(
                     id: Long,
                     name: String,
                     salary: Double
                     ) extends Person

/** A [[Person]] with an e-mail address; `id` and `name` implement the abstract members of `Person`. */
case class Customer(
                     id: Long,
                     name: String,
                     email: String
                     ) extends Person

/**
 * A payment: the customer it belongs to, the responsible employee, the
 * submission and last-modification dates, and the value.
 */
case class Payment(
                    id: Long,
                    customer: Customer,
                    responsible: Employee,
                    submittedDate: Date,
                    lastModifiedDate: Date,
                    value: Double
                    )

/** A project: its budget, its customer, its manager and the employees on the team. */
case class Project(
                    id: Long,
                    budget: Double,
                    customer: Customer,
                    manager: Employee,
                    team: Seq[Employee]
                    )

/** A file attached to a project, referenced by `projectId`, with its file name and size. */
case class ProjectFile(
                        id: Long,
                        projectId: Long,
                        filename: String,
                        size: Long
                        )
            

4: Better collections

Java 8 brings huge improvements to the Java collections, but they are still more cumbersome than in Scala, and don't quite fit in with the old ones:

/**
 * Java 8 streams demo: builds a small list of persons, then derives an index
 * of names by first letter, the list of adults, and the sum of all ages.
 */
public class Main {
  public static void main(String[] args) {

    final List<Person> persons = Arrays.asList(
        new Person("John Doe", 26),
        new Person("Jane Doe", 21),
        new Person("Mike", 28),
        new Person("Anne", 16),
        new Person("John", 14));

    // Names in sorted order, grouped under their first character.
    final Map<Character, List<String>> firstLetterIndex = persons.stream()
        .map(Person::getName)
        .sorted()
        .collect(Collectors.groupingBy(name -> name.charAt(0)));

    // This variable must never be reassigned: the filter lambda below can only
    // capture it while it is (effectively) final.
    final Integer adultAge = 18;

    final List<Person> adults = persons.stream()
        .filter(person -> person.getAge() >= adultAge)
        .collect(Collectors.toList());

    final Integer sumOfAges = persons.stream()
        .map(Person::getAge)
        .reduce(0, Integer::sum);
  }

  /** Simple mutable name/age holder used by the demo above. */
  static class Person {
    private String name;
    private Integer age;

    public Person(String name, Integer age) {
      this.name = name;
      this.age = age;
    }

    public String getName() {
      return name;
    }

    public void setName(String name) {
      this.name = name;
    }

    public Integer getAge() {
      return age;
    }

    public void setAge(Integer age) {
      this.age = age;
    }
  }
}
            
As you can see in the code below, Scala collections' code gets much shorter and interacts better with its surroundings (i.e., there is no need for stream() or collect()):

/**
 * Scala collections demo mirroring the Java streams example: an index of
 * names by first letter, the list of adults, and the sum of all ages.
 */
class Main extends App {

  case class Person(name: String, age: Int)

  val persons = List(
    Person("John Doe", 26),
    Person("Jane Doe", 21),
    Person("Mike", 28),
    Person("Anne", 16),
    Person("John", 14)
  )

  // Names in sorted order, grouped under their first character.
  val firstLetterIndex = {
    val sortedNames = persons.map(person => person.name).sorted
    sortedNames.groupBy(name => name.charAt(0))
  }

  // In Scala, this variable can be reassigned and still be used in the filter:
  var adultAge = 18

  val adults = persons.filter(person => person.age >= adultAge)

  val sumOfAges = persons.map(person => person.age).sum
}

5: Implicit methods

Implicit methods (used here in the form of an implicit class) allow you, among other things, to extend an existing class with additional functionality without changing the class itself (which may live in a library you can't control).
In this example we're implicitly adding the method 'readToString()' to java.io.File:

/**
 * Demonstrates an implicit class that adds a `readToString()` method to
 * `java.io.File`.
 */
class Main extends App {

  import java.io.File
  import scala.io.Source

  implicit class FileUtils(f: File) {
    /**
     * Reads the whole file and returns its lines joined with the platform
     * line separator. Usable on any java.io.File while this implicit class is
     * in scope.
     *
     * @return the file contents as a single string
     */
    def readToString(): String = {
      val source = Source.fromFile(f)
      // Source keeps the file open until it is closed explicitly, so release
      // it even if reading fails.
      try source.getLines().mkString(System.lineSeparator())
      finally source.close()
    }
  }

  val myFile: File = new File("/some/location/my_file.txt")
  // java.io.File doesn't have this method, but it is defined implicitly (above):
  val contents: String = myFile.readToString()

}

6: Traits

Traits are a great tool for modular, reusable design. They provide a kind of safe multiple inheritance and can contain both method implementations and state (Java 8 introduced default methods in interfaces, but they are not nearly as powerful):

/**
 * Demonstrates traits: abstract members, method implementations, state,
 * overriding, mixing several traits into a class, and mixing a trait into a
 * single instance.
 */
class Main extends App {

  trait Animal { var weight: Double; def move(): Unit }

  // Traits can have methods:
  trait FlyingAnimal extends Animal { def move(): Unit = println("I'm flying!") }
  trait WalkingAnimal extends Animal { def move(): Unit = println("I'm walking!") }

  // (Trait methods can be overridden; the override here also settles which
  // of the two inherited move() implementations is used):
  trait FlyingWalkingAnimal extends FlyingAnimal with WalkingAnimal {
    override def move(): Unit = println("I'm flying, or walking maybe!")
  }

  // Traits can have state:
  trait ColoredAnimal extends Animal { val color: String }
  trait LightColorAnimal extends ColoredAnimal { val color = "light" }
  trait DarkColorAnimal extends ColoredAnimal { val color = "dark" }

  trait HeavyAnimal extends Animal { var weight = 100d }
  trait LightAnimal extends Animal { var weight = 20d }

  trait AnimalInZoo

  // You can inherit multiple traits:
  class Lion    extends WalkingAnimal       with HeavyAnimal   with LightColorAnimal
  class Dog     extends WalkingAnimal       with LightAnimal   with DarkColorAnimal
  class Seagull extends FlyingWalkingAnimal with LightAnimal   with LightColorAnimal
  class Eagle   extends FlyingAnimal        with LightAnimal   with DarkColorAnimal

  val wildLion = new Lion()
  // This lion instance (only) is in the zoo! The trait is mixed into this one
  // object, so it has to be built from Lion, not from another animal class:
  val zooLion = new Lion() with AnimalInZoo
}
Java 8 introduced default methods in interfaces, but they are not nearly as powerful - they cannot have state, for example. As Brian Goetz (Java Language Architect at Oracle) said on the lambda-dev mailing list: "The key goal of adding default methods to Java was 'interface evolution', not 'poor man's traits'."

7: Lots more...!

There are many more small language features which are really useful. Below is an example of lazy vals, default argument values, named arguments, multiline strings and string interpolation - and the list could go on:

/**
 * Tour of small language features: lazy vals, default and named arguments,
 * multiline strings and string interpolation.
 */
class Main extends App {
  var displayAge = false

  // Evaluated on first access only, and never if nobody reads it:
  lazy val averageAgeInUSA = {
    Thread.sleep(1000) /* Long call to the database... */
    35
  }

  // displayAge is false, so the lazy value above is never computed
  if (displayAge) println(s"Average age is: $averageAgeInUSA")

  // Constructors (like methods) can declare default values for their parameters:
  class Image(
    colorMode: String = "RGBA",
    width: Int = 800,
    height: Int = 600,
    autoScaling: Boolean = false
  )

  val img1 = new Image() /* <-- every parameter takes its default */
  val img2 = new Image("CMYK") /* <-- only colorMode differs from the default */
  // Arguments can be passed by name:
  val img3 = new Image(autoScaling = true) /* <-- only autoScaling differs from the default */

  println(s"""Scala allows you to have
             |multiline strings, and even
             |using variables directly in those
             |strings with string interpolation.
             |Like this: $displayAge.
             |""".stripMargin)
}

8: A practical example - Spark

The Apache Spark framework (written in Scala) can be used from both Scala and Java. The project publishes several example programs on GitHub in both languages; here is one of them (PageRank) - compare the number of lines and the visual complexity of the code:

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.examples;



import scala.Tuple2;

import com.google.common.collect.Iterables;

import org.apache.spark.SparkConf;
import org.apache.spark.api.java.JavaPairRDD;
import org.apache.spark.api.java.JavaRDD;
import org.apache.spark.api.java.JavaSparkContext;
import org.apache.spark.api.java.function.Function;
import org.apache.spark.api.java.function.Function2;
import org.apache.spark.api.java.function.PairFlatMapFunction;
import org.apache.spark.api.java.function.PairFunction;

import java.util.ArrayList;
import java.util.List;
import java.util.Iterator;
import java.util.regex.Pattern;

/**
 * Computes the PageRank of URLs from an input file. Input file should
 * be in format of:
 * URL         neighbor URL
 * URL         neighbor URL
 * URL         neighbor URL
 * ...
 * where URL and their neighbors are separated by space(s).
 *
 * This is an example implementation for learning how to use Spark. For more conventional use,
 * please refer to org.apache.spark.graphx.lib.PageRank
 */
public final class JavaPageRank {
  private static final Pattern SPACES = Pattern.compile("\\s+");

  /** Prints to stderr a notice that this implementation is only an example. */
  static void showWarning() {
    System.err.println(
        "WARN: This is a naive implementation of PageRank "
            + "and is given as an example! \n"
            + "Please use the PageRank implementation found in "
            + "org.apache.spark.graphx.lib.PageRank for more conventional use.");
  }

  /** Adds two values; used as the reducer that sums the contributions per URL. */
  private static class Sum implements Function2<Double, Double, Double> {
    @Override
    public Double call(Double left, Double right) {
      return left + right;
    }
  }

  /**
   * Entry point. Expects {@code args[0]} to be the input file and
   * {@code args[1]} the number of iterations; exits with status 1 otherwise.
   */
  public static void main(String[] args) throws Exception {
    if (args.length < 2) {
      System.err.println("Usage: JavaPageRank <file> <number_of_iterations>");
      System.exit(1);
    }

    showWarning();

    JavaSparkContext ctx = new JavaSparkContext(new SparkConf().setAppName("JavaPageRank"));

    // Input file, one "URL neighborURL" pair per line.
    JavaRDD<String> lines = ctx.textFile(args[0], 1);

    // (URL, neighbor) pairs, de-duplicated and grouped into URL -> neighbors.
    JavaPairRDD<String, Iterable<String>> links = lines
        .mapToPair(new PairFunction<String, String, String>() {
          @Override
          public Tuple2<String, String> call(String line) {
            String[] fields = SPACES.split(line);
            return new Tuple2<String, String>(fields[0], fields[1]);
          }
        })
        .distinct()
        .groupByKey()
        .cache();

    // Every URL that has outgoing links starts with a rank of one.
    JavaPairRDD<String, Double> ranks = links.mapValues(new Function<Iterable<String>, Double>() {
      @Override
      public Double call(Iterable<String> neighbors) {
        return 1.0;
      }
    });

    // Parsed once, right where the original loop condition first evaluated it.
    final int iterations = Integer.parseInt(args[1]);
    for (int round = 0; round < iterations; round++) {
      // Each URL hands an equal share of its current rank to each neighbor.
      JavaPairRDD<String, Double> contribs = links.join(ranks).values()
        .flatMapToPair(new PairFlatMapFunction<Tuple2<Iterable<String>, Double>, String, Double>() {
          @Override
          public Iterable<Tuple2<String, Double>> call(Tuple2<Iterable<String>, Double> neighborsAndRank) {
            int neighborCount = Iterables.size(neighborsAndRank._1);
            List<Tuple2<String, Double>> shares = new ArrayList<Tuple2<String, Double>>();
            for (String neighbor : neighborsAndRank._1) {
              shares.add(new Tuple2<String, Double>(neighbor, neighborsAndRank._2() / neighborCount));
            }
            return shares;
          }
      });

      // New rank = 0.15 + 0.85 * (sum of the contributions received).
      ranks = contribs.reduceByKey(new Sum()).mapValues(new Function<Double, Double>() {
        @Override
        public Double call(Double total) {
          return 0.15 + total * 0.85;
        }
      });
    }

    // Bring the final ranks to the driver and print them.
    List<Tuple2<String, Double>> output = ranks.collect();
    for (Tuple2<?,?> entry : output) {
        System.out.println(entry._1() + " has rank: " + entry._2() + ".");
    }

    ctx.stop();
  }
}
From: JavaPageRank.java

/*
 * Licensed to the Apache Software Foundation (ASF) under one or more
 * contributor license agreements.  See the NOTICE file distributed with
 * this work for additional information regarding copyright ownership.
 * The ASF licenses this file to You under the Apache License, Version 2.0
 * (the "License"); you may not use this file except in compliance with
 * the License.  You may obtain a copy of the License at
 *
 *    http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing, software
 * distributed under the License is distributed on an "AS IS" BASIS,
 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 * See the License for the specific language governing permissions and
 * limitations under the License.
 */

package org.apache.spark.examples

import org.apache.spark.SparkContext._
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Computes the PageRank of URLs from an input file. Input file should
 * be in format of:
 * URL         neighbor URL
 * URL         neighbor URL
 * URL         neighbor URL
 * ...
 * where URL and their neighbors are separated by space(s).
 *
 * This is an example implementation for learning how to use Spark. For more conventional use,
 * please refer to org.apache.spark.graphx.lib.PageRank
 */
object SparkPageRank {

  /** Prints to stderr a notice that this implementation is only an example. */
  def showWarning(): Unit = {
    System.err.println(
      """WARN: This is a naive implementation of PageRank and is given as an example!
        |Please use the PageRank implementation found in org.apache.spark.graphx.lib.PageRank
        |for more conventional use.
      """.stripMargin)
  }

  /**
   * Entry point.
   *
   * @param args `args(0)` is the input file (required); `args(1)` is the
   *             number of iterations (optional, 10 when omitted)
   */
  def main(args: Array[String]): Unit = {
    if (args.length < 1) {
      System.err.println("Usage: SparkPageRank <file> <iter>")
      System.exit(1)
    }

    showWarning()

    val sparkConf = new SparkConf().setAppName("PageRank")
    // The iteration count is the SECOND argument, so it is only present when
    // at least two arguments were given; otherwise fall back to 10.
    val iters = if (args.length > 1) args(1).toInt else 10
    val ctx = new SparkContext(sparkConf)
    val lines = ctx.textFile(args(0), 1)
    // (URL, neighbor) pairs, de-duplicated and grouped into URL -> neighbors.
    val links = lines.map{ s =>
      val parts = s.split("\\s+")
      (parts(0), parts(1))
    }.distinct().groupByKey().cache()
    // Reassigned on every iteration below, hence a var.
    var ranks = links.mapValues(v => 1.0)

    for (i <- 1 to iters) {
      // Each URL hands an equal share of its current rank to each neighbor.
      val contribs = links.join(ranks).values.flatMap{ case (urls, rank) =>
        val size = urls.size
        urls.map(url => (url, rank / size))
      }
      ranks = contribs.reduceByKey(_ + _).mapValues(0.15 + 0.85 * _)
    }

    val output = ranks.collect()
    output.foreach(tup => println(tup._1 + " has rank: " + tup._2 + "."))

    ctx.stop()
  }
}
From: SparkPageRank.scala
Do you have more suggestions? Leave your comment below!
Contact Us
Say Hey