package com.semanticsquare.functional.streams;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.List;
import java.util.function.Function;
import java.util.function.Predicate;
import java.util.stream.Stream;
import org.htmlcleaner.HtmlCleaner;
public class StreamsDemo {
static String doc1 = "
One of the most common uses of streams is to represent queries over data in collections";
static String doc2 = "Information integration systems provide valuable services to users by integrating information from a number of autonomous, heterogeneous and distributed Web sources";
static String doc3 = "Solr is the popular, blazing fast open source enterprise search platform from the Apache Lucene";
static String doc4 = "Java 8 goes one more step ahead and has developed a streams API which lets us think about parallelism";
static List documents = new ArrayList<>(Arrays.asList(doc1, doc2, doc3, doc4));
private static void imperative() {
System.out.println("Imperative: \n");
for (String doc : documents) {
Predicate filter = d -> d.contains("stream");
if (filter.test(doc)) {
Function htmlCleaner = Indexer::stripHtmlTags;
doc = htmlCleaner.apply(doc);
Function stopwordRemover = Indexer::removeStopwords;
doc = stopwordRemover.apply(doc);
System.out.println(doc);
}
}
}
private static void declarative() {
System.out.println("\n\nDeclarative: \n");
/*
// Stream pipeline (a common structure):
(a) set-up stream source (~ tables in SQL world)
(b) 0 or more intermediate operations (~ WHERE clause) -- lazy
& return Stream, i.e., transforms a stream into another stream
(c) terminal operation (~ column names) -- eager
& return NON-STREAM. Terminates (closes) a stream
*/
Stream stream = Stream.of(doc1, doc2, doc3, doc4)//Arrays.stream(new String[]{doc1, doc2, doc3, doc4})//documents.stream()
.filter(d -> d.contains("stream"))
.map(Indexer::stripHtmlTags)
.map(Indexer::removeStopwords);
print(stream);
//.forEach(System.out::println);
}
private static void print(Stream stream) {
stream.forEach(System.out::println);
//stream.forEach(System.out::println);
}
public static void main(String[] args) {
imperative();
declarative();
}
}
/**
 * Text-preparation helpers used by the demo: HTML clean-up and stop-word
 * removal.
 */
class Indexer {

    /** Words dropped by {@link #removeStopwords(String)}; matching is exact and case-sensitive. */
    private static final List<String> STOP_WORDS =
            Arrays.asList("of", "the", "a", "is", "to", "in", "and");

    /**
     * Prints a trace line and returns the text extracted from the document by
     * {@code HtmlCleaner} ({@code clean(doc).getText()}).
     *
     * @param doc the document to clean
     * @return the text reported by HtmlCleaner for the parsed document
     *         (presumably the content with markup removed -- see the
     *         HtmlCleaner documentation)
     */
    static String stripHtmlTags(String doc) {
        // Trace output shows when this step actually runs for a document.
        System.out.println("In stripHtmlTags");
        return new HtmlCleaner().clean(doc).getText().toString();
    }

    /**
     * Removes stop words from a document split on single spaces.
     *
     * <p>Every retained token is followed by one space, so a non-empty result
     * ends with a trailing space.
     *
     * @param doc the document to filter
     * @return the retained tokens in their original order, each followed by a space
     */
    static String removeStopwords(String doc) {
        StringBuilder sb = new StringBuilder();
        for (String word : doc.split(" ")) {
            if (!STOP_WORDS.contains(word)) {
                sb.append(word).append(" ");
            }
        }
        return sb.toString();
    }
}