Wednesday, August 12, 2015

Apache Zookeeper : monitoring ZooKeeper using four-letter commands

Here are a few four-letter commands you can use against ZooKeeper to check its health and status: ruok (server health check), stat (brief stats and connected clients), conf (server configuration), cons (per-connection details), envi (server environment), and wchc (watches grouped by session). The examples below run against a MapR ZooKeeper listening on port 5181.

[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# echo ruok | nc `hostname` 5181
imok[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# echo stat | nc `hostname` 5181
Zookeeper version: 3.4.5-mapr-1406--1, built on 01/19/2015 22:57 GMT
Clients:
 /10.0.0.251:57652[0](queued=0,recved=1,sent=0)
 /10.0.0.251:57623[1](queued=0,recved=9151,sent=9158)
 /10.0.0.251:57643[1](queued=0,recved=9120,sent=9125)
 /10.0.0.251:57622[1](queued=0,recved=9156,sent=9164)
 /10.0.0.251:57624[1](queued=0,recved=12974,sent=12974)
 /10.0.0.251:57620[1](queued=0,recved=9150,sent=9157)
 /10.0.0.251:57644[1](queued=0,recved=7708,sent=7966)
 /10.0.0.251:57626[1](queued=0,recved=9195,sent=9206)
 /10.0.0.251:57619[1](queued=0,recved=9185,sent=9186)
 /10.0.0.251:57621[1](queued=0,recved=9143,sent=9149)

Latency min/avg/max: 0/0/63
Received: 85043
Sent: 85345
Connections: 10
Outstanding: 0
Zxid: 0x136
Mode: standalone
Node count: 59
[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# echo conf | nc `hostname` 5181
clientPort=5181
dataDir=/opt/mapr/zkdata/version-2
dataLogDir=/opt/mapr/zkdata/version-2
tickTime=2000
maxClientCnxns=100
minSessionTimeout=4000
maxSessionTimeout=40000
serverId=0
[root@ip-10-0-0-251 ~]# echo cons | nc `hostname` 5181
 /10.0.0.251:57623[1](queued=0,recved=9162,sent=9169,sid=0x14f1efc29d70004,lop=PING,est=1439333921242,to=30000,lcxid=0x1d,lzxid=0x136,lresp=1439425306327,llat=0,minlat=0,avglat=0,maxlat=2)
 /10.0.0.251:57643[1](queued=0,recved=9131,sent=9136,sid=0x14f1efc29d70008,lop=PING,est=1439334130760,to=30000,lcxid=0x13,lzxid=0x136,lresp=1439425306958,llat=0,minlat=0,avglat=0,maxlat=2)
 /10.0.0.251:57654[0](queued=0,recved=1,sent=0)
 /10.0.0.251:57622[1](queued=0,recved=9166,sent=9174,sid=0x14f1efc29d70003,lop=PING,est=1439333921221,to=30000,lcxid=0x22,lzxid=0x136,lresp=1439425302064,llat=0,minlat=0,avglat=0,maxlat=5)
 /10.0.0.251:57624[1](queued=0,recved=12989,sent=12989,sid=0x14f1efc29d70005,lop=GETC,est=1439333936044,to=30000,lcxid=0x1aef,lzxid=0x136,lresp=1439425303437,llat=0,minlat=0,avglat=0,maxlat=6)
 /10.0.0.251:57620[1](queued=0,recved=9161,sent=9168,sid=0x14f1efc29d70001,lop=PING,est=1439333921154,to=30000,lcxid=0x1d,lzxid=0x136,lresp=1439425307676,llat=0,minlat=0,avglat=0,maxlat=4)
 /10.0.0.251:57644[1](queued=0,recved=7716,sent=7974,sid=0x14f1efc29d70009,lop=PING,est=1439334142657,to=40000,lcxid=0x394,lzxid=0x136,lresp=1439425303832,llat=0,minlat=0,avglat=0,maxlat=16)
 /10.0.0.251:57626[1](queued=0,recved=9206,sent=9217,sid=0x14f1efc29d70007,lop=PING,est=1439333966309,to=30000,lcxid=0x4f,lzxid=0x136,lresp=1439425306675,llat=0,minlat=0,avglat=0,maxlat=6)
 /10.0.0.251:57619[1](queued=0,recved=9196,sent=9197,sid=0x14f1efc29d70000,lop=PING,est=1439333920963,to=30000,lcxid=0x41,lzxid=0x136,lresp=1439425304945,llat=0,minlat=0,avglat=0,maxlat=63)
 /10.0.0.251:57621[1](queued=0,recved=9153,sent=9159,sid=0x14f1efc29d70002,lop=PING,est=1439333921194,to=30000,lcxid=0x15,lzxid=0x136,lresp=1439425302145,llat=0,minlat=0,avglat=0,maxlat=6)

[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# echo envi | nc `hostname` 5181
Environment:
zookeeper.version=3.4.5-mapr-1406--1, built on 01/19/2015 22:57 GMT
host.name=ip-10-0-0-251
java.version=1.7.0_45
java.vendor=Oracle Corporation
java.home=/usr/lib/jvm/java-1.7.0-openjdk-1.7.0.45.x86_64/jre
java.class.path=/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../build/classes:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../build/lib/*.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../lib/slf4j-log4j12-1.6.1.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../lib/slf4j-api-1.6.1.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../lib/netty-3.2.2.Final.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../lib/log4j-1.2.15.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../lib/jline-0.9.94.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../zookeeper-3.4.5-mapr-1406.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/bin/../src/java/lib/*.jar:/opt/mapr/zookeeper/zookeeper-3.4.5/conf::/opt/mapr/lib/maprfs-4.0.2-mapr.jar:/opt/mapr/lib/protobuf-java-2.5.0.jar:/opt/mapr/lib/libprotodefs-4.0.2-mapr.jar:/opt/mapr/lib/baseutils-4.0.2-mapr.jar:/opt/mapr/lib/json-20080701.jar:/opt/mapr/lib/flexjson-2.1.jar:/opt/mapr/lib/commons-codec-1.5.jar
java.library.path=/opt/mapr/lib
java.io.tmpdir=/tmp
java.compiler=
os.name=Linux
os.arch=amd64
os.version=2.6.32-431.el6.x86_64
user.name=mapr
user.home=/home/mapr
user.dir=/
[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# 
[root@ip-10-0-0-251 ~]# echo wchc | nc `hostname` 5181
0x14f1efc29d70004
 /services/kvstore/ip-10-0-0-251
 /services/hoststats/ip-10-0-0-251
 /services/hoststats/master
 /nodes/ip-10-0-0-251/services/hoststats
0x14f1efc29d70008
 /nodes/ip-10-0-0-251/services/drill-bits
 /services/drill-bits/ip-10-0-0-251
 /services/drill-bits/master
0x14f1efc29d70003
 /nodes/ip-10-0-0-251/services/cldb
 /services/kvstore/ip-10-0-0-251
 /services/cldb/ip-10-0-0-251
 /datacenter/controlnodes/cldb/active/CLDBRunningMaster
 /services/cldb/master
0x14f1efc29d70005
 /datacenter/controlnodes/cldb/active/CLDBMaster
0x14f1efc29d70009
 /drill/sys.storage_plugins/hive
 /drill/sys.storage_plugins/cp
 /drill/sys.storage_plugins/mongo
 /drill/MyDrillCluster-402-drillbits/9d62d3ba-f9fa-4ad4-9b9c-684b854d41fe
 /drill/sys.storage_plugins/hbase
 /drill/sys.storage_plugins/dfs
0x14f1efc29d70001
 /services/webserver/master
 /nodes/ip-10-0-0-251/services/webserver
 /services/webserver/ip-10-0-0-251
 /services/cldb/master
0x14f1efc29d70007
 /services/drill-bits/master
 /services_config/kvstore/ip-10-0-0-251
 /services/kvstore/ip-10-0-0-251
 /services/services/drill-bits
 /services/services/webserver
 /services_config/drill-bits/ip-10-0-0-251
 /services/cldb/master
 /services/webserver/master
 /services/drill-bits/ip-10-0-0-251
 /services_config/cldb/ip-10-0-0-251
 /datacenter/controlnodes/cldb/active/CLDBMaster
 /services/services/cldb
 /services/services/kvstore
 /services/hoststats/ip-10-0-0-251
 /services/services/hoststats
 /services/cldb/ip-10-0-0-251
 /services_config/hoststats/ip-10-0-0-251
 /services/hoststats/master
 /services/webserver/ip-10-0-0-251
 /services/kvstore/master
 /services_config/webserver/ip-10-0-0-251
0x14f1efc29d70000
 /nodes/ip-10-0-0-251/services/nfs/start
 /nodes/ip-10-0-0-251/services/hbmaster/start
 /nodes/ip-10-0-0-251/services/historyserver/stop
 /nodes/ip-10-0-0-251/sdump
 /nodes/ip-10-0-0-251/services/hbmaster/stop
 /nodes/ip-10-0-0-251/services/nfs/stop
 /nodes/ip-10-0-0-251/services/tasktracker/stop
 /nodes/ip-10-0-0-251/services/fileserver/stop
 /nodes/ip-10-0-0-251/services/resourcemanager/stop
 /nodes/ip-10-0-0-251/services/fileserver/start
 /servers/ip-10-0-0-251
 /nodes/ip-10-0-0-251/services/jobtracker/stop
 /nodes/ip-10-0-0-251/services/nodemanager/start
 /nodes/ip-10-0-0-251/services/nodemanager/stop
 /nodes/ip-10-0-0-251/services/hbregionserver/stop
 /nodes/ip-10-0-0-251/services/historyserver/start
 /nodes/ip-10-0-0-251/services/jobtracker/start
 /nodes/ip-10-0-0-251/services/tasktracker/start
 /datacenter/controlnodes/cldb/active/CLDBRunningMaster
 /nodes/ip-10-0-0-251/services/hbregionserver/start
 /nodes/ip-10-0-0-251/stop
 /nodes/ip-10-0-0-251/services/resourcemanager/start
0x14f1efc29d70002
 /datacenter/license/m7/enabled
 /services/kvstore/ip-10-0-0-251
 /nodes/ip-10-0-0-251/services/kvstore
 /services/kvstore/master
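
If you want to poll these four-letter words from code instead of a shell, a plain TCP socket is enough. Below is a minimal sketch in Java; the host and port (5181) are taken from the session above and would need to match your own ZooKeeper server.

import java.io.BufferedReader;
import java.io.InputStreamReader;
import java.io.OutputStream;
import java.net.Socket;

public class FourLetterWordClient {
    // Sends a single four-letter command (e.g. "ruok", "stat") and prints the reply.
    public static void main(String[] args) throws Exception {
        String host = args.length > 0 ? args[0] : "localhost"; // assumption: adjust to your ZK host
        String cmd  = args.length > 1 ? args[1] : "ruok";
        try (Socket socket = new Socket(host, 5181)) {          // 5181 is the MapR ZooKeeper port used above
            OutputStream out = socket.getOutputStream();
            out.write(cmd.getBytes("UTF-8"));
            out.flush();
            BufferedReader in = new BufferedReader(
                    new InputStreamReader(socket.getInputStream(), "UTF-8"));
            String line;
            while ((line = in.readLine()) != null) {            // ZooKeeper closes the socket after replying
                System.out.println(line);
            }
        }
    }
}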


ZooKeeper : create, list, and delete znodes using Java

A quick and dirty, but working, way to test creating, listing, and deleting znodes using Java:

package com.rajkrrsingh.zk;

import java.io.IOException;
import java.util.List;
import java.util.concurrent.CountDownLatch;

import org.apache.zookeeper.CreateMode;
import org.apache.zookeeper.KeeperException;
import org.apache.zookeeper.WatchedEvent;
import org.apache.zookeeper.Watcher;
import org.apache.zookeeper.ZooDefs;
import org.apache.zookeeper.ZooKeeper;

public class ZookeeperRecipe {

    private ZooKeeper zk = null;

    // Connect and block until the session reaches SyncConnected.
    public ZooKeeper connect(String hosts, int timeOut) throws IOException, InterruptedException {
        final CountDownLatch connectedSignal = new CountDownLatch(1);
        this.zk = new ZooKeeper(hosts, timeOut, new Watcher() {
            @Override
            public void process(WatchedEvent event) {
                if (event.getState() == Watcher.Event.KeeperState.SyncConnected) {
                    connectedSignal.countDown();
                }
            }
        });
        System.out.println("Before connectedSignal.await();");
        connectedSignal.await();
        System.out.println("after connectedSignal.await();");
        return zk;
    }

    // Create a persistent parent znode for the group.
    public void createGroup(String groupName) throws KeeperException, InterruptedException {
        String path = "/" + groupName;
        zk.create(path, null /* data */, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.PERSISTENT);
    }

    // Create an ephemeral-sequential child znode carrying some data.
    public void creatingGroupMembersWithData(String groupName, String member, byte[] data) throws Exception {
        String path = "/" + groupName + "/" + member + "-"; // a sequence number gets appended to this path
        System.out.println("created child node with path "
                + zk.create(path, data, ZooDefs.Ids.OPEN_ACL_UNSAFE, CreateMode.EPHEMERAL_SEQUENTIAL));
    }

    // List the children of a znode and register a watch for child changes.
    public void list(String zknode) throws Exception {
        String path = "/" + zknode;
        List<String> children = zk.getChildren(path, new Watcher() {
            @Override
            public void process(WatchedEvent event) {
                if (event.getType() == Event.EventType.NodeChildrenChanged) {
                    System.out.println("node children updated");
                }
            }
        });
        if (children.isEmpty()) {
            System.out.println("no children in the znode");
            return;
        }
        System.out.println("Children : " + children + " : " + children.size());
    }

    // Delete the children first, then the znode itself (version -1 matches any version).
    private void delete(String zknode) throws Exception {
        String path = "/" + zknode;
        List<String> childNodes = zk.getChildren(path, false);
        for (String childNode : childNodes) {
            zk.delete(path + "/" + childNode, -1);
        }
        zk.delete(path, -1);
        System.out.println("Done deleting");
    }

    public static void main(String[] args) throws Exception {
        ZookeeperRecipe zkr = new ZookeeperRecipe();
        zkr.connect("hostname:5181", 5000); // replace hostname with your ZooKeeper host
        System.out.println("Connected");
        System.out.println("Going to Create zk group");
        zkr.createGroup("demo-group");
        System.out.println("Creating group member znodes with data");
        zkr.creatingGroupMembersWithData("demo-group", "childnode", "data1".getBytes());
        System.out.println("Created child node in the group");
        System.out.println("listing znode --> children");
        zkr.list("demo-group");
        System.out.println("Deleting a znode");
        zkr.delete("demo-group");
    }
}
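
To compile the class above you need the ZooKeeper client library on the classpath. If you build with Maven, a dependency along these lines should work (the version here is an assumption, chosen to match the 3.4.5 server shown in the stat output above; adjust it, or point at the MapR-packaged jar, to match your cluster):

<dependency>
    <groupId>org.apache.zookeeper</groupId>
    <artifactId>zookeeper</artifactId>
    <version>3.4.5</version>
</dependency>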

Monday, August 10, 2015

Quick and dirty way of accessing Apache Drill using Drill JDBC connectivity

This is a quick and dirty way of accessing Apache Drill using the Drill JDBC driver. Make sure you have the Drill JDBC driver (drill-jdbc-all-1.1.0.jar) on the classpath while running your Java program.
Java Class:

package com.test;

import java.sql.Connection;
import java.sql.DriverManager;
import java.sql.ResultSet;
import java.sql.Statement;

public class DrillJDBCTest {

    public static void main(String[] args) throws Exception {
        // Load the Drill JDBC driver and connect through ZooKeeper.
        Class.forName("org.apache.drill.jdbc.Driver");
        Connection connection = DriverManager.getConnection(
                "jdbc:drill:zk=node3.mapr.com:5181/drill/my_cluster_com-drillbits");
        Statement st = connection.createStatement();
        ResultSet rs = st.executeQuery("SELECT * from cp.`employee`");
        while (rs.next()) {
            System.out.println(rs.getString(1)); // print the first column of each row
        }
    }
}
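
A minimal way to compile and run the class, assuming the driver jar sits in the current directory (the paths are assumptions; adjust them to your source layout):

javac -cp drill-jdbc-all-1.1.0.jar com/test/DrillJDBCTest.java
java  -cp .:drill-jdbc-all-1.1.0.jar com.test.DrillJDBCTest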

After running the generated class file you will see output along these lines:
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
 printing  full name of the employee  
:::::::::::::::::::::::::::::::::::::::::::::::::::::::::
Sheri Nowmer
Derrick Whelply
Michael Spence
Maya Gutierrez
Roberta Damstra
DONE


Writing a parquet file using Hadoop mapreduce job

This is a simple example of writing a Parquet file using a Hadoop MapReduce job.
Env: Java 7, Maven 3.0.1, Hadoop 1

Step 1: Create a simple Java project and add the repository information and dependencies to the pom.xml

<repository>
    <id>sonatype-nexus-snapshots</id>
    <url>https://oss.sonatype.org/content/repositories/snapshots</url>
    <releases>
        <enabled>false</enabled>
    </releases>
    <snapshots>
        <enabled>true</enabled>
    </snapshots>
</repository>

<dependency>
    <groupId>com.twitter</groupId>
    <artifactId>parquet-common</artifactId>
    <version>1.1.2-SNAPSHOT</version>
</dependency>
<dependency>
    <groupId>com.twitter</groupId>
    <artifactId>parquet-encoding</artifactId>
    <version>1.1.2-SNAPSHOT</version>
</dependency>
<dependency>
    <groupId>com.twitter</groupId>
    <artifactId>parquet-column</artifactId>
    <version>1.1.2-SNAPSHOT</version>
</dependency>
<dependency>
    <groupId>com.twitter</groupId>
    <artifactId>parquet-hadoop</artifactId>
    <version>1.1.2-SNAPSHOT</version>
</dependency>
<dependency>
    <groupId>org.apache.hive</groupId>
    <artifactId>hive-exec</artifactId>
    <version>0.13.0</version>
</dependency>

Step 2: Write a Mapper implementation
package com.test;

import java.io.IOException;

import org.apache.hadoop.hive.ql.io.parquet.writable.BinaryWritable;
import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.io.LongWritable;
import org.apache.hadoop.io.Text;
import org.apache.hadoop.io.Writable;
import org.apache.hadoop.mapreduce.Mapper;

import parquet.io.api.Binary;

public class ParquetMapper extends Mapper<LongWritable, Text, Void, ArrayWritable> {

    @Override
    protected void setup(Context context) throws IOException, InterruptedException {
        super.setup(context);
        // The schema was set on the configuration by the driver (see DataWritableWriteSupport.setSchema).
        DataWritableWriteSupport.getSchema(context.getConfiguration());
    }

    @Override
    public void map(LongWritable n, Text line, Context context) throws IOException, InterruptedException {
        if (line != null && line.getLength() > 0) {
            // Input lines are tab-separated: empname<TAB>designation
            String[] parts = line.toString().split("\t");
            Writable[] data = new Writable[2];
            for (int i = 0; i < 2; i++) {
                data[i] = new BinaryWritable(Binary.fromString(parts[i]));
            }
            ArrayWritable aw = new ArrayWritable(Writable.class, data);
            context.write(null, aw); // key is Void, the write support only consumes the value
        }
    }
}

Step 3: Finally, implement the job driver class as follows
package com.test;

import static parquet.schema.PrimitiveType.PrimitiveTypeName.BINARY;
import static parquet.schema.Type.Repetition.OPTIONAL;
import static parquet.schema.Type.Repetition.REQUIRED;

import org.apache.hadoop.conf.Configuration;
import org.apache.hadoop.conf.Configured;
import org.apache.hadoop.fs.Path;
import org.apache.hadoop.hive.ql.io.parquet.write.DataWritableWriteSupport;
import org.apache.hadoop.io.ArrayWritable;
import org.apache.hadoop.mapreduce.Job;
import org.apache.hadoop.mapreduce.lib.input.FileInputFormat;
import org.apache.hadoop.mapreduce.lib.input.TextInputFormat;
import org.apache.hadoop.util.Tool;
import org.apache.hadoop.util.ToolRunner;

import parquet.hadoop.ParquetOutputFormat;
import parquet.schema.MessageType;
import parquet.schema.PrimitiveType;

@SuppressWarnings("deprecation")
public class App extends Configured implements Tool {

    public int run(String[] args) throws Exception {

        Path inpath = new Path(args[0]);
        Path outpath = new Path(args[1]);

        Configuration conf = getConf();

        // Parquet writer settings: 128 MB row groups, snappy compression.
        conf.set(ParquetOutputFormat.BLOCK_SIZE, Integer.toString(128 * 1024 * 1024));
        conf.set(ParquetOutputFormat.COMPRESSION, "SNAPPY");

        // Two-column schema: a required empname and an optional designation, both binary (string).
        MessageType messagetype = new MessageType("employee",
                new PrimitiveType(REQUIRED, BINARY, "empname"),
                new PrimitiveType(OPTIONAL, BINARY, "designation"));

        DataWritableWriteSupport.setSchema(messagetype, conf);

        System.out.println("Schema: " + messagetype.toString());

        Job job = new Job(conf, "parquet-convert");

        job.setJarByClass(getClass());
        job.setJobName(getClass().getName());
        job.setMapOutputKeyClass(Void.class);
        job.setMapOutputValueClass(ArrayWritable.class);
        job.setOutputKeyClass(Void.class);
        job.setOutputValueClass(ArrayWritable.class);
        job.setMapperClass(ParquetMapper.class);
        job.setNumReduceTasks(0); // map-only job, the output format writes the parquet file

        job.setInputFormatClass(TextInputFormat.class);
        job.setOutputFormatClass(ParquetOutputFormat.class);

        FileInputFormat.setInputPaths(job, inpath);
        ParquetOutputFormat.setOutputPath(job, outpath);

        ParquetOutputFormat.setWriteSupportClass(job, DataWritableWriteSupport.class);

        boolean success = job.waitForCompletion(true);
        return success ? 0 : 1;
    }

    public static void main(String[] args) throws Exception {
        int returnCode = ToolRunner.run(new Configuration(), new App(), args);
        System.exit(returnCode);
    }
}

Now we are good to go: build the project with Maven and run the job on the Hadoop cluster. Make sure you give the map tasks enough Java heap to avoid OOM errors; a sample submit command is sketched below.
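
A possible way to submit the job, assuming the build produced a jar named parquet-convert-1.0.jar (the jar name, heap size, and input/output paths are assumptions; adjust them to your setup):

hadoop jar parquet-convert-1.0.jar com.test.App \
    -Dmapred.child.java.opts=-Xmx2048m \
    /user/mapr/input/employees.tsv /user/mapr/output/employees-parquet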

Reading a Parquet file using parquet-tools

// Build parquet-tools
git clone https://github.com/Parquet/parquet-mr.git
cd parquet-mr/parquet-tools/
mvn clean package -Plocal
// Print the schema of the parquet file
java -jar parquet-tools-1.6.0rc3-SNAPSHOT.jar schema sample.parquet
// Read the parquet file
java -jar parquet-tools-1.6.0rc3-SNAPSHOT.jar cat sample.parquet
// Read the first few lines of the parquet file
java -jar parquet-tools-1.6.0rc3-SNAPSHOT.jar head -n5 sample.parquet
// Print the metadata of the parquet file
java -jar parquet-tools-1.6.0rc3-SNAPSHOT.jar meta sample.parquet

Saturday, August 8, 2015

Apache Drill : Creating Simple UDF

In this example I will demonstrate how to create a simple custom UDF in Apache Drill.
Env : Apache Drill 1.1.0
Java : JDK 1.7.0_75

The objective of the function is to take a string column as input and return the length of that string. In this example we pass the employee full name as input and return the length of the input name.

Step 1 : Create a simple Java project using Maven and add the Drill 1.1.0 dependencies to the pom.xml

<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/maven-v4_0_0.xsd">
<modelVersion>4.0.0</modelVersion>
<groupId>com.mapr.simplefn.test</groupId>
<artifactId>my-simple-drill-fn</artifactId>
<packaging>jar</packaging>
<version>1.0-SNAPSHOT</version>
<name>my-simple-drill-fn</name>
<url>http://maven.apache.org</url>
<dependencies>
<dependency>
<groupId>junit</groupId>
<artifactId>junit</artifactId>
<version>3.8.1</version>
<scope>test</scope>
</dependency>
<dependency>
<groupId>org.apache.drill.exec</groupId>
<artifactId>drill-java-exec</artifactId>
<version>1.1.0</version>
</dependency>
</dependencies>
<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-source-plugin</artifactId>
<version>2.4</version>
<executions>
<execution>
<id>attach-sources</id>
<phase>package</phase>
<goals>
<goal>jar-no-fork</goal>
</goals>
</execution>
</executions>
</plugin>
</plugins>
</build>
</project>
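
One note on the maven-source-plugin execution above: Drill expects a sources jar next to the binary jar when it picks up a custom function, which is why step 4 below copies both jars from the target folder into jars/3rdparty.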

Step 2: Create a Java class named EmpNameLength that implements the DrillSimpleFunc interface.

package com.mapr.simplefn.test;

import javax.inject.Inject;

import io.netty.buffer.DrillBuf;

import org.apache.drill.exec.expr.DrillSimpleFunc;
import org.apache.drill.exec.expr.annotations.FunctionTemplate;
import org.apache.drill.exec.expr.annotations.Output;
import org.apache.drill.exec.expr.annotations.Param;
import org.apache.drill.exec.expr.holders.NullableVarCharHolder;
import org.apache.drill.exec.expr.holders.VarCharHolder;

@FunctionTemplate(name = "empnlen", scope = FunctionTemplate.FunctionScope.SIMPLE, nulls = FunctionTemplate.NullHandling.NULL_IF_NULL)
public class EmpNameLength implements DrillSimpleFunc {

    @Param
    NullableVarCharHolder input;

    @Output
    VarCharHolder out;

    @Inject
    DrillBuf buffer;

    @Override
    public void setup() {
        // nothing to set up for this function
    }

    @Override
    public void eval() {
        // Decode the incoming VarChar bytes into a Java string.
        String stringValue = org.apache.drill.exec.expr.fn.impl.StringFunctionHelpers
                .toStringFromUTF8(input.start, input.end, input.buffer);
        int outValue = stringValue.length();
        String outputValue = String.valueOf(outValue);
        // Write the result back out as a VarChar using the injected buffer.
        out.buffer = buffer;
        out.start = 0;
        out.end = outputValue.getBytes().length;
        buffer.setBytes(0, outputValue.getBytes());
    }
}
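
Note that StringFunctionHelpers is referenced by its fully qualified name inside eval(): Drill copies the body of a UDF into generated code at runtime, so regular imports are generally not visible from inside setup() and eval().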

Step 3: Add an empty drill-override.conf to the resources folder of the project.
Step 4: Run mvn package, which builds both the class jar and the sources jar in the target folder; copy both jars to DRILL_HOME/jars/3rdparty/ on each node of the Drill cluster (a sample copy-and-restart sequence is sketched below).
Step 5: Restart the Drillbits and run the query as follows.
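
A rough sketch of steps 4 and 5 on one node, assuming DRILL_HOME points at your Drill install (the paths are assumptions; repeat the copy on every node running a Drillbit):

cp target/my-simple-drill-fn-1.0-SNAPSHOT.jar         $DRILL_HOME/jars/3rdparty/
cp target/my-simple-drill-fn-1.0-SNAPSHOT-sources.jar $DRILL_HOME/jars/3rdparty/
$DRILL_HOME/bin/drillbit.sh restart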

Expected Result:
0: jdbc:drill:zk=local> select empnlen(full_name) from cp.`employee.json` limit 5;
+---------+
| EXPR$0  |
+---------+
| 12      |
| 15      |
| 14      |
| 14      |
| 15      |
+---------+

Code snippet to create a table in MapR-DB

// Assumes 'admin' (HBaseAdmin), 'ugi' (UserGroupInformation) and 'log' are initialized elsewhere in the class.
private void createTable(String tableName, List<String> cfList) throws IOException {
    final String table = tableName;
    final List<String> cfs = cfList;
    try {
        // Run the admin call as the configured user.
        ugi.doAs(new PrivilegedExceptionAction<Void>() {
            @Override
            public Void run() throws Exception {
                if (!admin.tableExists(table)) {
                    TableName tableName = TableName.valueOf(table);
                    HTableDescriptor tableDescriptor = new HTableDescriptor(tableName);
                    // Add one column family per entry in the list.
                    for (String cf : cfs) {
                        tableDescriptor.addFamily(new HColumnDescriptor(cf));
                    }
                    admin.createTable(tableDescriptor);
                    log.info("Created table " + table);
                }
                return null;
            }
        });
    } catch (InterruptedException e) {
        e.printStackTrace();
    }
}
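
The snippet relies on an admin, ugi and log that are created elsewhere; a minimal sketch of how those fields might be initialized is below (the class and variable names are assumptions, not part of the original snippet):

Configuration conf = HBaseConfiguration.create();            // picks up hbase-site.xml from the classpath
HBaseAdmin admin = new HBaseAdmin(conf);                      // HBase/MapR-DB admin client
UserGroupInformation ugi = UserGroupInformation.getCurrentUser();
Logger log = LoggerFactory.getLogger(MyTableCreator.class);   // slf4j logger; MyTableCreator is a placeholder name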

Reading/Writing a file on MapR-FS (MapR filesystem) using a java program

In this short example I will demonstrate a Java program that reads and writes a file on the MapR filesystem.
Step 1: Add the MapR repository and MapR dependencies to the pom.xml
<?xml version="1.0" encoding="UTF-8"?>
<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
<modelVersion>4.0.0</modelVersion>

<groupId>com.mapr</groupId>
<artifactId>maprfs-example</artifactId>
<version>1.0-SNAPSHOT</version>

<repositories>
<repository>
<id>mapr-releases</id>
<url>http://repository.mapr.com/maven/</url>
<snapshots>
<enabled>false</enabled>
</snapshots>
<releases>
<enabled>true</enabled>
</releases>
</repository>
</repositories>

<dependencies>
<dependency>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.0.3-mapr-4.1.0</version>
</dependency>
</dependencies>

<build>
<plugins>
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-dependency-plugin</artifactId>
<version>2.10</version>
<executions>
<execution>
<id>copy</id>
<phase>package</phase>
<goals>
<goal>copy</goal>
</goals>
<configuration>
<artifactItems>
<artifactItem>
<groupId>org.apache.hadoop</groupId>
<artifactId>hadoop-core</artifactId>
<version>1.0.3-mapr-4.1.0</version>
<type>jar</type>
<overWrite>true</overWrite>
<outputDirectory>target</outputDirectory>
<destFileName>hadoop-core-1.0.3-mapr-4.1.0.jar</destFileName>
</artifactItem>
</artifactItems>
</configuration>
</execution>
</executions>
</plugin>
<plugin>
<artifactId>maven-assembly-plugin</artifactId>
<configuration>
<finalName>${project.name}-${project.version}</finalName>
<appendAssemblyId>true</appendAssemblyId>
<descriptors>
<descriptor>src/main/assembly/assembly.xml</descriptor>
</descriptors>
</configuration>
</plugin>
</plugins>
</build>

</project>

Step 2: Java program to read and write to the filesystem.
import java.net.*;

import org.apache.hadoop.conf.*;
import org.apache.hadoop.fs.*;

public class ReadWriteMapRFS {

    public static void main(String args[]) throws Exception {
        byte buf[] = new byte[65 * 1024];
        int ac = 0;
        if (args.length != 1) {
            System.out.println("usage: ReadWriteMapRFS pathname");
            return;
        }

        String dirname = args[ac++];

        // Point the client at the MapR CLDB; adjust the host to your cluster.
        Configuration conf = new Configuration();
        conf.set("fs.default.name", "maprfs://mapr.cluster.com:7222");
        FileSystem fs = FileSystem.get(conf);

        Path testDir = new Path(dirname + "/testdir");
        boolean rc = fs.mkdirs(testDir);
        if (!rc) {
            System.out.println("unable to create directory with path " + testDir);
            return;
        }
        System.out.println("created a new directory on path " + testDir);

        Path testfile = new Path(testDir + "/testfile.txt");
        System.out.println("now going to create a file inside the directory");

        // create(path, overwrite, bufferSize, replication, blockSize)
        FSDataOutputStream fos = fs.create(testfile, true, 512, (short) 1, (long) (64 * 1024 * 1024));
        fos.write(buf);
        fos.close();

        System.out.println("reading a recently created file : " + testfile);
        FSDataInputStream istr = fs.open(testfile);
        int bb = istr.readInt();
        istr.close();
        System.out.println("complete read : DONE");
    }
}
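
One way to compile and run the program on a node with the MapR client installed is to lean on hadoop classpath for the jars; the target path /user/mapr and the library path are examples taken from this cluster and may differ on yours:

javac -cp $(hadoop classpath) ReadWriteMapRFS.java
java  -cp .:$(hadoop classpath) -Djava.library.path=/opt/mapr/lib ReadWriteMapRFS /user/mapr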

Friday, August 7, 2015

Maven plugin to create a jar with sources

Maven plugin to generate a sources jar along with the class jar -- run with mvn package

<plugin>
    <groupId>org.apache.maven.plugins</groupId>
    <artifactId>maven-source-plugin</artifactId>
    <version>2.4</version>
    <executions>
        <execution>
            <id>attach-sources</id>
            <phase>package</phase>
            <goals>
                <goal>jar-no-fork</goal>
            </goals>
        </execution>
    </executions>
</plugin>
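
For example, with an artifactId of my-app and version 1.0 (placeholder names), mvn package should leave both my-app-1.0.jar and my-app-1.0-sources.jar in the target directory; the sources jar carries the "sources" classifier.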