Thursday 24 March 2011

Hacking LinkedIn API – Take 2

I had taken a long holiday from work and from the internet, so I hadn’t touched this blog until now. When I came back, I discovered that many of the visits to my blog were to one of my posts, on hacking the LinkedIn API.

When the LinkedIn APIs first came out I played with it and bypassed the convoluted and stupid OAuth manual process. I documented the approach in my blog post. However, soon after I posted it, LinkedIn had changed the login page and the access code page so that the HTML posting and scraping code no longer worked. I promised to update my Java code, so here it is.

This time, I am using a later version of the Java wrapper for the LinkedIn APIs – LinkedIn-J 1.0.361. The Java API has totally changed from the previous one I used.

The main differences in my second attempt at hacking the APIs are as follows:

Using HTMLEditorKit

Once you go to the authorisation URL returned by LinkedIn, it displays a login form (you must clear your browser’s cookies to disable the auto-login). In this login form, there are a number of hidden fields, just like before. However, this time LinkedIn has added a few more fields, including a dynamic one named csrfToken. When we submit the form, we must include all the hidden field values as well, so we need to parse the HTML to retrieve the dynamic field values. I used the HTMLEditorKit library because it is part of Java Swing, so no external JARs are required. The login form looks something like this.

...

So to retrieve the field values, I added an HTML parser callback class.

/**
 * Parser callback that captures the values of two hidden fields of the
 * LinkedIn login form, identified by their HTML {@code id} attributes.
 */
class ReportAttributes extends HTMLEditorKit.ParserCallback {
  /**
   * Values captured from the elements whose ids are
   * "csrfToken-oauthAuthorizeForm" and "sourceAlias-oauthAuthorizeForm";
   * each stays null until a matching element with a value has been seen.
   */
  public String csrfToken, sourceAlias;

  /** Inspects the attributes of every opening tag. */
  @Override
  public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
    this.listAttributes(attributes);
  }

  /** Inspects the attributes of every simple (empty) tag. */
  @Override
  public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
    this.listAttributes(attributes);
  }

  /**
   * Stores and prints the value attribute when the element's id is one of
   * the two ids of interest; every other element is ignored.
   */
  private void listAttributes(AttributeSet attributes) {
    Object value = attributes.getAttribute(HTML.Attribute.VALUE);
    if (value == null) {
      // No value attribute: nothing to capture, and calling toString()
      // on the missing attribute would throw a NullPointerException.
      return;
    }
    if (attributes.containsAttribute(HTML.Attribute.ID, "csrfToken-oauthAuthorizeForm")) {
      csrfToken = value.toString();
      System.out.println("csrfToken="+csrfToken);
    } else if (attributes.containsAttribute(HTML.Attribute.ID, "sourceAlias-oauthAuthorizeForm")) {
      sourceAlias = value.toString();
      System.out.println("sourceAlias="+sourceAlias);
    }
  }
}

Enabling Cookies

It turned out that you must enable cookies, otherwise LinkedIn will complain when you try to submit the login form. So here is the snippet for enabling cookies.

CookieManager manager = new CookieManager();
 manager.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
 CookieHandler.setDefault(manager);

The overall structure of the code is pretty similar to before. Here is the full source code. Just modify the highlighted lines and it should just work for you.

package com.laws.LinkedIn;

import java.io.BufferedReader;
import java.io.DataOutputStream;
import java.io.IOException;
import java.io.InputStream;
import java.io.InputStreamReader;
import java.net.CookieHandler;
import java.net.CookieManager;
import java.net.CookiePolicy;
import java.net.HttpURLConnection;
import java.net.URL;

import javax.swing.text.AttributeSet;
import javax.swing.text.MutableAttributeSet;
import javax.swing.text.html.HTML;
import javax.swing.text.html.HTMLEditorKit;

import com.google.code.linkedinapi.client.LinkedInApiClient;
import com.google.code.linkedinapi.client.LinkedInApiClientFactory;
import com.google.code.linkedinapi.client.oauth.LinkedInAccessToken;
import com.google.code.linkedinapi.client.oauth.LinkedInOAuthService;
import com.google.code.linkedinapi.client.oauth.LinkedInOAuthServiceFactory;
import com.google.code.linkedinapi.client.oauth.LinkedInRequestToken;
import com.google.code.linkedinapi.schema.Person;

/**
 * Thin subclass of {@link HTMLEditorKit} whose only job is to re-declare
 * {@code getParser()} as public so code outside the Swing package can
 * obtain the kit's HTML parser.
 */
class ParserGetter extends HTMLEditorKit {
    /** Returns the parser supplied by the superclass. */
    @Override
    public HTMLEditorKit.Parser getParser() {
        final HTMLEditorKit.Parser inherited = super.getParser();
        return inherited;
    }
}
/**
 * Parser callback that captures the values of two hidden fields of the
 * LinkedIn login form, identified by their HTML {@code id} attributes.
 */
class ReportAttributes extends HTMLEditorKit.ParserCallback {
  /**
   * Values captured from the elements whose ids are
   * "csrfToken-oauthAuthorizeForm" and "sourceAlias-oauthAuthorizeForm";
   * each stays null until a matching element with a value has been seen.
   */
  public String csrfToken, sourceAlias;

  /** Inspects the attributes of every opening tag. */
  @Override
  public void handleStartTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
    this.listAttributes(attributes);
  }

  /** Inspects the attributes of every simple (empty) tag. */
  @Override
  public void handleSimpleTag(HTML.Tag tag, MutableAttributeSet attributes, int position) {
    this.listAttributes(attributes);
  }

  /**
   * Stores and prints the value attribute when the element's id is one of
   * the two ids of interest; every other element is ignored.
   */
  private void listAttributes(AttributeSet attributes) {
    Object value = attributes.getAttribute(HTML.Attribute.VALUE);
    if (value == null) {
      // No value attribute: nothing to capture, and calling toString()
      // on the missing attribute would throw a NullPointerException.
      return;
    }
    if (attributes.containsAttribute(HTML.Attribute.ID, "csrfToken-oauthAuthorizeForm")) {
      csrfToken = value.toString();
      System.out.println("csrfToken="+csrfToken);
    } else if (attributes.containsAttribute(HTML.Attribute.ID, "sourceAlias-oauthAuthorizeForm")) {
      sourceAlias = value.toString();
      System.out.println("sourceAlias="+sourceAlias);
    }
  }
}


public class Main {
 static final String apiKey="your api key";
 static final String secretKey="your secret key";
 static final String login="name%40company.com";
 static final String password="password";
 
 static public String getPin(String authUrl, String token) {
  DataOutputStream dataOut;
  ParserGetter kit = new ParserGetter();
     HTMLEditorKit.Parser parser = kit.getParser();
     ReportAttributes callback = new ReportAttributes();
     // must enable cookie, otherwise LinkedIn will not give you the access code
     CookieManager manager = new CookieManager();
     manager.setCookiePolicy(CookiePolicy.ACCEPT_ALL);
     CookieHandler.setDefault(manager);


        try {
         // this section gets the LinkedIn login form.
            URL url = new URL(authUrl);
            HttpURLConnection con = (HttpURLConnection)url.openConnection();
            con.setRequestMethod("POST");
            con.setUseCaches(false);
            con.setDoInput(true);
            con.setDoOutput(true);
            con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            //SSLException thrown here if server certificate is invalid
            InputStreamReader reader = new InputStreamReader(con.getInputStream());
            parser.parse(reader, callback, true);
            System.out.println("-------------------------------------");
            
            // POST the login form and get the access/verification code.
            url = new URL("https://www.linkedin.com/uas/oauth/authorize/submit");
            con = (HttpURLConnection)url.openConnection();
            con.setRequestMethod("POST");
            con.setUseCaches(false);
            con.setDoInput(true);
            con.setDoOutput(true);
            con.setRequestProperty("User-Agent", "Mozilla/4.0");
            con.setRequestProperty("Content-Type", "application/x-www-form-urlencoded");
            
            dataOut = new DataOutputStream(con.getOutputStream());

            String s="session_login="+login
             +"&session_password=" + password
             + "&duration=0&authorize=Ok%2C%20I'll%20Allow%20It&extra=&access=-3&agree=true&oauth_token="
          +token+"&appId=&csrfToken="+callback.csrfToken+"&sourceAlias="+callback.sourceAlias;
            System.out.println("writing bytes: "+s);
            dataOut.writeBytes(s);
            dataOut.flush();
            dataOut.close();

            //SSLException thrown here if server certificate is invalid
            String returnedHtml=convertStreamToString(con.getInputStream());
            //System.out.println(returnedHtml);
            
            /* extract the pin from the html string. the block looks like this       
          
73336
* It turns out that the whole html string only contains one 'div with class="access-code"' * also it seems that the pin is always 5-digit long, * so we will just crudely detect that string and get the pin out. * A proper HTML parser should be used in a real application. */ int i=returnedHtml.indexOf("access-code\">"); String pin = returnedHtml.substring(i+13, i+13+5); System.out.println("pin="+pin); return pin; } catch (Exception e) { // TODO Auto-generated catch block e.printStackTrace(); return null; } } /** * @param args */ public static void main(String[] args) { final LinkedInOAuthService oauthService = LinkedInOAuthServiceFactory.getInstance().createLinkedInOAuthService( apiKey, secretKey); LinkedInRequestToken requestToken = oauthService.getOAuthRequestToken(); System.out.println("request token: "); System.out.println(" auth URL: "+requestToken.getAuthorizationUrl()); System.out.println(" token: "+requestToken.getToken()); System.out.println(" token secret: "+requestToken.getTokenSecret()); System.out.println(" expiration time: "+requestToken.getExpirationTime()); // get the access code String pin=getPin(requestToken.getAuthorizationUrl(), requestToken.getToken()); LinkedInAccessToken accessToken = oauthService.getOAuthAccessToken(requestToken, pin); final LinkedInApiClientFactory factory = LinkedInApiClientFactory.newInstance(apiKey, secretKey); final LinkedInApiClient client = factory.createLinkedInApiClient(accessToken); // now we can call the LinkedIn APIs. Person profile = client.getProfileForCurrentUser(); System.out.println("I am "+profile.getFirstName()+" "+profile.getLastName()); } // Stolen liberally from http://www.kodejava.org/examples/266.html public static String convertStreamToString(InputStream is) { /* * To convert the InputStream to String we use the BufferedReader.readLine() * method. We iterate until the BufferedReader return null which means * there's no more data to read. 
Each line will appended to a StringBuilder * and returned as String. */ BufferedReader reader = new BufferedReader(new InputStreamReader(is)); StringBuilder sb = new StringBuilder(); String line = null; try { while ((line = reader.readLine()) != null) { sb.append(line + "\n"); } } catch (IOException e) { e.printStackTrace(); } finally { try { is.close(); } catch (IOException e) { e.printStackTrace(); } } return sb.toString(); } }