Here's a simple scraper I created in C# to get streaming quote data printed out to a console. It should be easy to convert to Java. Based on the following post:
http://blog.underdog-projects.net/2009/02/bringing-the-yahoo-finance-stream-to-the-shell/
Not too fancy (i.e. no regex etc), just a fast & dirty solution.
using System;
using System.Collections.Generic;
using System.IO;
using System.Linq;
using System.Net;
using System.Text;
using System.Web.Script.Serialization;
namespace WebDataAddin
{
/// <summary>
/// String codes for individual quote attributes, plus a reverse lookup
/// (<see cref="CodeMap"/>) from a code back to the name of the constant
/// that declares it.
/// </summary>
public class YahooConstants
{
    public const string AskPrice = "a00";
    public const string BidPrice = "b00";
    public const string DayRangeLow = "g00";
    public const string DayRangeHigh = "h00";
    public const string MarketCap = "j10";
    public const string Volume = "v00";
    public const string AskSize = "a50";
    public const string BidSize = "b60";
    public const string EcnBid = "b30";
    public const string EcnBidSize = "o50";
    public const string EcnExtHrBid = "z03";
    public const string EcnExtHrBidSize = "z04";
    public const string EcnAsk = "b20";
    public const string EcnAskSize = "o40";
    public const string EcnExtHrAsk = "z05";
    public const string EcnExtHrAskSize = "z07";
    public const string EcnDayHigh = "h01";
    public const string EcnDayLow = "g01";
    public const string EcnExtHrDayHigh = "h02";
    public const string EcnExtHrDayLow = "g11";
    public const string LastTradeTimeUnixEpochformat = "t10";
    public const string EcnQuoteLastTime = "t50";
    public const string EcnExtHourTime = "t51";
    public const string RtQuoteLastTime = "t53";
    public const string RtExtHourQuoteLastTime = "t54";
    public const string LastTrade = "l10";
    public const string EcnQuoteLastValue = "l90";
    public const string EcnExtHourPrice = "l91";
    public const string RtQuoteLastValue = "l84";
    public const string RtExtHourQuoteLastValue = "l86";
    public const string QuoteChangeAbsolute = "c10";
    public const string EcnQuoteAfterHourChangeAbsolute = "c81";
    public const string EcnQuoteChangeAbsolute = "c60";
    public const string EcnExtHourChange1 = "z02";
    public const string EcnExtHourChange2 = "z08";
    public const string RtQuoteChangeAbsolute = "c63";
    public const string RtExtHourQuoteAfterHourChangeAbsolute = "c85";
    public const string RtExtHourQuoteChangeAbsolute = "c64";
    public const string QuoteChangePercent = "p20";
    public const string EcnQuoteAfterHourChangePercent = "c82";
    public const string EcnQuoteChangePercent = "p40";
    public const string EcnExtHourPercentChange1 = "p41";
    public const string EcnExtHourPercentChange2 = "z09";
    public const string RtQuoteChangePercent = "p43";
    public const string RtExtHourQuoteAfterHourChangePercent = "c86";
    public const string RtExtHourQuoteChangePercent = "p44";

    /// <summary>
    /// Maps every code declared above to the name of its constant
    /// (for example <c>"A00"</c> to <c>"AskPrice"</c>). Built by reflecting
    /// over the public string fields of this class. Keys are stored
    /// upper-cased with the invariant culture and compared ordinally,
    /// ignoring case, so a lookup gives the same result under every
    /// current culture and for either casing of the code.
    /// </summary>
    public static readonly IDictionary<string, string> CodeMap = typeof(YahooConstants)
        .GetFields()
        .Where(field => field.FieldType == typeof(string))
        .ToDictionary(
            field => ((string)field.GetValue(null)).ToUpperInvariant(),
            field => field.Name,
            StringComparer.OrdinalIgnoreCase);
}
/// <summary>
/// Prefix and suffix tests for <see cref="StringBuilder"/> that compare
/// characters in place instead of first converting the builder to a string.
/// </summary>
public static class StringBuilderExtensions
{
    /// <summary>
    /// Returns true when <paramref name="builder"/> starts with
    /// <paramref name="prefix"/>. A null builder, or a null or empty prefix,
    /// yields false.
    /// </summary>
    public static bool HasPrefix(this StringBuilder builder, string prefix)
    {
        return ContainsAtIndex(builder, prefix, 0);
    }

    /// <summary>
    /// Returns true when <paramref name="builder"/> ends with
    /// <paramref name="suffix"/>. A null builder, or a null or empty suffix,
    /// yields false.
    /// </summary>
    public static bool HasSuffix(this StringBuilder builder, string suffix)
    {
        // The offset computation below dereferences both arguments, so the
        // null cases must be rejected here to behave the same as HasPrefix.
        if (builder == null || string.IsNullOrEmpty(suffix))
        {
            return false;
        }

        return ContainsAtIndex(builder, suffix, builder.Length - suffix.Length);
    }

    /// <summary>
    /// Returns true when the characters of <paramref name="str"/> appear in
    /// <paramref name="builder"/> starting exactly at
    /// <paramref name="index"/>. Returns false for a null builder, a null or
    /// empty string, a negative index, or a range that runs past the end of
    /// the builder.
    /// </summary>
    private static bool ContainsAtIndex(this StringBuilder builder, string str, int index)
    {
        // Written as a subtraction so that a large index cannot overflow.
        if (builder == null || string.IsNullOrEmpty(str) || index < 0
            || builder.Length - index < str.Length)
        {
            return false;
        }

        // Plain loop: this runs once per character read from the stream, so
        // it avoids allocating an iterator and closure on every call.
        for (var i = 0; i < str.Length; i++)
        {
            if (builder[index + i] != str[i])
            {
                return false;
            }
        }

        return true;
    }
}
/// <summary>
/// Console program that requests a streaming quote response, reads it one
/// character at a time, cuts it into <c>&lt;script&gt;</c>-delimited
/// messages and prints the JSON payload carried by each message.
/// </summary>
public class WebDataAddin
{
    /// <summary>Tag that opens each message in the response.</summary>
    public const string ScriptStart = "<script>";

    /// <summary>Tag that closes each message in the response.</summary>
    public const string ScriptEnd = "</script>";

    /// <summary>Wrapper text skipped right after <see cref="ScriptStart"/>.</summary>
    public const string MessageStart = "try{parent.yfs_";

    /// <summary>Wrapper text skipped right before <see cref="ScriptEnd"/>.</summary>
    public const string MessageEnd = ");}catch(e){}";

    /// <summary>Prefix of a message that is printed as per-symbol attribute data.</summary>
    public const string DataMessage = "u1f(";

    /// <summary>Prefix of a message that is printed as flat key/value pairs.</summary>
    public const string InfoMessage = "mktmcb(";

    /// <summary>
    /// Deserializes <paramref name="json"/> into <typeparamref name="T"/>
    /// with the serializer's length limit raised to its maximum.
    /// </summary>
    /// <param name="json">JSON text to deserialize.</param>
    /// <returns>The deserialized value.</returns>
    protected static T ParseJson<T>(string json)
    {
        // parse json - max acceptable value retrieved from
        // http://forums.asp.net/t/1343461.aspx
        var deserializer = new JavaScriptSerializer { MaxJsonLength = int.MaxValue };
        return deserializer.Deserialize<T>(json);
    }

    /// <summary>
    /// Opens the streaming URL for a fixed list of symbols and attribute
    /// codes and prints every message received until the stream ends.
    /// </summary>
    /// <exception cref="InvalidDataException">
    /// A message is too short to contain the expected wrapper, or is neither
    /// a <see cref="DataMessage"/> nor an <see cref="InfoMessage"/>.
    /// </exception>
    public static void Main()
    {
        const string symbols = "GBPUSD=X,SPY,MSFT,BAC,QQQ,GOOG";
        // these are codes declared as constants in the YahooConstants class
        const string attrs = "b00,b60,a00,a50";
        // "&region" had been mangled into the (R) sign by an HTML-entity
        // decode of "&reg"; restored here.
        const string url = "http://streamerapi.finance.yahoo.com/streamer/1.0?s={0}&k={1}&r=0&callback=parent.yfs_u1f&mktmcb=parent.yfs_mktmcb&gencallback=parent.yfs_gencb&region=US&lang=en-US&localize=0&mu=1";

        var req = WebRequest.Create(string.Format(url, symbols, attrs));
        // Proxy can be null (e.g. when the default proxy has been disabled),
        // in which case there is nothing to attach credentials to.
        if (req.Proxy != null)
        {
            req.Proxy.Credentials = CredentialCache.DefaultCredentials;
        }

        // Dispose the response so the connection is released even when
        // message handling throws.
        using (var response = req.GetResponse())
        {
            if (response == null)
            {
                return;
            }

            var stream = response.GetResponseStream();
            if (stream == null)
            {
                return;
            }

            using (var reader = new StreamReader(stream))
            {
                ReadMessages(reader);
            }
        }
    }

    /// <summary>
    /// Reads <paramref name="reader"/> to the end, handing every complete
    /// <c>&lt;script&gt;...&lt;/script&gt;</c> span to
    /// <see cref="HandleMessage"/>.
    /// </summary>
    private static void ReadMessages(TextReader reader)
    {
        var builder = new StringBuilder();
        var insideScript = false;
        int next;
        while ((next = reader.Read()) != -1)
        {
            builder.Append((char)next);

            if (!insideScript)
            {
                if (builder.HasSuffix(ScriptStart))
                {
                    // drop everything before the tag and keep only the tag
                    // itself (this is all we care about)
                    builder.Clear();
                    builder.Append(ScriptStart);
                    insideScript = true;
                }
                continue;
            }

            if (builder.HasSuffix(ScriptEnd))
            {
                HandleMessage(ExtractJavascript(builder));
                builder.Clear();
                // Wait for the next opening tag again, so that any text
                // between two messages is discarded instead of stalling
                // the parser.
                insideScript = false;
            }
        }
    }

    /// <summary>
    /// Returns the text between the leading <see cref="ScriptStart"/> +
    /// <see cref="MessageStart"/> and the trailing <see cref="MessageEnd"/> +
    /// <see cref="ScriptEnd"/> of a complete message.
    /// </summary>
    private static string ExtractJavascript(StringBuilder builder)
    {
        var chop = ScriptStart.Length + MessageStart.Length;
        var length = builder.Length - ScriptEnd.Length - MessageEnd.Length - chop;
        if (length < 0)
        {
            // too short to hold the wrapper; slicing would be out of range
            throw new InvalidDataException("Cannot recognize the message type");
        }

        return builder.ToString(chop, length);
    }

    /// <summary>
    /// Prints one unwrapped message, dispatching on its
    /// <see cref="DataMessage"/> or <see cref="InfoMessage"/> prefix.
    /// </summary>
    private static void HandleMessage(string javascript)
    {
        if (javascript.StartsWith(DataMessage, StringComparison.Ordinal))
        {
            PrintQuotes(ParseJson<Dictionary<string, object>>(
                javascript.Substring(DataMessage.Length)));
        }
        else if (javascript.StartsWith(InfoMessage, StringComparison.Ordinal))
        {
            PrintInfo(ParseJson<Dictionary<string, object>>(
                javascript.Substring(InfoMessage.Length)));
        }
        else
        {
            throw new InvalidDataException("Cannot recognize the message type");
        }
    }

    /// <summary>
    /// Prints each symbol followed by its attributes, translating attribute
    /// codes to constant names through <c>YahooConstants.CodeMap</c> and
    /// flagging codes that have no mapping.
    /// </summary>
    private static void PrintQuotes(Dictionary<string, object> json)
    {
        // parse out the data. key should be the symbol
        foreach (var symbol in json)
        {
            Console.WriteLine("Symbol: {0}", symbol.Key);
            var symbolData = (Dictionary<string, object>)symbol.Value;
            foreach (var dataAttr in symbolData)
            {
                var codeKey = dataAttr.Key.ToUpperInvariant();
                string attrName;
                if (YahooConstants.CodeMap.TryGetValue(codeKey, out attrName))
                {
                    Console.WriteLine(" {0}: {1}", attrName, dataAttr.Value);
                }
                else
                {
                    Console.WriteLine(" {0}: {1} (Warning! No Code Mapping Found)",
                        codeKey, dataAttr.Value);
                }
            }
            Console.WriteLine();
        }
    }

    /// <summary>Prints every key/value pair of an info message.</summary>
    private static void PrintInfo(Dictionary<string, object> json)
    {
        foreach (var dataAttr in json)
        {
            Console.WriteLine(" {0}: {1}", dataAttr.Key, dataAttr.Value);
        }
        Console.WriteLine();
    }
}
}