Realtime web search refers to the retrieval of very fresh content which is in high demand. An effective portal web search engine must support a variety of search needs, including realtime web search. However, supporting realtime web search introduces two challenges not encountered in non-realtime web search: quickly crawling relevant content and ranking documents with impoverished link and click information. In this paper, we advocate the use of realtime micro-blogging data for addressing both of these problems. We propose a method to use the micro-blogging data stream to detect fresh URLs. We also use micro-blogging data to compute novel and effective features for ranking fresh URLs. We demonstrate that these methods improve the effectiveness of the portal web search engine for realtime web search.
bibtex
Copied!
% Conference paper (WWW '10) on using Twitter data to improve recency ranking.
% Conventions used in this entry:
%   - `doi` holds the bare DOI; the style adds the resolver prefix.
%   - `address` is the publisher's city; the conference city lives in `venue`
%     so it cannot collide with biblatex's `location` (an alias of `address`).
%   - Braces protect proper nouns/acronyms from sentence-casing styles.
@inproceedings{dong:twitter-mlr,
  author    = {Dong, Anlei and Zhang, Ruiqiang and Kolari, Pranam and Bai, Jing and Diaz, Fernando and Chang, Yi and Zheng, Zhaohui and Zha, Hongyuan},
  title     = {Time is of the essence: improving recency ranking using {Twitter} data},
  booktitle = {{WWW} '10: Proceedings of the 19th International Conference on {World Wide Web}},
  year      = {2010},
  pages     = {331--340},
  publisher = {ACM},
  address   = {New York, NY, USA},
  venue     = {Raleigh, North Carolina, USA},
  isbn      = {978-1-60558-799-8},
  doi       = {10.1145/1772690.1772725},
}