lance-publications.bib

@ARTICLE{Agar-etal05b,
  AUTHOR = {Agarwal, Nitin and Haque, Ehtesham and Liu, Huan and Parsons, Lance},
  TITLE = {A Subspace Clustering Framework for Research Group Collaboration},
  JOURNAL = {International Journal of Information Technology and Web Engineering},
  YEAR = {2006},
  VOLUME = {1},
  INTERNAL-NOTE = {The page range was never recorded; the former placeholder
	value was removed so that it no longer prints in the formatted
	bibliography. Possibly issue 1, pp. 35--58 -- TODO confirm against
	the publisher record before adding NUMBER and PAGES.},
  ABSTRACT = {Researchers spend considerable time searching for relevant papers
	on the topic in which they are currently interested. Often, despite
	having similar interests, researchers in the same lab do not find
	it convenient to share results of bibliographic searches and thus
	conduct independent time-consuming searches. Research paper recommender
	systems can help the researcher avoid such time-consuming searches
	by allowing each researcher to automatically take advantage of previous
	searches performed by others in the lab. Existing recommender systems
	were developed for commercial domains to assist users by focussing
	towards products of their interests. Unlike those domains, the research
	paper domain has relatively few users when compared with the huge
	number of research papers. In this paper we present a novel system
	to recommend relevant research papers to a user based on the user's
	recent querying and browsing habits. The core of the system is a
	scalable subspace clustering algorithm (SCuBA) that performs well
	on the sparse, high-dimensional data collected in this domain. Both
	synthetic and benchmark datasets are used to evaluate the recommendation
	system and to demonstrate that it performs better than the traditional
	collaborative filtering approaches when recommending research papers.},
  OWNER = {Lance}
}

@INPROCEEDINGS{Agar-etal05,
  AUTHOR = {Agarwal, Nitin and Haque, Ehtesham and Liu, Huan and Parsons, Lance},
  TITLE = {Research Paper Recommender System: A Subspace Clustering Approach},
  BOOKTITLE = {International Conference on Web-Age Information Management (WAIM)
	2005},
  YEAR = {2005},
  ABSTRACT = {Researchers from the same lab often spend a considerable amount of
	time searching for published articles relevant to their current
	project. Despite having similar interests, they conduct independent,
	time consuming searches. While they may share the results afterwards,
	they are unable to leverage previous search results during the
	search process. We propose a research paper recommender system that
	avoids such time consuming searches by augmenting existing search
	engines with recommendations based on previous searches performed
	by others in the lab. Most existing recommender systems were developed
	for commercial domains with millions of users. The research paper
	domain has relatively few users compared to the large number of
	online research papers. The two major challenges with this type
	of data are the large number of dimensions and the sparseness of
	the data. The novel contribution of the paper is a scalable subspace
	clustering algorithm (SCuBA) that tackles these problems. Both synthetic
	and benchmark datasets are used to evaluate the clustering algorithm
	and to demonstrate that it performs better than the traditional
	collaborative filtering approaches when recommending research
	papers.},
  OWNER = {Lance}
}

@ARTICLE{Liu-etal05,
  AUTHOR = {Liu, Huan and Dougherty, Edward R. and Dy, Jennifer G. and
	Torkkola, Kari and Tuv, Eugene and Peng, Hanchuan and Ding, Chris and
	Long, Fuhui and Berens, Michael and Parsons, Lance and Zhao, Zheng and
	Yu, Lei and Forman, George},
  TITLE = {Evolving Feature Selection},
  JOURNAL = {IEEE Intelligent Systems},
  YEAR = {2005},
  VOLUME = {20},
  PAGES = {64--76},
  NUMBER = {6},
  MONTH = nov # "/" # dec,
  ABSTRACT = {Feature selection is a preprocessing technique, commonly used on high-dimensional
	data, that studies how to select a subset or list of attributes
	or variables that are used to construct models describing data.
	Wide data sets, which have a huge number of features but relatively
	few instances, introduce a novel challenge to feature selection.
	This installment of Trends \& Controversies looks at several different
	ways of meeting this challenge.},
  DOI = {10.1109/MIS.2005.105},
  OWNER = {Lance},
  URL = {http://doi.ieeecomputersociety.org/10.1109/MIS.2005.105}
}

@INPROCEEDINGS{Pars-etal04a,
  AUTHOR = {Parsons, Lance and Haque, Ehtesham and Liu, Huan},
  TITLE = {Evaluating Subspace Clustering Algorithms},
  BOOKTITLE = {Workshop on Clustering High Dimensional Data and its Applications,
	SIAM International Conference on Data Mining (SDM 2004)},
  YEAR = {2004},
  PAGES = {48--56},
  MONTH = apr,
  ABSTRACT = {Clustering techniques often define the similarity between instances
	using distance measures over the various dimensions of the data.
	Subspace clustering is an extension of traditional clustering that
	seeks to find clusters in different subspaces within a dataset.
	Traditional clustering algorithms consider all of the dimensions
	of an input dataset in an attempt to learn as much as possible about
	each instance described. In high dimensional data, however, many
	of the dimensions are often irrelevant. These irrelevant dimensions
	confuse clustering algorithms by hiding clusters in noisy data.
	In very high dimensions it is common for all of the instances in
	a dataset to be nearly equidistant from each other, completely masking
	the clusters. Subspace clustering algorithms localize the search
	for relevant dimensions allowing them to find clusters that exist
	in multiple, possibly overlapping subspaces. This paper presents
	a survey of the various subspace clustering algorithms. We then
	compare the two main approaches to subspace clustering using empirical
	scalability and accuracy tests.},
  DATA = {ToyDataset.csv},
  PDF = {subspace_clustering_SIAM_SDM04.pdf},
  PRESENTATION = {SubspaceClusteringSIAMPresentation.zip},
  PS = {subspace_clustering_SIAM_SDM04.ps}
}

@ARTICLE{Pars-etal04b,
  AUTHOR = {Parsons, Lance and Haque, Ehtesham and Liu, Huan},
  TITLE = {Subspace Clustering for High Dimensional Data: A Review},
  JOURNAL = {SIGKDD Explorations, Newsletter of the ACM Special Interest Group
	on Knowledge Discovery and Data Mining},
  YEAR = {2004},
  VOLUME = {6},
  PAGES = {90--105},
  NUMBER = {1},
  MONTH = jun,
  ABSTRACT = {Subspace clustering is an extension of traditional clustering that
	seeks to find clusters in different subspaces within a dataset.
	Often in high dimensional data, many dimensions are irrelevant and
	can mask existing clusters in noisy data. Feature selection removes
	irrelevant and redundant dimensions by analyzing the entire dataset.
	Subspace clustering algorithms localize the search for relevant
	dimensions allowing them to find clusters that exist in multiple,
	possibly overlapping subspaces. There are two major branches of
	subspace clustering based on their search strategy. Top-down algorithms
	find an initial clustering in the full set of dimensions and evaluate
	the subspaces of each cluster, iteratively improving the results.
	Bottom-up approaches find dense regions in low dimensional spaces
	and combine them to form clusters. This paper presents a survey
	of the various subspace clustering algorithms along with a hierarchy
	organizing the algorithms by their defining characteristics. We
	then compare the two main approaches to subspace clustering using
	empirical scalability and accuracy tests and discuss some potential
	applications where subspace clustering could be particularly useful.},
  DATA = {ToyDataset.csv},
  DOI = {10.1145/1007730.1007731},
  OWNER = {lparsons},
  URL = {http://www.acm.org/sigs/sigkdd/explorations/issue.php?volume=6&issue=1&year=2004&month=06}
}


This file has been generated by bibtex2html 1.74