% Journal article on the SCuBA-based research paper recommender system.
% The citation key is kept as-is so existing citations continue to resolve.
% There is deliberately no PAGES field: the record only carried the placeholder
% "??", which bibliography styles would print verbatim. The missing page range
% is tracked in the non-printing TODO field until it can be confirmed.
@ARTICLE{Agar-etal05b,
AUTHOR = {Agarwal, Nitin and Haque, Ehtesham and Liu, Huan and Parsons, Lance},
TITLE = {A Subspace Clustering Framework for Research Group Collaboration},
JOURNAL = {International Journal of Information Technology and Web Engineering},
YEAR = {2006},
VOLUME = {1},
TODO = {Page range unknown (previously the placeholder ??); add a PAGES field
once the range is confirmed against the publisher record.},
ABSTRACT = {Researchers spend considerable time searching for relevant papers
on the topic in which they are currently interested. Often, despite
having similar interests, researchers in the same lab do not find
it convenient to share results of bibliographic searches and thus
conduct independent time-consuming searches. Research paper recommender
systems can help the researcher avoid such time-consuming searches
by allowing each researcher to automatically take advantage of previous
searches performed by others in the lab. Existing recommender systems
were developed for commercial domains to assist users by focussing
towards products of their interests. Unlike those domains, the research
paper domain has relatively few users when compared with the huge
number of research papers. In this paper we present a novel system
to recommend relevant research papers to a user based on the user's
recent querying and browsing habits. The core of the system is a
scalable subspace clustering algorithm (SCuBA) that performs well
on the sparse, high-dimensional data collected in this domain. Both
synthetic and benchmark datasets are used to evaluate the recommendation
system and to demonstrate that it performs better than the traditional
collaborative filtering approaches when recommending research papers.},
OWNER = {Lance}
}
% Conference paper (WAIM 2005) proposing a research paper recommender system
% built on the SCuBA subspace clustering algorithm.
% The abstract is stored as running text: words must not carry end-of-line
% hyphenation copied from the typeset paper, because the field is re-flowed
% wherever it is displayed.
@INPROCEEDINGS{Agar-etal05,
AUTHOR = {Agarwal, Nitin and Haque, Ehtesham and Liu, Huan and Parsons, Lance},
TITLE = {Research Paper Recommender System: A Subspace Clustering Approach},
BOOKTITLE = {International Conference on Web-Age Information Management (WAIM)
2005},
YEAR = {2005},
ABSTRACT = {Researchers from the same lab often spend a considerable amount of
time searching for published articles relevant to their current
project. Despite having similar interests, they conduct independent,
time consuming searches. While they may share the results afterwards,
they are unable to leverage previous search results during the
search process. We propose a research paper recommender system that
avoids such time consuming searches by augmenting existing search
engines with recommendations based on previous searches performed
by others in the lab. Most existing recommender systems were developed
for commercial domains with millions of users. The research paper
domain has relatively few users compared to the large number of
online research papers. The two major challenges with this type
of data are the large number of dimensions and the sparseness of
the data. The novel contribution of the paper is a scalable subspace
clustering algorithm (SCuBA) that tackles these problems. Both synthetic
and benchmark datasets are used to evaluate the clustering algorithm
and to demonstrate that it performs better than the traditional
collaborative filtering approaches when recommending research
papers.},
OWNER = {Lance}
}
% Multi-author "Trends and Controversies" installment on feature selection for
% wide data sets, IEEE Intelligent Systems 20(6).
% MONTH is built from the standard month macros joined with "/" so each style
% and language renders the double issue itself; PAGES uses the "--" range
% form; the ampersand in the abstract is escaped so styles that typeset the
% abstract do not hit a LaTeX alignment-character error.
@ARTICLE{Liu-etal05,
AUTHOR = {Liu, Huan and Dougherty, Edward R. and Dy, Jennifer G. and Torkkola, Kari
and Tuv, Eugene and Peng, Hanchuan and Ding, Chris and Long, Fuhui and
Berens, Michael and Parsons, Lance and Zhao, Zheng and Yu, Lei and
Forman, George},
TITLE = {Evolving Feature Selection},
JOURNAL = {IEEE Intelligent Systems},
YEAR = {2005},
VOLUME = {20},
PAGES = {64--76},
NUMBER = {6},
MONTH = nov # "/" # dec,
ABSTRACT = {Feature selection is a preprocessing technique, commonly used on high-dimensional
data, that studies how to select a subset or list of attributes
or variables that are used to construct models describing data.
Wide data sets, which have a huge number of features but relatively
few instances, introduce a novel challenge to feature selection.
This installment of Trends \& Controversies looks at several different
ways of meeting this challenge.},
DOI = {10.1109/MIS.2005.105},
OWNER = {Lance},
URL = {http://doi.ieeecomputersociety.org/10.1109/MIS.2005.105}
}
% Workshop paper (SDM 2004 workshop on clustering high dimensional data) that
% surveys subspace clustering algorithms and compares the two main approaches.
% MONTH uses the predefined macro so styles can localise or abbreviate it, and
% PAGES uses the "--" range form. DATA, PDF, PRESENTATION and PS are
% non-standard fields naming companion files; standard styles ignore them.
@INPROCEEDINGS{Pars-etal04a,
AUTHOR = {Parsons, Lance and Haque, Ehtesham and Liu, Huan},
TITLE = {Evaluating Subspace Clustering Algorithms},
BOOKTITLE = {Workshop on Clustering High Dimensional Data and its Applications,
SIAM International Conference on Data Mining (SDM 2004)},
YEAR = {2004},
PAGES = {48--56},
MONTH = apr,
ABSTRACT = {Clustering techniques often define the similarity between instances
using distance measures over the various dimensions of the data.
Subspace clustering is an extension of traditional clustering that
seeks to find clusters in different subspaces within a dataset.
Traditional clustering algorithms consider all of the dimensions
of an input dataset in an attempt to learn as much as possible about
each instance described. In high dimensional data, however, many
of the dimensions are often irrelevant. These irrelevant dimensions
confuse clustering algorithms by hiding clusters in noisy data.
In very high dimensions it is common for all of the instances in
a dataset to be nearly equidistant from each other, completely masking
the clusters. Subspace clustering algorithms localize the search
for relevant dimensions allowing them to find clusters that exist
in multiple, possibly overlapping subspaces. This paper presents
a survey of the various subspace clustering algorithms. We then
compare the two main approaches to subspace clustering using empirical
scalability and accuracy tests.},
DATA = {ToyDataset.csv},
PDF = {subspace_clustering_SIAM_SDM04.pdf},
PRESENTATION = {SubspaceClusteringSIAMPresentation.zip},
PS = {subspace_clustering_SIAM_SDM04.ps}
}
% Review article on subspace clustering for high dimensional data, published
% in SIGKDD Explorations 6(1). The "data" field is a non-standard field naming
% a companion file; standard styles ignore it.
@article{Pars-etal04b,
  author   = {Parsons, Lance and Haque, Ehtesham and Liu, Huan},
  title    = {Subspace Clustering for High Dimensional Data: A Review},
  journal  = {SIGKDD Explorations, Newsletter of the ACM Special Interest Group
              on Knowledge Discovery and Data Mining},
  volume   = {6},
  number   = {1},
  pages    = {90},
  year     = {2004},
  url      = {http://www.acm.org/sigs/sigkdd/explorations/issue.php?volume=6&issue=1&year=2004&month=06},
  abstract = {Subspace clustering is an extension of traditional clustering that
              seeks to find clusters in different subspaces within a dataset.
              Often in high dimensional data, many dimensions are irrelevant and
              can mask existing clusters in noisy data. Feature selection removes
              irrelevant and redundant dimensions by analyzing the entire dataset.
              Subspace clustering algorithms localize the search for relevant
              dimensions allowing them to find clusters that exist in multiple,
              possibly overlapping subspaces. There are two major branches of
              subspace clustering based on their search strategy. Top-down algorithms
              find an initial clustering in the full set of dimensions and evaluate
              the subspaces of each cluster, iteratively improving the results.
              Bottom-up approaches find dense regions in low dimensional spaces
              and combine them to form clusters. This paper presents a survey
              of the various subspace clustering algorithms along with a hierarchy
              organizing the algorithms by their defining characteristics. We
              then compare the two main approaches to subspace clustering using
              empirical scalability and accuracy tests and discuss some potential
              applications where subspace clustering could be particularly useful.},
  data     = {ToyDataset.csv},
  owner    = {lparsons}
}
This file has been generated by bibtex2html 1.74