@INPROCEEDINGS{ZadroznyEtAl94,
  author = {Wlodek Zadrozny and Marcin Szummer and Stanislaw Jarecki and David Johnson and Leora Morgenstern},
  title = {{NL} Understanding with a Grammar of Constructions},
  booktitle = {Intl.\ Conf.\ on Computational Linguistics ({COLING})},
  year = {1994},
  month = aug,
  volume = {15},
  pages = {1289--1293},
  address = {Kyoto, Japan},
  url = {http://research.microsoft.com/~szummer/papers/coling-94/ZadroznySzummerJarecki-coling94.ps.gz},
  abstract = {We present an approach to natural language understanding based on a
computable grammar of constructions. A "construction" consists of a
set of features of form and a description of meaning in a context. A
grammar is a set of constructions. This kind of grammar is the key
element of Mincal, an implemented natural language, speech-enabled
interface to an on-line calendar system. The system consists of a NL
grammar, a parser, an on-line calendar, a domain knowledge base (about
dates, times and meetings), an application knowledge base (about the
calendar), a speech recognizer, a speech generator, and the interfaces
between those modules. We claim that this architecture should work in
general for spoken interfaces in small domains. In this paper we
present two novel aspects of the architecture: (a) the use of
constructions, integrating descriptions of form, meaning and context
into one whole; and (b) the separation of domain knowledge from
application knowledge. We describe the data structures for encoding
constructions, the structure of the knowledge bases, and the
interactions of the key modules of the system.},
}

%   note = {\url{http://vismod.media.mit.edu/pub/tech-reports/TR-346.ps.Z}},
@MastersThesis{Szummer95,
  author = {Martin Szummer},
  title = {Temporal Texture Modeling},
  school = {{MIT} Media Lab Perceptual Computing},
  year = {1995},
  month = sep,
  number = {346},
  annote = {Master's thesis},
  url = {http://research.microsoft.com/~szummer/papers/Szummer-MEng-thesis.ps.gz},
  abstract = {Temporal textures are textures with motion. Examples include wavy water, rising steam and a crowd milling about. We model image sequences of temporal textures using the spatio-temporal autoregressive model (STAR). This model expresses each pixel as a linear combination of surrounding pixels lagged both in space and in time. The model provides a basis both for recognition and synthesis. We show how the least squares method can accurately estimate model parameters for large, causal neighborhoods with more than 1000 parameters. Synthesis results show that the model can adequately capture the spatial and temporal characteristics of many temporal textures.}
}

@UNPUBLISHED{Szummer95b,
  author = {Martin Szummer},
  title = {An Image Browser that learns from User Interaction},
  year = {1995},
  month = dec,
  url  = {http://research.microsoft.com/~szummer/learning-browser/learning-browser.html},
  note = {\url{http://xenia.media.mit.edu/~szummer/learning-browser.html}},
  abstract = {Large image databases with millions of images are being built. It is very tedious to browse these databases; the user will only have time to see a small fraction of the images. Currently, there are very few tools that assist the user in finding the right selection of images. 
This project combines learning algorithms and machine vision techniques to create a flexible and powerful image browser. The user is presented with a selection of images. They select positive and negative examples of the type of images they want to see or avoid seeing. The browser analyzes the examples and chooses the best search metrics. It then uses these metrics to find images similar to the examples. The results form a hierarchy that the user can browse with a tree browser. Next, the user selects more positive and negative examples, and the process repeats.},
}

%   note = {\url{http://vismod.media.mit.edu/pub/tech-reports/TR-381.ps.Z}},
@INPROCEEDINGS{Szummer96a,
  author = {Martin Szummer and Rosalind W. Picard},
  title = {Temporal Texture Modeling},
  booktitle = {{IEEE} Intl.\ Conf.\ on Image Processing ({ICIP})}},
  year = {1996},
  month = sep,
  volume = {3},
  pages = {823--826},
  address = {Lausanne, Switzerland},
  url = {http://research.microsoft.com/~szummer/papers/icip-96/SzummerPicard-icip96.pdf},
  abstract = {Temporal textures are textures with motion. Examples include wavy water, rising steam and fire. We model image sequences of temporal textures using the spatio-temporal autoregressive model (STAR). This model expresses each pixel as a linear combination of surrounding pixels lagged both in space and in time. The model provides a base for both recognition and synthesis. We show how the least squares method can accurately estimate model parameters for large, causal neighborhoods with more than 1000 parameters. Synthesis results show that the model can adequately capture the spatial and temporal characteristics of many temporal textures. A 95\% recognition rate is achieved for a 135 element database with 15 texture classes.},
}

%   note = {\url{http://vismod.media.mit.edu/pub/tech-reports/TR-382.ps.Z}},
@INPROCEEDINGS{Szummer96b,
  author = {Rosalind W. Picard and Thomas P. Minka and Martin Szummer},
  title = {Modeling User Subjectivity In Image Libraries},
  booktitle = {{IEEE} Intl.\ Conf.\ on Image Processing ({ICIP})}},
  year = {1996},
  month = sep,
  volume = {2},
  pages = {777--780},
  address = {Lausanne, Switzerland},
  url =  {http://research.microsoft.com/~szummer/papers/icip-96/PicardMinkaSzummer-icip96.pdf},
  abstract = {In addition to the problem of {\it which} image analysis models to use
in digital libraries, e.g.\ wavelet, Wold, color histograms, is the
problem of {\it how} to combine these models with their different
strengths.  Most present systems place the burden of combination on
the user, e.g. the user specifies 50\% texture features, 20\% color
features, etc.  This is a problem since most users do not know how
to best pick the settings for the given data and search problem.  This
paper addresses this problem, describing research in progress for a
system that (1) automatically infers which combination of models best
represents the data of interest to the user and (2) learns continuously
during interaction with each user.  In particular, these two
components -- inference and learning -- provide a solution that adapts
to the subjective and hard-to-predict behaviors frequently seen when
people query or browse image libraries.},
}

%   note = {\url{http://vismod.media.mit.edu/pub/tech-reports/TR-445.ps.Z}},
@INPROCEEDINGS{Szummer98,
  author = {Martin Szummer and Rosalind W. Picard},
  title = {Indoor-Outdoor Image Classification},
  booktitle = {{IEEE} International Workshop on Content-Based Access of Image and Video Databases ({CAIVD})},
  year = {1998},
  month = jan,
  pages = {42--51},
  address = {Bombay, India},
  url = {http://research.microsoft.com/~szummer/papers/caivd98/SzummerPicard-caivd98.ps.gz},
  abstract = {We show how high-level scene properties can be inferred from
classification of low-level image features, specifically for the
indoor-outdoor scene retrieval problem.  We systematically studied the
features: (1) histograms in the Ohta color space (2) multiresolution,
simultaneous autoregressive model parameters (3) coefficients of a
shift-invariant DCT.  We demonstrate that performance is improved by
computing features on subblocks, classifying these subblocks, and then
combining these results in a way reminiscent of ``stacking.''  State
of the art single-feature methods are shown to result in about 75--86\%
performance, while the new method results in 90.3\% correct
classification, when evaluated on a diverse database of over 1300
consumer images provided by Kodak.},
}

@INPROCEEDINGS{Sorgel98,
  author = {Wolfgang S{\"o}rgel and Sabine Girod and Martin Szummer and Bernd Girod},
  title = {Computer Aided Diagnosis of Bone Lesions in the Facial Skeleton},
  booktitle = {Workshop Bildverarbeitung f{\"u}r die Medizin},
  year = {1998},
  address = {Aachen, Germany},
  month = mar,
  url = {http://research.microsoft.com/~szummer/papers/SorgelEtal-aachen98.pdf},
  abstract = {We present a system for computer aided diagnosis of bone tumors in the facial skeleton. There are many different lesions with ra­diographic manifestation in the jaws. Our system helps performing the differential diagnosis of these. The input is a digitized orthopantomograph (OPG) in which the user marks the position of the lesion with a single mouse click. An active contour model then automatically finds the boundaries of the lesion. Gray­level histograms, MRSAR texture featu­res and Gabor filter features are computed for the lesion region. These features are then combined and used to query a database containing expert-diagnosed reference cases. The result is a number of similar cases, with tumor position marked and with available expert annotations. We show good agreement between our results and differential diagnosis given by humans. The system is also a suitable tool for training and education.}
}

%% crossref = {NIPS-13},
%% note = {\url{http://www.ai.mit.edu/people/szummer/papers/kernelexp-nips00.ps.gz}},
@INPROCEEDINGS{Szummer-kernel-expansions,
  author = {Martin Szummer and Tommi Jaakkola},
  title = {Kernel expansions with unlabeled examples},
  pages = {626--632},
  year = {2001},
  month = jan,
  volume = {13},
  publisher = {{MIT} Press},
  booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
  url = {http://research.microsoft.com/~szummer/papers/SzummerJaakkola-nips00.pdf},
  abstract = {Modern classification applications necessitate supplementing the
few available labeled examples with unlabeled examples to improve
classification performance. We present a new tractable algorithm for
exploiting unlabeled examples in discriminative classification. This
is achieved essentially by expanding the input vectors into longer
feature vectors via both labeled and unlabeled examples. The resulting
classification method can be interpreted as a discriminative kernel
density estimate and is readily trained via the EM algorithm, which in
this case is both discriminative and achieves the optimal solution. We
provide, in addition, a purely discriminative formulation of the
estimation problem by appealing to the maximum entropy framework. We
demonstrate that the proposed approach requires very few labeled
examples for high classification accuracy.},
}

% note = {\url{http://www.ai.mit.edu/people/szummer/}}, 
@INPROCEEDINGS{Szummer-partially-labeled-walks,
  author = {Martin Szummer and Tommi Jaakkola},
  title = {Partially labeled classification with Markov random walks},
  pages = {945--952},
  year = {2002},
  month = jan,
  volume = {14},
  publisher = {{MIT} Press},
  booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
  url = {http://research.microsoft.com/~szummer/papers/SzummerJaakkola-nips01.pdf},
  abstract = {To classify a large number of unlabeled examples we combine a limited number of labeled examples with a Markov random walk representation
over the unlabeled examples. The random walk representation exploits
any low dimensional structure in the data in a robust, probabilistic
manner. We develop and compare several estimation criteria/algorithms
suited to this representation. This includes in particular multi-way
classification with an average margin criterion which permits a closed
form solution. The time scale of the random walk regularizes the
representation and can be set through a margin-based criterion
favoring unambiguous classification. We also extend this basic
regularization by adapting time scales for individual examples. We
demonstrate the approach on synthetic examples and on text
classification problems.},
}

% note = {\url{http://www.ai.mit.edu/people/szummer/}},
@INPROCEEDINGS{SzummerJaakkola02,
  author = {Martin Szummer and Tommi Jaakkola},
  title = {Information Regularization with Partially Labeled Data},
  pages = {1025--1032},
  year = {2003},
  month = jan,
  volume = {15},
  publisher = {{MIT} Press},
  booktitle = {Advances in Neural Information Processing Systems ({NIPS})},
  url = {http://research.microsoft.com/~szummer/papers/SzummerJaakkola-nips02.pdf},
  abstract = {Classification with partially labeled data requires using a large
number of unlabeled examples (or an estimated marginal $P(x)$), to further
constrain the conditional $P(y|x)$ beyond a few available labeled examples. We
formulate a regularization approach to linking the marginal and
the conditional in a general way. The regularization penalty measures
the information that is implied about the labels over covering regions. No
parametric assumptions are required and the approach remains tractable even
for continuous 
marginal densities $P(x)$. We develop algorithms for solving the
regularization problem for finite covers, establish a limiting
differential equation, and exemplify the behavior of the new
regularization approach in simple cases.},
  abstract_html = {Classification with partially labeled data requires using a large
number of unlabeled examples (or an estimated marginal P(x)), to further
constrain the conditional P(y|x) beyond a few available labeled examples. We
formulate a regularization approach to linking the marginal and
the conditional in a general way. The regularization penalty measures
the information that is implied about the labels over covering regions. No
parametric assumptions are required and the approach remains tractable even
for continuous 
marginal densities P(x). We develop algorithms for solving the
regularization problem for finite covers, establish a limiting
differential equation, and exemplify the behavior of the new
regularization approach in simple cases.},
}

% Kjaerulff - a and e should be merged, but tool does not support it, so replaced directly by Windows char, Kjærulff
@INPROCEEDINGS{Yeang-markov-walks-uai03,
  author = {Chen-Hsiang Yeang and Martin Szummer},
  title = {Markov Random Walk Representations with Continuous Distributions},
  booktitle = {Proc.\ Uncertainty in Artificial Intelligence ({UAI})},
  year = {2003},
  month = aug,
  editor = {U. Kj{\ae}rulff and C. Meek},
  pages = {600--607},
  publisher = {Morgan Kaufmann},
  url = {http://research.microsoft.com/~szummer/papers/YeangSzummer-uai03.ps.gz},
  abstract = {Representations based on random walks can exploit discrete data distributions for clustering and classification.  We extend such representations from
discrete to continuous distributions.  Transition probabilities are now
calculated using a diffusion equation with a diffusion coefficient that
varies inversely with the data density.  We relate this diffusion equation
to a path integral and derive the corresponding path probability measure.
The framework is useful for incorporating continuous data
densities and prior knowledge.},
}

@INPROCEEDINGS{Szummer-incorporating-context-aaai04,
  author = {Martin Szummer and Philip J. Cowans},
  title = {Incorporating Context and User Feedback in Pen-Based Interfaces},
  booktitle = {{AAAI} Fall symposium, Making Pen-Based Interaction Intelligent and Natural},
  year = {2004},
  month = oct,
  editor = {R. Davis and J. Landay et al.},
  series = {FS-04-06},
  pages = {159--166},
  publisher = {{AAAI Press}},
  url = {http://research.microsoft.com/~szummer/papers/SzummerCowans-aaai04.pdf},
  abstract = {We propose a joint probabilistic model for grouping and labeling hand-drawn ink strokes. We demonstrate that simultaneous grouping and labeling yields superior accuracy to labeling alone.   Our probabilistic formulation has many
advantages, exact inference is feasible, and we obtain confidence estimates. 
We show how to incorporate user feedback by conditioning
our model, and discuss different types of inference tasks suited for
various user interactions.},
}

@INPROCEEDINGS{Krishnapuram-generative-models-iwfhr04,
  author = {Balaji Krishnapuram and Christopher M. Bishop and Martin Szummer},
  title = {Generative Models and {B}ayesian Model Comparison for Shape Recognition},
  booktitle = {9th Intl.\ Workshop on Frontiers in Handwriting Recognition ({IWFHR})},
  year = {2004},
  month = oct,
  editor = {F. Kimura and H. Fujisawa},
  pages = {20--25},
  organization = {{IEEE}},
  url = {http://research.microsoft.com/~szummer/papers/KrishnapuramBishopSzummer-iwfhr04.pdf},
  abstract = {Recognition of hand-drawn shapes is an important and
widely studied problem. By adopting a generative probabilistic
framework we are able to formulate a robust and flexible approach
to shape recognition which allows for a wide range of shapes and
which can recognize new shapes from a single exemplar. It also provides
meaningful probabilistic measures of model score which can be used
as part of a larger probabilistic framework for interpreting a
page of ink. We also show how Bayesian model comparison allows the
trade-off between data fit and model complexity to be
optimized automatically.},
}

@INPROCEEDINGS{Szummer-contextual-recog-iwfhr04,
  author = {Martin Szummer and Yuan Qi},
  title = {Contextual Recognition of Hand-drawn Diagrams with Conditional Random Fields},
  booktitle = {9th Intl.\ Workshop on Frontiers in Handwriting Recognition ({IWFHR})},
  year = {2004},
  month = oct,
  editor = {F. Kimura and H. Fujisawa},
  pages = {32--37},
  organization = {{IEEE}},
  url = {http://research.microsoft.com/~szummer/papers/Szummer-Qi-iwfhr04.pdf},
  abstract = {  Hand-drawn diagrams present a complex recognition problem.  Fragments of the drawing are often individually ambiguous, and require context to 
      be interpreted.
      We present a recognizer based on conditional random fields
      (CRFs) that jointly analyze all drawing fragments in order to incorporate
      contextual cues. The classification of each fragment influences the
      classification of its neighbors. CRFs
      allow flexible and correlated features, and take temporal information
      into account.  Training is done via conditional MAP estimation that
      is guaranteed to reach the global optimum.  During recognition we
      propagate information globally to find the joint MAP or maximum
      marginal solution for each fragment.  We demonstrate the framework on a
      container versus connector recognition task.},
}

% Best student paper award.
@INPROCEEDINGS{Cowans-partitioning-aistats05,
  author = {Philip J. Cowans and Martin Szummer},
  title = {A Graphical Model for Simultaneous Partitioning and Labeling},
  booktitle = {AI \& Statistics {AISTATS}},
  year = {2005},
  month = jan,
  comment = {Best student paper award. Tenth International Workshop on Artificial Intelligence and Statistics},
  url = {http://research.microsoft.com/~szummer/papers/CowansSzummer-aistats05.pdf},
  abstract = {In this work we develop a graphical model for describing
probability distributions over labeled partitions of an undirected
graph which are conditioned on observed data.  We show how to
efficiently perform exact inference in these models, by exploiting the
structure of the graph and adapting the sum-product and max-product
algorithms. We demonstrate our approach on the task of segmenting and
labeling hand-drawn ink fragments, and show that a significant
performance increase is obtained by labeling and partitioning
simultaneously.},
}

@INPROCEEDINGS{Qi-BCRF-aistats05,
  author = {Yuan Qi and Martin Szummer and Thomas P. Minka},
  title = {Bayesian Conditional Random Fields},
  booktitle = {AI \& Statistics {AISTATS}},
  year = {2005},
  month = jan,
  pages = {269--276},
  url = {http://research.microsoft.com/~szummer/papers/QiSzummerMinka-aistats05.pdf},
  comment = {Tenth International Workshop on Artificial Intelligence and Statistics},
  abstract = { We propose Bayesian Conditional Random Fields (BCRFs) for classifying interdependent and structured data, such as sequences, images or webs. 
 BCRFs are a Bayesian approach to training and inference with conditional
 random fields, which were previously trained by maximizing likelihood (ML)
 (Lafferty et al., 2001).  Our framework avoids the problem of overfitting,
 and offers the full advantages of a Bayesian treatment.  Unlike the ML approach,
 we estimate the posterior distribution 
 of the  model parameters during training, and average predictions
 over this posterior during  inference. 
 We apply two extensions of expectation propagation (EP), the power EP
 and the novel transformed EP methods, to incorporate
 the partition function. For algorithmic stability and accuracy, we 
 flatten the approximation structures to avoid
 two-level approximations.
 We demonstrate the superior prediction accuracy of BCRFs over conditional
 random fields trained with ML or MAP on synthetic and real
 datasets},
}

% URL not available as not final version
% TODO: set year & month to correct value
@ARTICLE{Qi-BCRF-jmlr06,
  author = {Yuan Qi and Martin Szummer and Thomas P. Minka},
  title = {Bayesian Conditional Random Fields},
  journal = {Journal of Machine Learning Research ({JMLR})},
  year = {2010},
  month = dec,
  url  = {http://research.microsoft.com/~szummer/papers/paper-not-available.html},
  abstract = { We propose Bayesian Conditional Random Fields (BCRFs) for classifying interdependent and structured data, such as sequences, images or webs. 
 BCRFs are a Bayesian approach to training and inference with conditional
 random fields, which were previously trained by maximizing likelihood (ML)
 (Lafferty et al., 2001).  Our framework avoids the problem of overfitting,
 and offers the full advantages of a Bayesian treatment.  Unlike the ML approach,
 we estimate the posterior distribution 
 of the  model parameters during training, and average predictions
 over this posterior during  inference. 
 We apply two extensions of expectation propagation (EP), the power EP
 and the novel transformed EP methods, to incorporate
 the partition function. For algorithmic stability and accuracy, we 
 flatten the approximation structures to avoid
 two-level approximations.
 We demonstrate the superior prediction accuracy of BCRFs over conditional
 random fields trained with ML or MAP on synthetic and real
 datasets.},
}

@INPROCEEDINGS{Qi-BCRF-cvpr05,
  author = {Yuan Qi and Martin Szummer and Thomas P. Minka},
  title = {Diagram Structure Recognition by Bayesian Conditional Random Fields},
  booktitle = {Proc.\ Comp.\ Vision Pattern Recogn ({CVPR})},
  pages = {191--196},
  editor = {C. Schmid and S. Soatto and C. Tomasi},
  year = {2005},
  month = jun,
  url = {http://research.microsoft.com/~szummer/papers/QiSzummerMinka-cvpr05.pdf},
  abstract = {Hand-drawn diagrams present a complex recognition problem.
 Elements of the diagram are often individually ambiguous, and require
 context to be interpreted.
 We present a recognition method based on Bayesian conditional random fields
 (BCRFs) that jointly analyzes all drawing elements in order to
 incorporate contextual cues. The classification of each object affects
 the classification of its neighbors. BCRFs allow flexible and
 correlated features, and take both spatial and temporal information
 into account.  BCRFs estimate the posterior distribution of parameters
 during training, and average predictions over the posterior for
 testing. As a result of model averaging, BCRFs avoid the overfitting
 problems associated with maximum likelihood training. We also
 incorporate Automatic Relevance Determination (ARD), a Bayesian feature
 selection technique, into BCRFs.  The result is significantly lower
 error rates compared to ML- and MAP-trained CRFs.},
}


@INPROCEEDINGS{Szummer-icdar05,
  author = {Martin Szummer},
  title = {Learning Diagram Parts with Hidden Random Fields},
  booktitle = {Intl.\ Conf.\ Document Analysis and Recognition ({ICDAR})},
  pages = {1188--1193},
  year = {2005},
  month = aug,
  url = {http://research.microsoft.com/~szummer/papers/Szummer-icdar05.pdf},
  abstract = {Many diagrams contain compound objects composed of parts. 
We propose a recognition framework that learns parts in an unsupervised way,
and requires training labels only for compound objects.  Thus, human
labeling effort is 
reduced and parts are not predetermined, instead appropriate parts are discovered
based on the data.  
We model contextual relations between parts, such that
the label of a part can depend simultaneously on the labels of its neighbors,
as well as spatial and temporal information.  The model is a Hidden Random Field
(HRF), an extension of a Conditional Random Field.  We apply it to find
parts of boxes, arrows and flowchart shapes in hand-drawn diagrams, and
also demonstrate improved recognition accuracy over the conditional random
field model without parts.},
}

@INPROCEEDINGS{Szummer-disc-adapt-iwfhr06,
  author = {Martin Szummer and Christopher M. Bishop},
  title = {Discriminative Writer Adaptation},
  booktitle = {10th Intl.\ Workshop on Frontiers in Handwriting Recognition ({IWFHR})},
  pages= {293--298},
  year = {2006},
  month = oct,
  url = {http://research.microsoft.com/~szummer/papers/SzummerBishop-IWFHR06.pdf},
  abstract = {We propose a general method for adapting a writer-independent
classifier to an individual writer.  We employ a mixture of experts
formulation, where the classifiers are trained on weighted clusters of
writers.  The clusters are determined by which experts classify individual
writing correctly.  The method adapts by choosing the appropriate combination
of classifiers for a new user.  It applies to any probabilistic
discriminative classifier, and adapts discriminatively without modeling the
input feature distribution.

We apply the method to online character recognition.  Specifically, we use a
mixture of neural networks as well as a mixture of logistic regressions.  We
train the mixture via conjugate gradient ascent or via the EM algorithm on
192,000 Latin characters of 98 classes and 216 writers, and show adaptation
results for 21 writers.},
}

@InProceedings{Mickens-Szummer-Narayanan07,
  author =       {James Mickens and Martin Szummer and Dushyanth Narayanan},
  title =        {Snitch: {I}nteractive Decision Trees for Troubleshooting Misconfigurations},
  booktitle =    {{SysML07}: 2nd Workshop on Tackling Computer Systems Problems with Machine Learning Techniques},
  year =         2007,
  month =        apr,
  url = {http://research.microsoft.com/~szummer/papers/MickensSzummerNarayanan-Snitch-sysml07.pdf},
  abstract = {
Troubleshooting misconfigurations of modern applications is difficult
due to their large and complex state.  Snitch is a prototype tool that
assists human troubleshooters by finding relationships between
application state and subsequent faults.  It correlates configuration
state and application errors across many machines and users, and
across long periods of time.  Snitch aids the human expert in
extracting patterns from this rich but enormous data set by building
decision trees pinpointing potential configuration problems.

We applied Snitch to 114 GB of configuration traces from 151
machines over 567 days.  We illustrate how Snitch can suggest
misconfigurations in case studies of two Windows applications:
Messenger and Outlook.}
}


@InProceedings{Rother-QPBO07,
  author =       {Carsten Rother and Vladimir Kolmogorov and Victor Lempitsky and Martin Szummer},
  title =        {Optimizing Binary {MRFs} via Extended Roof Duality},
  booktitle =    {Proc.\ Comp.\ Vision Pattern Recogn.\ ({CVPR})},
  year = {2007},
  month = jun,
  url = {http://research.microsoft.com/~szummer/papers/Rother-etal-QPBO-cvpr07.pdf},
  abstract = {
Many computer vision applications rely on the efficient optimization of
challenging, so-called non-submodular, binary pairwise MRFs. A promising
graph cut based approach for optimizing such MRFs known as "roof duality" was
recently introduced into computer vision.  We study two methods which extend
this approach. First, we discuss an efficient implementation of the "probing"
technique introduced recently by Boros et al. 2006. It simplifies the
MRF while preserving the global optimum.  Our code is 400-700 faster on some
graphs than the implementation of [Boros 2006].  Second, we present a new
technique which takes an arbitrary input labeling and tries to improve its
energy.  We give theoretical characterizations of local minima of this
procedure.

We applied both techniques to many applications, including image
segmentation, new view synthesis, super-resolution, diagram recognition,
parameter learning, texture restoration, and image deconvolution. For several
applications we see that we are able to find the global minimum very
efficiently, and considerably outperform the original roof duality
approach. In comparison to existing techniques, such as graph cut, TRW, BP,
ICM, and simulated annealing, we nearly always find a lower energy. 
} 
}

@InProceedings{Craswell-Szummer-random-walks-sigir07,
  author =       {Nick Craswell and Martin Szummer},
  title =        {Random Walks on the Click Graph},
  booktitle =    {{SIGIR} Conf.\ Research and Development in Information Retrieval},
  year =         2007,
  month =        jul,
  pages =        {239--246},                  
  url = {http://research.microsoft.com/~szummer/papers/CraswellSzummer-random-walks-sigir07.pdf},
  abstract =     {Search engines can record which documents were clicked for which
  query, and use these query-document pairs as 'soft' relevance
  judgments.  However, compared to the true judgments, click logs give
  noisy and sparse relevance information.  We apply a Markov random
  walk model to a large click log, producing a probabilistic ranking
  of documents for a given query.  A key advantage of the model is its
  ability to retrieve relevant documents that have not yet been
  clicked for that query and rank those effectively.  We conduct
  experiments on click logs from image search, comparing our
  ('backward') random walk model to a different ('forward') random walk,
  varying parameters such as walk length and self-transition
  probability.  The most effective combination is a long backward walk
  with high self-transition probability.}
}

@InProceedings{Szummer-Craswell-behavioral-classification-www08,
  author =       {Martin Szummer and Nick Craswell},
  title =        {Behavioral Classification on the Click Graph},
  booktitle =    {World Wide Web Conference {WWW}},
  year =         2008,
  month =        apr,
  pages =        {1241--1242},                  
  url = {http://research.microsoft.com/~szummer/papers/SzummerCraswell-behavioral-classification-www08.pdf},
  abstract =     {A bipartite query-URL graph, where an edge indicates that a document was clicked for a query, is a useful construct for finding groups of related
queries and URLs.  Here we use this behavior graph for classification.  We
choose a click graph sampled from two weeks of image search activity, and the
task of ``adult'' filtering: identifying content in the graph that is
inappropriate for minors.  We show how to perform classification using random
walks on this graph, and two methods for estimating classifier parameters.}
}

@InProceedings{Ranzato-Szummer-semisupervised-icml08,
  author =       {Marc'Aurelio Ranzato and Martin Szummer},
  title =        {Semi-supervised Learning of Compact Document Representations with Deep Networks},
  booktitle = {Proc.\ Intl.\ Conf.\ on Machine Learning ({ICML})},
  month = jul,                   
  year = 2008,
  pages = {792--799},
  url = {http://research.microsoft.com/~szummer/papers/RanzatoSzummer-semisupervised-deep-icml08.pdf},
  abtract = {Finding a good representation of text documents is crucial in information
retrieval and classification systems. Today the most popular document representation
is based on a vector of word counts in the document. This representation neither captures dependencies
between related words, nor handles synonyms or polysemous words.

In this paper, we propose an algorithm to learn text document representations
based on semi-supervised autoencoders that are stacked to form a deep network. 
The model can be efficiently trained on partially labeled corpora, producing a very compact representation of a document, while retaining as much class information and joint word statistics as possible.
We show that it is advantageous to exploit even a few labeled samples during training.}
}
                  
% pages = fix                  
% URL - update to the following after SIGIR; already final version in uploaded                  
% url = {http://research.microsoft.com/~szummer/papers/ZoeterEtAl-decision-theoretic-lr4ir-sigir08.pdf},
@InProceedings{ZoeterEtAl-decision-theoretic-lr4ir-sigir08,
  author =       {Onno Zoeter and Michael Taylor and Ed Snelson and John
                  Guiver and Nick Craswell and Martin Szummer},
  title =        {A Decision Theoretic Framework for Ranking using Implicit Feedback},
  booktitle = {{SIGIR} 2008 Workshop on Learning to Rank for Information Retrieval},
  year =      2008,
  month =     jul,
  url = {http://research.microsoft.com/~szummer/papers/paper-not-available.html},
  abstract = {
This paper presents a decision theoretic ranking system that incorporates
both explicit and implicit feedback.  The system has a model that predicts,
given all available data at query time, different interactions a person might
have with search results.  Possible interactions include relevance labelling
and clicking.  We define a utility function that takes as input the outputs
of the interaction model to provide a real valued score to the user's
session.  The optimal ranking is the list of documents that, in expectation
under the model, maximizes the utility for a user session.

The system presented is based on a simple example utility function that
combines both click behavior and labelling.  The click prediction model is a
Bayesian generalized linear model. Its notable characteristic is that it
incorporates both weights for explanatory features and weights for each
query-document pair. This allows the model to generalize to unseen queries
but makes it at the same time flexible enough to keep in a `memory' where the
model should deviate from its feature based prediction.  Such a
click-predicting model could be particularly useful in an application such as
enterprise search, allowing on-site adaptation to local documents and user
behaviour.  The example utility function has a parameter that controls the
tradeoff between optimizing for clicks and optimizing for labels.
Experimental results in the context of enterprise search show that a balance
in the tradeoff leads to the best NDCG and good (predicted) clickthrough.
}                
}

% pages = {},  % Missing                  
% abstract - revise to the final version                  
@InProceedings{SzummerKohliHoiem-learning-crf-cuts-eccv08,
  author =       {Martin Szummer and Pushmeet Kohli and Derek Hoiem},
  title =        {Learning {CRFs} using Graph Cuts},
  booktitle =    {European Conference on Computer Vision ({ECCV})},
  year =         2008,
  month =        oct,                   
  url    =       {http://research.microsoft.com/~szummer/papers/SzummerKohliHoiem-learning-crf-cuts-eccv08.pdf},
  abstract =     {
Many computer vision problems are naturally formulated as random
fields, specifically MRFs or CRFs.  The introduction of graph cuts
has enabled efficient and optimal inference in associative random
fields, greatly advancing applications such as segmentation, stereo
reconstruction and many others. However, while fast inference is now
widespread, parameter learning in random fields has remained an
intractable problem. This paper shows how to apply fast inference algorithms, in particular
graph cuts, to learn parameters of random fields with similar
efficiency. We find optimal parameter values under standard
regularized objective functions that ensure good generalization.
Our algorithm enables learning of many parameters in reasonable
time, and we explore further speedup techniques. We also discuss
extensions to non-associative and multi-class problems.  We
evaluate the method on image segmentation and geometry
recognition.}
}

@InProceedings{TorresaniSzummerFitzgibbon09,
  author =    {Lorenzo Torresani and Martin Szummer and Andrew Fitzgibbon},
  title =     {Learning Query-dependent Prefilters for Scalable Image Retrieval},
  booktitle = {Proc.\ Comp.\ Vision Pattern Recogn.\ ({CVPR})},
  pages =     {2615--2622},                   
  year =      2009,
  month =     jun,
  url   =     {http://research.microsoft.com/pubs/81353/TorresaniSzummerFitzgibbon-learning-query-cvpr09.pdf},                  
  abstract = {
We describe an algorithm for similar-image search which is designed to be
efficient for extremely large collections of images.  For each query, a
small response set is selected by a fast prefilter, after which
a more accurate ranker may be applied to each image in the response set.
We consider a class of prefilters comprising disjunctions of conjunctions
("ORs of ANDs") of Boolean features. AND filters can be implemented
efficiently using skipped inverted files, a key component of
web-scale text search engines.  These structures permit search in time
proportional to the response set size.  The prefilters are learned from
training examples, and refined at query time to produce an approximately
bounded response set.

We cast prefiltering as an optimization problem: for each test query,
select the OR-of-AND filter which maximizes training-set recall for an
adjustable bound on response set size.  This may be efficiently implemented
by selecting from a large pool of candidate conjunctions of Boolean
features using a linear program relaxation.  Tests on object class
recognition show that this relatively simple filter
is nevertheless powerful enough to capture some
semantic information.
}}
                  
@InProceedings{RadlinskiSzummerCraswell-query-intents-www10,
  author =    {Filip Radlinski and Martin Szummer and Nick Craswell},
  title =     {Inferring Query Intent from Reformulations and Clicks},
  booktitle = {World Wide Web Conference ({WWW})},
  pages =     {1171--1172},
  year =      2010,
  url = {\url{http://research.microsoft.com/pubs/122720/RadlinskiSzummerCraswell-query-intent-www10.pdf}},
  abstract = {Many researchers have noted that web search queries are often
  ambiguous or unclear. We present an approach for identifying the popular
  meanings of queries using web search logs and user click behavior. We show
  our approach to produce more complete and user-centric intents than expert
  judges by evaluating on TREC queries. This approach was also used by the
  TREC 2009 Web Track judges to obtain more representative topic descriptions
  from real queries.} 
}

@InProceedings{RadlinskiSzummerCraswell-metrics-sigir10,
  author =    {Filip Radlinski and Martin Szummer and Nick Craswell},
  title =     {Metrics for Assessing Sets of Subtopics},
  booktitle = {{SIGIR} Conf.\ Research and Development in Information Retrieval}, 
  year =      2010,
  pages =     {853--854},
  url =
  {\url{http://research.microsoft.com/pubs/130615/RadlinskiSzummerCraswell-metrics-sigir10.pdf}},
  abstract = {To evaluate the diversity of search results, test collections
  have been developed that identify multiple intents for each query. Intents
  are the different meanings or facets that should be covered in a search
  results list. This means that topic development involves proposing a set of
  intents. We propose four measurable properties of query-to-intent mappings,
  allowing for more principled topic development for such test collections.}
}

@InProceedings{TorresaniSzummerFitzgibbon10,
  author =    {Lorenzo Torresani and Martin Szummer and Andrew Fitzgibbon},
  title =     {Efficient Object Category Recognition using Classemes},
  booktitle = {European Conference on Computer Vision ({ECCV})},
  pages =     {776--789},                   
  year =      2010,
  month =     sep,
  series =    {{LNCS} 6311},                  
  url   =     {\url{http://research.microsoft.com/pubs/136846/TorresaniSzummerFitzgibbon-classemes-eccv10.pdf}},
  abstract = {We introduce a new descriptor for images which allows the construction of
    efficient and compact classifiers with good accuracy on object category
    recognition.  The descriptor is the output of a large number of weakly
    trained object category classifiers on the image.  The trained categories are
    selected from an ontology of visual concepts, but the intention is not to
    encode an explicit decomposition of the scene.  Rather, we accept that
    existing object category classifiers often encode not the category per
    se but ancillary image characteristics; and that these ancillary
    characteristics can combine to represent visual classes unrelated to the
    constituent categories' semantic meanings.

    The advantage of this descriptor is that it allows object-category queries
    to be made against image databases using efficient classifiers (efficient
    at test time) such as linear support vector machines, and allows these
    queries to be for novel categories.  Even when the representation is
    reduced to 200 bytes per image, classification accuracy on object category
    recognition is comparable with the state of the art (36\% versus 42\%), but
    at orders of magnitude lower computational cost.
}}

@InProceedings{Szummer-semisup-rank-cikm11,
  author =       {Martin Szummer and Emine Yilmaz},
  title =        {Semi-supervised Learning to Rank with Preference Regularization},
  booktitle = cikm,
  month =     oct,                
  year =      2011,
  url = {\url{http://research.microsoft.com/pubs/154323/SzummerYilmaz-semisupervised-ranking-cikm11.pdf}},
  abstract = {We propose a semi-supervised learning to rank algorithm. 
    It learns from both labeled data (pairwise preferences or absolute labels)
    and unlabeled data. 
    The data can consist of multiple groups of items (such as queries), some of
    which may contain only unlabeled items.
    We introduce a preference regularizer favoring that similar items are
    similar in preference to each other. The regularizer captures manifold
    structure in the data, and we also propose a rank-sensitive version
    designed for top-heavy retrieval metrics including NDCG and mean average
    precision. 

    The regularizer is employed in SSLambdaRank, a semi-supervised version of
    LambdaRank. This algorithm directly optimizes popular retrieval metrics
    and improves retrieval accuracy over LambdaRank,
    a state-of-the-art ranker that was used as part of the winner of the Yahoo!
    Learning to Rank challenge 2010. The algorithm runs in linear time in the
    number of queries, and can work with huge datasets.
 }
}
% pages unknown
                  
@InProceedings{Wang-relevance-feedback-cikm11,
  author =       {Chang Wang and Emine Yilmaz and Martin Szummer},
  title =        {Relevance Feedback Exploiting Query-Specific Document Manifolds},
  booktitle = cikm,
  month =     oct,
  year =      2011,
  url  = {http://research.microsoft.com/pubs/154324/WangYSzummer-relevance-manifold-cikm11.pdf},
  abstract = {
   We incorporate relevance feedback into a learning to rank
   framework by exploiting query-specific document similarities. Given
   a few judged feedback documents and many retrieved but unjudged
   documents for a query, we learn a function that adjusts the initial ranking
   score of each document. Scores are fit so that
   documents with similar term content get similar scores, and scores
   of judged documents are close to their labels. By such smoothing
   along the manifold of retrieved documents, we avoid overfitting,
   and can therefore learn a detailed query-specific scoring function
   with several dozen term weights.
  }
}
% pages unknown