@INPROCEEDINGS{ZadroznyEtAl94, author = {Wlodek Zadrozny and Marcin Szummer and Stanislaw Jarecki and David Johnson and Leora Morgenstern}, title = {{NL} Understanding with a Grammar of Constructions}, booktitle = {Intl.\ Conf.\ on Computational Linguistics ({COLING})}, year = {1994}, month = aug, volume = {15}, pages = {1289--1293}, address = {Kyoto, Japan}, url = {http://research.microsoft.com/~szummer/papers/coling-94/ZadroznySzummerJarecki-coling94.ps.gz}, abstract = {We present an approach to natural language understanding based on a computable grammar of constructions. A "construction" consists of a set of features of form and a description of meaning in a context. A grammar is a set of constructions. This kind of grammar is the key element of Mincal, an implemented natural language, speech-enabled interface to an on-line calendar system. The system consists of a NL grammar, a parser, an on-line calendar, a domain knowledge base (about dates, times and meetings), an application knowledge base (about the calendar), a speech recognizer, a speech generator, and the interfaces between those modules. We claim that this architecture should work in general for spoken interfaces in small domains. In this paper we present two novel aspects of the architecture: (a) the use of constructions, integrating descriptions of form, meaning and context into one whole; and (b) the separation of domain knowledge from application knowledge. We describe the data structures for encoding constructions, the structure of the knowledge bases, and the interactions of the key modules of the system.}, } % note = {\url{http://vismod.media.mit.edu/pub/tech-reports/TR-346.ps.Z}}, @MastersThesis{Szummer95, author = {Martin Szummer}, title = {Temporal Texture Modeling}, school = {{MIT} Media Lab Perceptual Computing}, year = {1995}, month = sep, number = {346}, annote = {Master's thesis}, url = {http://research.microsoft.com/~szummer/papers/Szummer-MEng-thesis.ps.gz}, abstract = {Temporal textures are textures with motion. Examples include wavy water, rising steam and a crowd milling about. We model image sequences of temporal textures using the spatio-temporal autoregressive model (STAR). This model expresses each pixel as a linear combination of surrounding pixels lagged both in space and in time. The model provides a basis both for recognition and synthesis. We show how the least squares method can accurately estimate model parameters for large, causal neighborhoods with more than 1000 parameters. Synthesis results show that the model can adequately capture the spatial and temporal characteristics of many temporal textures.} } @UNPUBLISHED{Szummer95b, author = {Martin Szummer}, title = {An Image Browser that learns from User Interaction}, year = {1995}, month = dec, url = {http://research.microsoft.com/~szummer/learning-browser/learning-browser.html}, note = {\url{http://xenia.media.mit.edu/~szummer/learning-browser.html}}, abstract = {Large image databases with millions of images are being built. It is very tedious to browse these databases; the user will only have time to see a small fraction of the images. Currently, there are very few tools that assist the user in finding the right selection of images. This project combines learning algorithms and machine vision techniques to create a flexible and powerful image browser. The user is presented with a selection of images. They select positive and negative examples of the type of images they want to see or avoid seeing. The browser analyzes the examples and chooses the best search metrics. It then uses these metrics to find images similar to the examples. The results form a hierarchy that the user can browse with a tree browser. Next, the user selects more positive and negative examples, and the process repeats.}, } % note = {\url{http://vismod.media.mit.edu/pub/tech-reports/TR-381.ps.Z}}, @INPROCEEDINGS{Szummer96a, author = {Martin Szummer and Rosalind W. Picard}, title = {Temporal Texture Modeling}, booktitle = {{IEEE} Intl.\ Conf.\ on Image Processing ({ICIP})}}, year = {1996}, month = sep, volume = {3}, pages = {823--826}, address = {Lausanne, Switzerland}, url = {http://research.microsoft.com/~szummer/papers/icip-96/SzummerPicard-icip96.pdf}, abstract = {Temporal textures are textures with motion. Examples include wavy water, rising steam and fire. We model image sequences of temporal textures using the spatio-temporal autoregressive model (STAR). This model expresses each pixel as a linear combination of surrounding pixels lagged both in space and in time. The model provides a base for both recognition and synthesis. We show how the least squares method can accurately estimate model parameters for large, causal neighborhoods with more than 1000 parameters. Synthesis results show that the model can adequately capture the spatial and temporal characteristics of many temporal textures. A 95\% recognition rate is achieved for a 135 element database with 15 texture classes.}, } % note = {\url{http://vismod.media.mit.edu/pub/tech-reports/TR-382.ps.Z}}, @INPROCEEDINGS{Szummer96b, author = {Rosalind W. Picard and Thomas P. Minka and Martin Szummer}, title = {Modeling User Subjectivity In Image Libraries}, booktitle = {{IEEE} Intl.\ Conf.\ on Image Processing ({ICIP})}}, year = {1996}, month = sep, volume = {2}, pages = {777--780}, address = {Lausanne, Switzerland}, url = {http://research.microsoft.com/~szummer/papers/icip-96/PicardMinkaSzummer-icip96.pdf}, abstract = {In addition to the problem of {\it which} image analysis models to use in digital libraries, e.g.\ wavelet, Wold, color histograms, is the problem of {\it how} to combine these models with their different strengths. Most present systems place the burden of combination on the user, e.g. the user specifies 50\% texture features, 20\% color features, etc. This is a problem since most users do not know how to best pick the settings for the given data and search problem. This paper addresses this problem, describing research in progress for a system that (1) automatically infers which combination of models best represents the data of interest to the user and (2) learns continuously during interaction with each user. In particular, these two components -- inference and learning -- provide a solution that adapts to the subjective and hard-to-predict behaviors frequently seen when people query or browse image libraries.}, } % note = {\url{http://vismod.media.mit.edu/pub/tech-reports/TR-445.ps.Z}}, @INPROCEEDINGS{Szummer98, author = {Martin Szummer and Rosalind W. Picard}, title = {Indoor-Outdoor Image Classification}, booktitle = {{IEEE} International Workshop on Content-Based Access of Image and Video Databases ({CAIVD})}, year = {1998}, month = jan, pages = {42--51}, address = {Bombay, India}, url = {http://research.microsoft.com/~szummer/papers/caivd98/SzummerPicard-caivd98.ps.gz}, abstract = {We show how high-level scene properties can be inferred from classification of low-level image features, specifically for the indoor-outdoor scene retrieval problem. We systematically studied the features: (1) histograms in the Ohta color space (2) multiresolution, simultaneous autoregressive model parameters (3) coefficients of a shift-invariant DCT. We demonstrate that performance is improved by computing features on subblocks, classifying these subblocks, and then combining these results in a way reminiscent of ``stacking.'' State of the art single-feature methods are shown to result in about 75--86\% performance, while the new method results in 90.3\% correct classification, when evaluated on a diverse database of over 1300 consumer images provided by Kodak.}, } @INPROCEEDINGS{Sorgel98, author = {Wolfgang S{\"o}rgel and Sabine Girod and Martin Szummer and Bernd Girod}, title = {Computer Aided Diagnosis of Bone Lesions in the Facial Skeleton}, booktitle = {Workshop Bildverarbeitung f{\"u}r die Medizin}, year = {1998}, address = {Aachen, Germany}, month = mar, url = {http://research.microsoft.com/~szummer/papers/SorgelEtal-aachen98.pdf}, abstract = {We present a system for computer aided diagnosis of bone tumors in the facial skeleton. There are many different lesions with ra­diographic manifestation in the jaws. Our system helps performing the differential diagnosis of these. The input is a digitized orthopantomograph (OPG) in which the user marks the position of the lesion with a single mouse click. An active contour model then automatically finds the boundaries of the lesion. Gray­level histograms, MRSAR texture featu­res and Gabor filter features are computed for the lesion region. These features are then combined and used to query a database containing expert-diagnosed reference cases. The result is a number of similar cases, with tumor position marked and with available expert annotations. We show good agreement between our results and differential diagnosis given by humans. The system is also a suitable tool for training and education.} } %% crossref = {NIPS-13}, %% note = {\url{http://www.ai.mit.edu/people/szummer/papers/kernelexp-nips00.ps.gz}}, @INPROCEEDINGS{Szummer-kernel-expansions, author = {Martin Szummer and Tommi Jaakkola}, title = {Kernel expansions with unlabeled examples}, pages = {626--632}, year = {2001}, month = jan, volume = {13}, publisher = {{MIT} Press}, booktitle = {Advances in Neural Information Processing Systems ({NIPS})}, url = {http://research.microsoft.com/~szummer/papers/SzummerJaakkola-nips00.pdf}, abstract = {Modern classification applications necessitate supplementing the few available labeled examples with unlabeled examples to improve classification performance. We present a new tractable algorithm for exploiting unlabeled examples in discriminative classification. This is achieved essentially by expanding the input vectors into longer feature vectors via both labeled and unlabeled examples. The resulting classification method can be interpreted as a discriminative kernel density estimate and is readily trained via the EM algorithm, which in this case is both discriminative and achieves the optimal solution. We provide, in addition, a purely discriminative formulation of the estimation problem by appealing to the maximum entropy framework. We demonstrate that the proposed approach requires very few labeled examples for high classification accuracy.}, } % note = {\url{http://www.ai.mit.edu/people/szummer/}}, @INPROCEEDINGS{Szummer-partially-labeled-walks, author = {Martin Szummer and Tommi Jaakkola}, title = {Partially labeled classification with Markov random walks}, pages = {945--952}, year = {2002}, month = jan, volume = {14}, publisher = {{MIT} Press}, booktitle = {Advances in Neural Information Processing Systems ({NIPS})}, url = {http://research.microsoft.com/~szummer/papers/SzummerJaakkola-nips01.pdf}, abstract = {To classify a large number of unlabeled examples we combine a limited number of labeled examples with a Markov random walk representation over the unlabeled examples. The random walk representation exploits any low dimensional structure in the data in a robust, probabilistic manner. We develop and compare several estimation criteria/algorithms suited to this representation. This includes in particular multi-way classification with an average margin criterion which permits a closed form solution. The time scale of the random walk regularizes the representation and can be set through a margin-based criterion favoring unambiguous classification. We also extend this basic regularization by adapting time scales for individual examples. We demonstrate the approach on synthetic examples and on text classification problems.}, } % note = {\url{http://www.ai.mit.edu/people/szummer/}}, @INPROCEEDINGS{SzummerJaakkola02, author = {Martin Szummer and Tommi Jaakkola}, title = {Information Regularization with Partially Labeled Data}, pages = {1025--1032}, year = {2003}, month = jan, volume = {15}, publisher = {{MIT} Press}, booktitle = {Advances in Neural Information Processing Systems ({NIPS})}, url = {http://research.microsoft.com/~szummer/papers/SzummerJaakkola-nips02.pdf}, abstract = {Classification with partially labeled data requires using a large number of unlabeled examples (or an estimated marginal $P(x)$), to further constrain the conditional $P(y|x)$ beyond a few available labeled examples. We formulate a regularization approach to linking the marginal and the conditional in a general way. The regularization penalty measures the information that is implied about the labels over covering regions. No parametric assumptions are required and the approach remains tractable even for continuous marginal densities $P(x)$. We develop algorithms for solving the regularization problem for finite covers, establish a limiting differential equation, and exemplify the behavior of the new regularization approach in simple cases.}, abstract_html = {Classification with partially labeled data requires using a large number of unlabeled examples (or an estimated marginal P(x)), to further constrain the conditional P(y|x) beyond a few available labeled examples. We formulate a regularization approach to linking the marginal and the conditional in a general way. The regularization penalty measures the information that is implied about the labels over covering regions. No parametric assumptions are required and the approach remains tractable even for continuous marginal densities P(x). We develop algorithms for solving the regularization problem for finite covers, establish a limiting differential equation, and exemplify the behavior of the new regularization approach in simple cases.}, } % Kjaerulff - a and e should be merged, but tool does not support it, so replaced directly by Windows char, Kjærulff @INPROCEEDINGS{Yeang-markov-walks-uai03, author = {Chen-Hsiang Yeang and Martin Szummer}, title = {Markov Random Walk Representations with Continuous Distributions}, booktitle = {Proc.\ Uncertainty in Artificial Intelligence ({UAI})}, year = {2003}, month = aug, editor = {U. Kj{\ae}rulff and C. Meek}, pages = {600--607}, publisher = {Morgan Kaufmann}, url = {http://research.microsoft.com/~szummer/papers/YeangSzummer-uai03.ps.gz}, abstract = {Representations based on random walks can exploit discrete data distributions for clustering and classification. We extend such representations from discrete to continuous distributions. Transition probabilities are now calculated using a diffusion equation with a diffusion coefficient that varies inversely with the data density. We relate this diffusion equation to a path integral and derive the corresponding path probability measure. The framework is useful for incorporating continuous data densities and prior knowledge.}, } @INPROCEEDINGS{Szummer-incorporating-context-aaai04, author = {Martin Szummer and Philip J. Cowans}, title = {Incorporating Context and User Feedback in Pen-Based Interfaces}, booktitle = {{AAAI} Fall symposium, Making Pen-Based Interaction Intelligent and Natural}, year = {2004}, month = oct, editor = {R. Davis and J. Landay et al.}, series = {FS-04-06}, pages = {159--166}, publisher = {{AAAI Press}}, url = {http://research.microsoft.com/~szummer/papers/SzummerCowans-aaai04.pdf}, abstract = {We propose a joint probabilistic model for grouping and labeling hand-drawn ink strokes. We demonstrate that simultaneous grouping and labeling yields superior accuracy to labeling alone. Our probabilistic formulation has many advantages, exact inference is feasible, and we obtain confidence estimates. We show how to incorporate user feedback by conditioning our model, and discuss different types of inference tasks suited for various user interactions.}, } @INPROCEEDINGS{Krishnapuram-generative-models-iwfhr04, author = {Balaji Krishnapuram and Christopher M. Bishop and Martin Szummer}, title = {Generative Models and {B}ayesian Model Comparison for Shape Recognition}, booktitle = {9th Intl.\ Workshop on Frontiers in Handwriting Recognition ({IWFHR})}, year = {2004}, month = oct, editor = {F. Kimura and H. Fujisawa}, pages = {20--25}, organization = {{IEEE}}, url = {http://research.microsoft.com/~szummer/papers/KrishnapuramBishopSzummer-iwfhr04.pdf}, abstract = {Recognition of hand-drawn shapes is an important and widely studied problem. By adopting a generative probabilistic framework we are able to formulate a robust and flexible approach to shape recognition which allows for a wide range of shapes and which can recognize new shapes from a single exemplar. It also provides meaningful probabilistic measures of model score which can be used as part of a larger probabilistic framework for interpreting a page of ink. We also show how Bayesian model comparison allows the trade-off between data fit and model complexity to be optimized automatically.}, } @INPROCEEDINGS{Szummer-contextual-recog-iwfhr04, author = {Martin Szummer and Yuan Qi}, title = {Contextual Recognition of Hand-drawn Diagrams with Conditional Random Fields}, booktitle = {9th Intl.\ Workshop on Frontiers in Handwriting Recognition ({IWFHR})}, year = {2004}, month = oct, editor = {F. Kimura and H. Fujisawa}, pages = {32--37}, organization = {{IEEE}}, url = {http://research.microsoft.com/~szummer/papers/Szummer-Qi-iwfhr04.pdf}, abstract = { Hand-drawn diagrams present a complex recognition problem. Fragments of the drawing are often individually ambiguous, and require context to be interpreted. We present a recognizer based on conditional random fields (CRFs) that jointly analyze all drawing fragments in order to incorporate contextual cues. The classification of each fragment influences the classification of its neighbors. CRFs allow flexible and correlated features, and take temporal information into account. Training is done via conditional MAP estimation that is guaranteed to reach the global optimum. During recognition we propagate information globally to find the joint MAP or maximum marginal solution for each fragment. We demonstrate the framework on a container versus connector recognition task.}, } % Best student paper award. @INPROCEEDINGS{Cowans-partitioning-aistats05, author = {Philip J. Cowans and Martin Szummer}, title = {A Graphical Model for Simultaneous Partitioning and Labeling}, booktitle = {AI \& Statistics {AISTATS}}, year = {2005}, month = jan, comment = {Best student paper award. Tenth International Workshop on Artificial Intelligence and Statistics}, url = {http://research.microsoft.com/~szummer/papers/CowansSzummer-aistats05.pdf}, abstract = {In this work we develop a graphical model for describing probability distributions over labeled partitions of an undirected graph which are conditioned on observed data. We show how to efficiently perform exact inference in these models, by exploiting the structure of the graph and adapting the sum-product and max-product algorithms. We demonstrate our approach on the task of segmenting and labeling hand-drawn ink fragments, and show that a significant performance increase is obtained by labeling and partitioning simultaneously.}, } @INPROCEEDINGS{Qi-BCRF-aistats05, author = {Yuan Qi and Martin Szummer and Thomas P. Minka}, title = {Bayesian Conditional Random Fields}, booktitle = {AI \& Statistics {AISTATS}}, year = {2005}, month = jan, pages = {269--276}, url = {http://research.microsoft.com/~szummer/papers/QiSzummerMinka-aistats05.pdf}, comment = {Tenth International Workshop on Artificial Intelligence and Statistics}, abstract = { We propose Bayesian Conditional Random Fields (BCRFs) for classifying interdependent and structured data, such as sequences, images or webs. BCRFs are a Bayesian approach to training and inference with conditional random fields, which were previously trained by maximizing likelihood (ML) (Lafferty et al., 2001). Our framework avoids the problem of overfitting, and offers the full advantages of a Bayesian treatment. Unlike the ML approach, we estimate the posterior distribution of the model parameters during training, and average predictions over this posterior during inference. We apply two extensions of expectation propagation (EP), the power EP and the novel transformed EP methods, to incorporate the partition function. For algorithmic stability and accuracy, we flatten the approximation structures to avoid two-level approximations. We demonstrate the superior prediction accuracy of BCRFs over conditional random fields trained with ML or MAP on synthetic and real datasets}, } % URL not available as not final version % TODO: set year & month to correct value @ARTICLE{Qi-BCRF-jmlr06, author = {Yuan Qi and Martin Szummer and Thomas P. Minka}, title = {Bayesian Conditional Random Fields}, journal = {Journal of Machine Learning Research ({JMLR})}, year = {2010}, month = dec, url = {http://research.microsoft.com/~szummer/papers/paper-not-available.html}, abstract = { We propose Bayesian Conditional Random Fields (BCRFs) for classifying interdependent and structured data, such as sequences, images or webs. BCRFs are a Bayesian approach to training and inference with conditional random fields, which were previously trained by maximizing likelihood (ML) (Lafferty et al., 2001). Our framework avoids the problem of overfitting, and offers the full advantages of a Bayesian treatment. Unlike the ML approach, we estimate the posterior distribution of the model parameters during training, and average predictions over this posterior during inference. We apply two extensions of expectation propagation (EP), the power EP and the novel transformed EP methods, to incorporate the partition function. For algorithmic stability and accuracy, we flatten the approximation structures to avoid two-level approximations. We demonstrate the superior prediction accuracy of BCRFs over conditional random fields trained with ML or MAP on synthetic and real datasets.}, } @INPROCEEDINGS{Qi-BCRF-cvpr05, author = {Yuan Qi and Martin Szummer and Thomas P. Minka}, title = {Diagram Structure Recognition by Bayesian Conditional Random Fields}, booktitle = {Proc.\ Comp.\ Vision Pattern Recogn ({CVPR})}, pages = {191--196}, editor = {C. Schmid and S. Soatto and C. Tomasi}, year = {2005}, month = jun, url = {http://research.microsoft.com/~szummer/papers/QiSzummerMinka-cvpr05.pdf}, abstract = {Hand-drawn diagrams present a complex recognition problem. Elements of the diagram are often individually ambiguous, and require context to be interpreted. We present a recognition method based on Bayesian conditional random fields (BCRFs) that jointly analyzes all drawing elements in order to incorporate contextual cues. The classification of each object affects the classification of its neighbors. BCRFs allow flexible and correlated features, and take both spatial and temporal information into account. BCRFs estimate the posterior distribution of parameters during training, and average predictions over the posterior for testing. As a result of model averaging, BCRFs avoid the overfitting problems associated with maximum likelihood training. We also incorporate Automatic Relevance Determination (ARD), a Bayesian feature selection technique, into BCRFs. The result is significantly lower error rates compared to ML- and MAP-trained CRFs.}, } @INPROCEEDINGS{Szummer-icdar05, author = {Martin Szummer}, title = {Learning Diagram Parts with Hidden Random Fields}, booktitle = {Intl.\ Conf.\ Document Analysis and Recognition ({ICDAR})}, pages = {1188--1193}, year = {2005}, month = aug, url = {http://research.microsoft.com/~szummer/papers/Szummer-icdar05.pdf}, abstract = {Many diagrams contain compound objects composed of parts. We propose a recognition framework that learns parts in an unsupervised way, and requires training labels only for compound objects. Thus, human labeling effort is reduced and parts are not predetermined, instead appropriate parts are discovered based on the data. We model contextual relations between parts, such that the label of a part can depend simultaneously on the labels of its neighbors, as well as spatial and temporal information. The model is a Hidden Random Field (HRF), an extension of a Conditional Random Field. We apply it to find parts of boxes, arrows and flowchart shapes in hand-drawn diagrams, and also demonstrate improved recognition accuracy over the conditional random field model without parts.}, } @INPROCEEDINGS{Szummer-disc-adapt-iwfhr06, author = {Martin Szummer and Christopher M. Bishop}, title = {Discriminative Writer Adaptation}, booktitle = {10th Intl.\ Workshop on Frontiers in Handwriting Recognition ({IWFHR})}, pages= {293--298}, year = {2006}, month = oct, url = {http://research.microsoft.com/~szummer/papers/SzummerBishop-IWFHR06.pdf}, abstract = {We propose a general method for adapting a writer-independent classifier to an individual writer. We employ a mixture of experts formulation, where the classifiers are trained on weighted clusters of writers. The clusters are determined by which experts classify individual writing correctly. The method adapts by choosing the appropriate combination of classifiers for a new user. It applies to any probabilistic discriminative classifier, and adapts discriminatively without modeling the input feature distribution. We apply the method to online character recognition. Specifically, we use a mixture of neural networks as well as a mixture of logistic regressions. We train the mixture via conjugate gradient ascent or via the EM algorithm on 192,000 Latin characters of 98 classes and 216 writers, and show adaptation results for 21 writers.}, } @InProceedings{Mickens-Szummer-Narayanan07, author = {James Mickens and Martin Szummer and Dushyanth Narayanan}, title = {Snitch: {I}nteractive Decision Trees for Troubleshooting Misconfigurations}, booktitle = {{SysML07}: 2nd Workshop on Tackling Computer Systems Problems with Machine Learning Techniques}, year = 2007, month = apr, url = {http://research.microsoft.com/~szummer/papers/MickensSzummerNarayanan-Snitch-sysml07.pdf}, abstract = { Troubleshooting misconfigurations of modern applications is difficult due to their large and complex state. Snitch is a prototype tool that assists human troubleshooters by finding relationships between application state and subsequent faults. It correlates configuration state and application errors across many machines and users, and across long periods of time. Snitch aids the human expert in extracting patterns from this rich but enormous data set by building decision trees pinpointing potential configuration problems. We applied Snitch to 114 GB of configuration traces from 151 machines over 567 days. We illustrate how Snitch can suggest misconfigurations in case studies of two Windows applications: Messenger and Outlook.} } @InProceedings{Rother-QPBO07, author = {Carsten Rother and Vladimir Kolmogorov and Victor Lempitsky and Martin Szummer}, title = {Optimizing Binary {MRFs} via Extended Roof Duality}, booktitle = {Proc.\ Comp.\ Vision Pattern Recogn.\ ({CVPR})}, year = {2007}, month = jun, url = {http://research.microsoft.com/~szummer/papers/Rother-etal-QPBO-cvpr07.pdf}, abstract = { Many computer vision applications rely on the efficient optimization of challenging, so-called non-submodular, binary pairwise MRFs. A promising graph cut based approach for optimizing such MRFs known as "roof duality" was recently introduced into computer vision. We study two methods which extend this approach. First, we discuss an efficient implementation of the "probing" technique introduced recently by Boros et al. 2006. It simplifies the MRF while preserving the global optimum. Our code is 400-700 faster on some graphs than the implementation of [Boros 2006]. Second, we present a new technique which takes an arbitrary input labeling and tries to improve its energy. We give theoretical characterizations of local minima of this procedure. We applied both techniques to many applications, including image segmentation, new view synthesis, super-resolution, diagram recognition, parameter learning, texture restoration, and image deconvolution. For several applications we see that we are able to find the global minimum very efficiently, and considerably outperform the original roof duality approach. In comparison to existing techniques, such as graph cut, TRW, BP, ICM, and simulated annealing, we nearly always find a lower energy. } } @InProceedings{Craswell-Szummer-random-walks-sigir07, author = {Nick Craswell and Martin Szummer}, title = {Random Walks on the Click Graph}, booktitle = {{SIGIR} Conf.\ Research and Development in Information Retrieval}, year = 2007, month = jul, pages = {239--246}, url = {http://research.microsoft.com/~szummer/papers/CraswellSzummer-random-walks-sigir07.pdf}, abstract = {Search engines can record which documents were clicked for which query, and use these query-document pairs as 'soft' relevance judgments. However, compared to the true judgments, click logs give noisy and sparse relevance information. We apply a Markov random walk model to a large click log, producing a probabilistic ranking of documents for a given query. A key advantage of the model is its ability to retrieve relevant documents that have not yet been clicked for that query and rank those effectively. We conduct experiments on click logs from image search, comparing our ('backward') random walk model to a different ('forward') random walk, varying parameters such as walk length and self-transition probability. The most effective combination is a long backward walk with high self-transition probability.} } @InProceedings{Szummer-Craswell-behavioral-classification-www08, author = {Martin Szummer and Nick Craswell}, title = {Behavioral Classification on the Click Graph}, booktitle = {World Wide Web Conference {WWW}}, year = 2008, month = apr, pages = {1241--1242}, url = {http://research.microsoft.com/~szummer/papers/SzummerCraswell-behavioral-classification-www08.pdf}, abstract = {A bipartite query-URL graph, where an edge indicates that a document was clicked for a query, is a useful construct for finding groups of related queries and URLs. Here we use this behavior graph for classification. We choose a click graph sampled from two weeks of image search activity, and the task of ``adult'' filtering: identifying content in the graph that is inappropriate for minors. We show how to perform classification using random walks on this graph, and two methods for estimating classifier parameters.} } @InProceedings{Ranzato-Szummer-semisupervised-icml08, author = {Marc'Aurelio Ranzato and Martin Szummer}, title = {Semi-supervised Learning of Compact Document Representations with Deep Networks}, booktitle = {Proc.\ Intl.\ Conf.\ on Machine Learning ({ICML})}, month = jul, year = 2008, pages = {792--799}, url = {http://research.microsoft.com/~szummer/papers/RanzatoSzummer-semisupervised-deep-icml08.pdf}, abtract = {Finding a good representation of text documents is crucial in information retrieval and classification systems. Today the most popular document representation is based on a vector of word counts in the document. This representation neither captures dependencies between related words, nor handles synonyms or polysemous words. In this paper, we propose an algorithm to learn text document representations based on semi-supervised autoencoders that are stacked to form a deep network. The model can be efficiently trained on partially labeled corpora, producing a very compact representation of a document, while retaining as much class information and joint word statistics as possible. We show that it is advantageous to exploit even a few labeled samples during training.} } % pages = fix % URL - update to the following after SIGIR; already final version in uploaded % url = {http://research.microsoft.com/~szummer/papers/ZoeterEtAl-decision-theoretic-lr4ir-sigir08.pdf}, @InProceedings{ZoeterEtAl-decision-theoretic-lr4ir-sigir08, author = {Onno Zoeter and Michael Taylor and Ed Snelson and John Guiver and Nick Craswell and Martin Szummer}, title = {A Decision Theoretic Framework for Ranking using Implicit Feedback}, booktitle = {{SIGIR} 2008 Workshop on Learning to Rank for Information Retrieval}, year = 2008, month = jul, url = {http://research.microsoft.com/~szummer/papers/paper-not-available.html}, abstract = { This paper presents a decision theoretic ranking system that incorporates both explicit and implicit feedback. The system has a model that predicts, given all available data at query time, different interactions a person might have with search results. Possible interactions include relevance labelling and clicking. We define a utility function that takes as input the outputs of the interaction model to provide a real valued score to the user's session. The optimal ranking is the list of documents that, in expectation under the model, maximizes the utility for a user session. The system presented is based on a simple example utility function that combines both click behavior and labelling. The click prediction model is a Bayesian generalized linear model. Its notable characteristic is that it incorporates both weights for explanatory features and weights for each query-document pair. This allows the model to generalize to unseen queries but makes it at the same time flexible enough to keep in a `memory' where the model should deviate from its feature based prediction. Such a click-predicting model could be particularly useful in an application such as enterprise search, allowing on-site adaptation to local documents and user behaviour. The example utility function has a parameter that controls the tradeoff between optimizing for clicks and optimizing for labels. Experimental results in the context of enterprise search show that a balance in the tradeoff leads to the best NDCG and good (predicted) clickthrough. } } % pages = {}, % Missing % abstract - revise to the final version @InProceedings{SzummerKohliHoiem-learning-crf-cuts-eccv08, author = {Martin Szummer and Pushmeet Kohli and Derek Hoiem}, title = {Learning {CRFs} using Graph Cuts}, booktitle = {European Conference on Computer Vision ({ECCV})}, year = 2008, month = oct, url = {http://research.microsoft.com/~szummer/papers/SzummerKohliHoiem-learning-crf-cuts-eccv08.pdf}, abstract = { Many computer vision problems are naturally formulated as random fields, specifically MRFs or CRFs. The introduction of graph cuts has enabled efficient and optimal inference in associative random fields, greatly advancing applications such as segmentation, stereo reconstruction and many others. However, while fast inference is now widespread, parameter learning in random fields has remained an intractable problem. This paper shows how to apply fast inference algorithms, in particular graph cuts, to learn parameters of random fields with similar efficiency. We find optimal parameter values under standard regularized objective functions that ensure good generalization. Our algorithm enables learning of many parameters in reasonable time, and we explore further speedup techniques. We also discuss extensions to non-associative and multi-class problems. We evaluate the method on image segmentation and geometry recognition.} } @InProceedings{TorresaniSzummerFitzgibbon09, author = {Lorenzo Torresani and Martin Szummer and Andrew Fitzgibbon}, title = {Learning Query-dependent Prefilters for Scalable Image Retrieval}, booktitle = {Proc.\ Comp.\ Vision Pattern Recogn.\ ({CVPR})}, pages = {2615--2622}, year = 2009, month = jun, url = {http://research.microsoft.com/pubs/81353/TorresaniSzummerFitzgibbon-learning-query-cvpr09.pdf}, abstract = { We describe an algorithm for similar-image search which is designed to be efficient for extremely large collections of images. For each query, a small response set is selected by a fast prefilter, after which a more accurate ranker may be applied to each image in the response set. We consider a class of prefilters comprising disjunctions of conjunctions ("ORs of ANDs") of Boolean features. AND filters can be implemented efficiently using skipped inverted files, a key component of web-scale text search engines. These structures permit search in time proportional to the response set size. The prefilters are learned from training examples, and refined at query time to produce an approximately bounded response set. We cast prefiltering as an optimization problem: for each test query, select the OR-of-AND filter which maximizes training-set recall for an adjustable bound on response set size. This may be efficiently implemented by selecting from a large pool of candidate conjunctions of Boolean features using a linear program relaxation. Tests on object class recognition show that this relatively simple filter is nevertheless powerful enough to capture some semantic information. }} @InProceedings{RadlinskiSzummerCraswell-query-intents-www10, author = {Filip Radlinski and Martin Szummer and Nick Craswell}, title = {Inferring Query Intent from Reformulations and Clicks}, booktitle = {World Wide Web Conference ({WWW})}, pages = {1171--1172}, year = 2010, url = {\url{http://research.microsoft.com/pubs/122720/RadlinskiSzummerCraswell-query-intent-www10.pdf}}, abstract = {Many researchers have noted that web search queries are often ambiguous or unclear. We present an approach for identifying the popular meanings of queries using web search logs and user click behavior. We show our approach to produce more complete and user-centric intents than expert judges by evaluating on TREC queries. This approach was also used by the TREC 2009 Web Track judges to obtain more representative topic descriptions from real queries.} } @InProceedings{RadlinskiSzummerCraswell-metrics-sigir10, author = {Filip Radlinski and Martin Szummer and Nick Craswell}, title = {Metrics for Assessing Sets of Subtopics}, booktitle = {{SIGIR} Conf.\ Research and Development in Information Retrieval}, year = 2010, pages = {853--854}, url = {\url{http://research.microsoft.com/pubs/130615/RadlinskiSzummerCraswell-metrics-sigir10.pdf}}, abstract = {To evaluate the diversity of search results, test collections have been developed that identify multiple intents for each query. Intents are the different meanings or facets that should be covered in a search results list. This means that topic development involves proposing a set of intents. We propose four measurable properties of query-to-intent mappings, allowing for more principled topic development for such test collections.} } @InProceedings{TorresaniSzummerFitzgibbon10, author = {Lorenzo Torresani and Martin Szummer and Andrew Fitzgibbon}, title = {Efficient Object Category Recognition using Classemes}, booktitle = {European Conference on Computer Vision ({ECCV})}, pages = {776--789}, year = 2010, month = sep, series = {{LNCS} 6311}, url = {\url{http://research.microsoft.com/pubs/136846/TorresaniSzummerFitzgibbon-classemes-eccv10.pdf}}, abstract = {We introduce a new descriptor for images which allows the construction of efficient and compact classifiers with good accuracy on object category recognition. The descriptor is the output of a large number of weakly trained object category classifiers on the image. The trained categories are selected from an ontology of visual concepts, but the intention is not to encode an explicit decomposition of the scene. Rather, we accept that existing object category classifiers often encode not the category per se but ancillary image characteristics; and that these ancillary characteristics can combine to represent visual classes unrelated to the constituent categories' semantic meanings. The advantage of this descriptor is that it allows object-category queries to be made against image databases using efficient classifiers (efficient at test time) such as linear support vector machines, and allows these queries to be for novel categories. Even when the representation is reduced to 200 bytes per image, classification accuracy on object category recognition is comparable with the state of the art (36\% versus 42\%), but at orders of magnitude lower computational cost. }} @InProceedings{Szummer-semisup-rank-cikm11, author = {Martin Szummer and Emine Yilmaz}, title = {Semi-supervised Learning to Rank with Preference Regularization}, booktitle = cikm, month = oct, year = 2011, url = {\url{http://research.microsoft.com/pubs/154323/SzummerYilmaz-semisupervised-ranking-cikm11.pdf}}, abstract = {We propose a semi-supervised learning to rank algorithm. It learns from both labeled data (pairwise preferences or absolute labels) and unlabeled data. The data can consist of multiple groups of items (such as queries), some of which may contain only unlabeled items. We introduce a preference regularizer favoring that similar items are similar in preference to each other. The regularizer captures manifold structure in the data, and we also propose a rank-sensitive version designed for top-heavy retrieval metrics including NDCG and mean average precision. The regularizer is employed in SSLambdaRank, a semi-supervised version of LambdaRank. This algorithm directly optimizes popular retrieval metrics and improves retrieval accuracy over LambdaRank, a state-of-the-art ranker that was used as part of the winner of the Yahoo! Learning to Rank challenge 2010. The algorithm runs in linear time in the number of queries, and can work with huge datasets. } } % pages unknown @InProceedings{Wang-relevance-feedback-cikm11, author = {Chang Wang and Emine Yilmaz and Martin Szummer}, title = {Relevance Feedback Exploiting Query-Specific Document Manifolds}, booktitle = cikm, month = oct, year = 2011, url = {http://research.microsoft.com/pubs/154324/WangYSzummer-relevance-manifold-cikm11.pdf}, abstract = { We incorporate relevance feedback into a learning to rank framework by exploiting query-specific document similarities. Given a few judged feedback documents and many retrieved but unjudged documents for a query, we learn a function that adjusts the initial ranking score of each document. Scores are fit so that documents with similar term content get similar scores, and scores of judged documents are close to their labels. By such smoothing along the manifold of retrieved documents, we avoid overfitting, and can therefore learn a detailed query-specific scoring function with several dozen term weights. } } % pages unknown