1 import re
2 from copy import deepcopy
3 from xml.dom import minidom, getDOMImplementation
4
5 from intermine.util import openAnything, ReadableException
6 from intermine.pathfeatures import PathDescription, Join, SortOrder, SortOrderList
7 from intermine.model import Column, Class, Model, Reference, ConstraintNode
8 import constraints
9
10 """
11 Classes representing queries against webservices
12 ================================================
13
14 Representations of queries, and templates.
15
16 """
17
18 __author__ = "Alex Kalderimis"
19 __organization__ = "InterMine"
20 __license__ = "LGPL"
21 __contact__ = "dev@intermine.org"
22
23
24 -class Query(object):
25 """
26 A Class representing a structured database query
27 ================================================
28
29 Objects of this class have properties that model the
30 attributes of the query, and methods for performing
31 the request.
32
33 SYNOPSIS
34 --------
35
36 example:
37
38 >>> service = Service("http://www.flymine.org/query/service")
39 >>> query = service.new_query()
40 >>>
41 >>> query.add_view("Gene.symbol", "Gene.pathways.name", "Gene.proteins.symbol")
42 >>> query.add_sort_order("Gene.pathways.name")
43 >>>
44 >>> query.add_constraint("Gene", "LOOKUP", "eve")
45 >>> query.add_constraint("Gene.pathways.name", "=", "Phosphate*")
46 >>>
47 >>> query.set_logic("A or B")
48 >>>
49 >>> for row in query.rows():
50 ... handle_row(row)
51
52 OR, using an SQL style DSL:
53
54 >>> s = Service("www.flymine.org/query")
55 >>> query = s.query("Gene").\\
56 ... select("*", "pathways.*").\\
57 ... where("symbol", "=", "H").\\
58 ... outerjoin("pathways").\\
59 ... order_by("symbol")
60 >>> for row in query.rows(start=10, size=5):
61 ... handle_row(row)
62
63 OR, for a more SQL-alchemy, ORM style:
64
65 >>> for gene in s.query(s.model.Gene).filter(s.model.Gene.symbol == ["zen", "H", "eve"]).add_columns(s.model.Gene.alleles):
66 ... handle(gene)
67
68 Query objects represent structured requests for information over the database
69 housed at the datawarehouse whose webservice you are querying. They utilise
70 some of the concepts of relational databases, within an object-related
71 ORM context. If you don't know what that means, don't worry: you
72 don't need to write SQL, and the queries will be fast.
73
74 To make things slightly more familiar to those with knowledge of SQL, some syntactical
75 sugar is provided to make constructing queries a bit more recognisable.
76
77 PRINCIPLES
78 ----------
79
80 The data model represents tables in the databases as classes, with records
81 within tables as instances of that class. The columns of the database are the
82 fields of that object::
83
84 The Gene table - showing two records/objects
85 +---------------------------------------------------+
86 | id | symbol | length | cyto-location | organism |
87 +----------------------------------------+----------+
88 | 01 | eve | 1539 | 46C10-46C10 | 01 |
89 +----------------------------------------+----------+
90 | 02 | zen | 1331 | 84A5-84A5 | 01 |
91 +----------------------------------------+----------+
92 ...
93
94 The organism table - showing one record/object
95 +----------------------------------+
96 | id | name | taxon id |
97 +----------------------------------+
98 | 01 | D. melanogaster | 7227 |
99 +----------------------------------+
100
101 Columns that contain a meaningful value are known as 'attributes' (in the tables above, that is
102 everything except the id columns). The other columns (such as "organism" in the gene table)
103 are ones that reference records of other tables (ie. other objects), and are called
104 references. You can refer to any field in any class, that has a connection,
105 however tenuous, with a table, by using dotted path notation::
106
107 Gene.organism.name -> the name column in the organism table, referenced by a record in the gene table
108
109 These paths, and the connections between records and tables they represent,
110 are the basis for the structure of InterMine queries.
111
THE STRUCTURE OF A QUERY
------------------------
114
A query has two principal sets of properties:
116 - its view: the set of output columns
117 - its constraints: the set of rules for what to include
118
119 A query must have at least one output column in its view, but constraints
120 are optional - if you don't include any, you will get back every record
121 from the table (every object of that type)
122
123 In addition, the query must be coherent: if you have information about
124 an organism, and you want a list of genes, then the "Gene" table
125 should be the basis for your query, and as such the Gene class, which
126 represents this table, should be the root of all the paths that appear in it:
127
128 So, to take a simple example::
129
130 I have an organism name, and I want a list of genes:
131
132 The view is the list of things I want to know about those genes:
133
134 >>> query.add_view("Gene.name")
135 >>> query.add_view("Gene.length")
136 >>> query.add_view("Gene.proteins.sequence.length")
137
138 Note I can freely mix attributes and references, as long as every view ends in
139 an attribute (a meaningful value). As a short-cut I can also write:
140
141 >>> query.add_views("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
142
143 or:
144
145 >>> query.add_views("Gene.name Gene.length Gene.proteins.sequence.length")
146
147 They are all equivalent. You can also use common SQL style shortcuts such as "*" for all
148 attribute fields:
149
150 >>> query.add_views("Gene.*")
151
You can also use "select" as a synonym for "add_view"
153
154 Now I can add my constraints. As, we mentioned, I have information about an organism, so:
155
156 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
157
(note, here I can use "where" as a synonym for "add_constraint")
159
160 If I run this query, I will get literally millions of results -
161 it needs to be filtered further:
162
163 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
164
165 If that doesn't restrict things enough I can add more filters:
166
167 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
168
169 Now I am guaranteed to get only information on genes I am interested in.
170
171 Note, though, that because I have included the link (or "join") from Gene -> Protein,
172 this, by default, means that I only want genes that have protein information associated
173 with them. If in fact I want information on all genes, and just want to know the
174 protein information if it is available, then I can specify that with:
175
176 >>> query.add_join("Gene.proteins", "OUTER")
177
178 And if perhaps my query is not as simple as a strict cumulative filter, but I want all
179 D. mel genes that EITHER have a short protein sequence OR come from one of my favourite genes
180 (as unlikely as that sounds), I can specify the logic for that too:
181
182 >>> query.set_logic("A and (B or C)")
183
184 Each letter refers to one of the constraints - the codes are assigned in the order you add
185 the constraints. If you want to be absolutely certain about the constraints you mean, you
186 can use the constraint objects themselves:
187
188 >>> gene_is_eve = query.add_constraint("Gene.symbol", "=", "eve")
>>> gene_is_zen = query.add_constraint("Gene.symbol", "=", "zen")
190 >>>
191 >>> query.set_logic(gene_is_eve | gene_is_zen)
192
193 By default the logic is a straight cumulative filter (ie: A and B and C and D and ...)
194
195 Putting it all together:
196
197 >>> query.add_view("Gene.name", "Gene.length", "Gene.proteins.sequence.length")
198 >>> query.add_constraint("Gene.organism.name", "=", "D. melanogaster")
199 >>> query.add_constraint("Gene.proteins.sequence.length", "<", 500)
200 >>> query.add_constraint("Gene.symbol", "ONE OF", ["eve", "zen", "h"])
201 >>> query.add_join("Gene.proteins", "OUTER")
202 >>> query.set_logic("A and (B or C)")
203
204 This can be made more concise and readable with a little DSL sugar:
205
206 >>> query = service.query("Gene")
207 >>> query.select("name", "length", "proteins.sequence.length").\
... where('organism.name', '=', 'D. melanogaster').\
209 ... where("proteins.sequence.length", "<", 500).\
210 ... where('symbol', 'ONE OF', ['eve', 'h', 'zen']).\
211 ... outerjoin('proteins').\
212 ... set_logic("A and (B or C)")
213
214 And the query is defined.
215
216 Result Processing: Rows
217 -----------------------
218
219 calling ".rows()" on a query will return an iterator of rows, where each row
220 is a ResultRow object, which can be treated as both a list and a dictionary.
221
222 Which means you can refer to columns by name:
223
224 >>> for row in query.rows():
225 ... print "name is %s" % (row["name"])
226 ... print "length is %d" % (row["length"])
227
228 As well as using list indices:
229
230 >>> for row in query.rows():
231 ... print "The first column is %s" % (row[0])
232
233 Iterating over a row iterates over the cell values as a list:
234
235 >>> for row in query.rows():
236 ... for column in row:
237 ... do_something(column)
238
239 Here each row will have a gene name, a gene length, and a sequence length, eg:
240
241 >>> print row.to_l
242 ["even skipped", "1359", "376"]
243
244 To make that clearer, you can ask for a dictionary instead of a list:
245
>>> for row in query.rows():
247 ... print row.to_d
248 {"Gene.name":"even skipped","Gene.length":"1359","Gene.proteins.sequence.length":"376"}
249
250
251 If you just want the raw results, for printing to a file, or for piping to another program,
252 you can request strings instead:
253
>>> for row in query.results("string"):
255 ... print(row)
256
257 Result Processing: Results
258 --------------------------
259
Results can also be processed on a record by record basis. If you have a query that
has output columns of "Gene.symbol", "Gene.pathways.name" and "Gene.proteins.proteinDomains.primaryIdentifier",
then processing it by records will return one object per gene, and that gene will have a property
263 named "pathways" which contains objects which have a name property. Likewise there will be a
264 proteins property which holds a list of proteinDomains which all have a primaryIdentifier property, and so on.
265 This allows a more object orientated approach to database records, familiar to users of
266 other ORMs.
267
268 This is the format used when you choose to iterate over a query directly, or can be explicitly
269 chosen by invoking L{intermine.query.Query.results}:
270
271 >>> for gene in query:
272 ... print gene.name, map(lambda x: x.name, gene.pathways)
273
274 The structure of the object and the information it contains depends entirely
275 on the output columns selected. The values may be None, of course, but also any valid values of an object
276 (according to the data model) will also be None if they were not selected for output. Attempts
277 to access invalid properties (such as gene.favourite_colour) will cause exceptions to be thrown.
278
279 Getting us to Generate your Code
280 --------------------------------
281
282 Not that you have to actually write any of this! The webapp will happily
283 generate the code for any query (and template) you can build in it. A good way to get
284 started is to use the webapp to generate your code, and then run it as scripts
285 to speed up your queries. You can always tinker with and edit the scripts you download.
286
287 To get generated queries, look for the "python" link at the bottom of query-builder and
288 template form pages, it looks a bit like this::
289
290 . +=====================================+=============
291 | |
292 | Perl | Python | Java [Help] |
293 | |
294 +==============================================
295
296 """
297
# Splits a serialised sort order ("Gene.name asc Gene.length desc") into
# alternating path / direction tokens. The direction keyword must be a
# whitespace-delimited word of its own, so that paths which merely contain
# "asc" or "desc" (eg. "Gene.description") are not cut in half.
SO_SPLIT_PATTERN = re.compile(r"\s+(asc|desc)(?:\s+|$)", re.I)
# Splits a logic string on its operators and brackets, leaving the constraint codes.
LOGIC_SPLIT_PATTERN = re.compile(r"\s*(?:and|or|\(|\))\s*", re.I)
# An operator left dangling at the very end / very start of a logic string.
TRAILING_OP_PATTERN = re.compile(r"\s*(and|or)\s*$", re.I)
LEADING_OP_PATTERN = re.compile(r"^\s*(and|or)\s*", re.I)
# An operator directly after an opening bracket, or directly before a closing one.
ORPHANED_OP_PATTERN = re.compile(r"(?:\(\s*(?:and|or)\s*|\s*(?:and|or)\s*\))", re.I)
LOGIC_OPS = ["and", "or"]
# Every ordered pair of operators. Spelled out literally because a nested
# comprehension cannot see class-level names from inside a class body on Python 3.
LOGIC_PRODUCT = [("and", "and"), ("and", "or"), ("or", "and"), ("or", "or")]
305
def __init__(self, model, service=None, validate=True, root=None):
    """
    Construct a new Query
    =====================

    Set up an empty query (no views, joins, constraints or
    sort orders) against an InterMine data warehouse.

    You would not normally call this constructor yourself;
    the factory method on intermine.webservice.Service
    handles construction for you.

    @param model: an instance of L{intermine.model.Model}. Required
    @param service: an instance of l{intermine.service.Service}. Optional,
        but you will not be able to make requests without one. When given,
        its prefetch_depth and prefetch_id_only settings are copied;
        otherwise they default to 1 and False.
    @param validate: a boolean - defaults to True. If set to false, the query
        will not try and validate itself. You should not set this to false.
    @param root: Optional. When given, it is resolved with model.make_path
        and the root of the resulting path becomes the root of the query.

    """
    self.model = model
    self.root = None if root is None else model.make_path(root).root

    self.name = ''
    self.description = ''
    self.service = service
    if service is None:
        self.prefetch_depth = 1
        self.prefetch_id_only = False
    else:
        self.prefetch_depth = service.prefetch_depth
        self.prefetch_id_only = service.prefetch_id_only
    self.do_verification = validate
    self.path_descriptions = []
    self.joins = []
    self.constraint_dict = {}
    self.uncoded_constraints = []
    self.views = []
    self._sort_order_list = SortOrderList()
    self._logic_parser = constraints.LogicParser(self)
    self._logic = None
    self.constraint_factory = constraints.ConstraintFactory()

    # Alternative names for existing methods, bound per instance
    # in the same order as before.
    aliases = (
        ("c", "column"),
        ("filter", "where"),
        ("add_column", "add_view"),
        ("add_columns", "add_view"),
        ("add_views", "add_view"),
        ("add_to_select", "add_view"),
        ("order_by", "add_sort_order"),
        ("all", "get_results_list"),
        ("size", "count"),
        ("summarize", "summarise"),
    )
    for alias, target in aliases:
        setattr(self, alias, getattr(self, target))
359
361 """Return an iterator over all the objects returned by this query"""
362 return self.results("jsonobjects")
363
365 """Return the number of rows this query will return."""
366 return self.count()
367
"""Construct a new list by subtracting another query or list from this one"""
370 return self.service._list_manager.subtract([self], [other])
371
373 """Calculate the symmetric difference of this query and another"""
374 return self.service._list_manager.xor([self, other])
375
377 """
378 Intersect this query and another query or list
379 """
380 return self.service._list_manager.intersect([self, other])
381
383 """
384 Return the union of this query and another query or list.
385 """
386 return self.service._list_manager.union([self, other])
387
389 """
390 Return the union of this query and another query or list
391 """
392 return self.service._list_manager.union([self, other])
393
@classmethod
def from_xml(cls, xml, *args, **kwargs):
    """
    Deserialise a query serialised to XML
    =====================================

    This method is used to instantiate serialised queries.
    It is used by intermine.webservice.Service objects
    to instantiate Template objects and it can be used
    to read in queries you have saved to a file.

    The new object is created with cls(*args, **kwargs). Verification
    is switched off while the object is being populated, and verify()
    is called once at the end.

    @param xml: The xml as a file name, url, or string

    @raise QueryParseError: if the query cannot be parsed
    @raise ModelError: if the query has illegal paths in it
    @raise ConstraintError: if the constraints don't make sense

    @rtype: L{Query}
    """
    obj = cls(*args, **kwargs)
    obj.do_verification = False
    f = openAnything(xml)
    try:
        doc = minidom.parse(f)
    finally:
        # Release the handle even when the document is malformed.
        f.close()

    queries = doc.getElementsByTagName('query')
    if len(queries) != 1:
        raise QueryParseError("wrong number of queries in xml. "
            + "Only one <query> element is allowed. Found %d" % len(queries))
    q = queries[0]
    obj.name = q.getAttribute('name')
    obj.description = q.getAttribute('description')
    obj.add_view(q.getAttribute('view'))
    for p in q.getElementsByTagName('pathDescription'):
        obj.add_path_description(p.getAttribute('pathString'), p.getAttribute('description'))
    for j in q.getElementsByTagName('join'):
        obj.add_join(j.getAttribute('path'), j.getAttribute('style'))

    # (keyword argument for add_constraint, XML attribute it is read from)
    attribute_map = (
        ('op', 'op'),
        ('value', 'value'),
        ('code', 'code'),
        ('subclass', 'type'),
        ('editable', 'editable'),
        ('optional', 'switchable'),
        ('extra_value', 'extraValue'),
        ('loopPath', 'loopPath'),
    )
    for c in q.getElementsByTagName('constraint'):
        # minidom returns '' (never None) for a missing attribute, so test
        # for emptiness when deciding whether to inherit the parent's path.
        path = c.getAttribute('path')
        if not path:
            if c.parentNode.tagName != "node":
                raise QueryParseError("Constraints must have a path")
            path = c.parentNode.getAttribute('path')

        con_args = {'path': path}
        for arg_name, attr_name in attribute_map:
            con_args[arg_name] = c.getAttribute(attr_name)

        values = []
        for val_e in c.getElementsByTagName('value'):
            texts = [node.data for node in val_e.childNodes
                     if node.nodeType == node.TEXT_NODE]
            values.append(' '.join(texts))
        if values:
            con_args["values"] = values

        # Drop unset attributes; build a new dict rather than deleting
        # from the one being iterated.
        con_args = dict((k, v) for k, v in con_args.items()
                        if v is not None and v != '')

        if "loopPath" in con_args:
            if "op" not in con_args:
                raise QueryParseError("Loop constraints must have an operator")
            # Loop constraints are serialised with =/!=; any other
            # operator is passed through untouched instead of becoming None.
            loop_op = con_args["op"]
            con_args["op"] = {"=": "IS", "!=": "IS NOT"}.get(loop_op, loop_op)

        con = obj.add_constraint(**con_args)
        if not con:
            raise ConstraintError("error adding constraint with args: " + repr(con_args))

    sort_order = q.getAttribute('sortOrder')
    if sort_order:
        sos = Query.SO_SPLIT_PATTERN.split(sort_order)
        # A string that ends in a direction leaves an empty trailing token.
        if len(sos) > 1 and sos[-1] == '':
            sos.pop()
        paths = sos[0::2]
        directions = sos[1::2]
        for idx, path in enumerate(paths):
            if path not in obj.views:
                continue
            if idx < len(directions):
                obj.add_sort_order(path, directions[idx])
            else:
                # No explicit direction: let add_sort_order apply its default.
                obj.add_sort_order(path)

    logic = q.getAttribute('constraintLogic')
    if logic:
        obj._set_questionable_logic(logic)

    obj.verify()

    return obj
491
493 """Attempts to sanity check the logic argument before it is set"""
494 logic = questionable_logic
495 used_codes = set(self.constraint_dict.keys())
496 logic_codes = set(Query.LOGIC_SPLIT_PATTERN.split(questionable_logic))
497 if "" in logic_codes:
498 logic_codes.remove("")
499 irrelevant_codes = logic_codes - used_codes
500 for c in irrelevant_codes:
501 pattern = re.compile("\\b" + c + "\\b", re.I)
502 logic = pattern.sub("", logic)
503
504 logic = re.sub("\((:?and|or|\s)*\)", "", logic)
505
506 logic = Query.LEADING_OP_PATTERN.sub("", logic)
507 logic = Query.TRAILING_OP_PATTERN.sub("", logic)
508 for x in range(2):
509 for left, right in Query.LOGIC_PRODUCT:
510 if left == right:
511 repl = left
512 else:
513 repl = "and"
514 pattern = re.compile(left + "\s*" + right, re.I)
515 logic = pattern.sub(repl, logic)
516 logic = Query.ORPHANED_OP_PATTERN.sub(lambda x: "(" if "(" in x.group(0) else ")", logic)
517 logic = logic.strip().lstrip()
518 logic = Query.LEADING_OP_PATTERN.sub("", logic)
519 logic = Query.TRAILING_OP_PATTERN.sub("", logic)
520 try:
521 if len(logic) > 0 and logic not in ["and", "or"]:
522 self.set_logic(logic)
523 except Exception, e:
524 raise Exception("Error parsing logic string "
525 + repr(questionable_logic)
526 + " (which is " + repr(logic) + " after irrelevant codes have been removed)"
527 + " with available codes: " + repr(list(used_codes))
528 + " because: " + e.message)
529
531 """Return the XML serialisation of this query"""
532 return self.to_xml()
533
535 """
536 Validate the query
537 ==================
538
539 Invalid queries will fail to run, and it is not always
540 obvious why. The validation routine checks to see that
541 the query will not cause errors on execution, and tries to
542 provide informative error messages.
543
544 This method is called immediately after a query is fully
545 deserialised.
546
547 @raise ModelError: if the paths are invalid
548 @raise QueryError: if there are errors in query construction
549 @raise ConstraintError: if there are errors in constraint construction
550
551 """
552 self.verify_views()
553 self.verify_constraint_paths()
554 self.verify_join_paths()
555 self.verify_pd_paths()
556 self.validate_sort_order()
557 self.do_verification = True
558
560 """
561 Replace the current selection of output columns with this one
562 =============================================================
563
564 example::
565
566 query.select("*", "proteins.name")
567
568 This method is intended to provide an API familiar to those
569 with experience of SQL or other ORM layers. This method, in
570 contrast to other view manipulation methods, replaces
571 the selection of output columns, rather than appending to it.
572
573 Note that any sort orders that are no longer in the view will
574 be removed.
575
576 @param paths: The output columns to add
577 """
578 self.views = []
579 self.add_view(*paths)
580 so_elems = self._sort_order_list
581 self._sort_order_list = SortOrderList()
582
583 for so in so_elems:
584 if so.path in self.views:
585 self._sort_order_list.append(so)
586 return self
587
589 """
590 Add one or more views to the list of output columns
591 ===================================================
592
593 example::
594
595 query.add_view("Gene.name Gene.organism.name")
596
597 This is the main method for adding views to the list
598 of output columns. As well as appending views, it
599 will also split a single, space or comma delimited
600 string into multiple paths, and flatten out lists, or any
601 combination. It will also immediately try to validate
602 the views.
603
604 Output columns must be valid paths according to the
605 data model, and they must represent attributes of tables
606
607 Also available as:
608 - add_views
609 - add_column
610 - add_columns
611 - add_to_select
612
613 @see: intermine.model.Model
614 @see: intermine.model.Path
615 @see: intermine.model.Attribute
616 """
617 views = []
618 for p in paths:
619 if isinstance(p, (set, list)):
620 views.extend(list(p))
621 elif isinstance(p, Class):
622 views.append(p.name + ".*")
623 elif isinstance(p, Column):
624 if p._path.is_attribute():
625 views.append(str(p))
626 else:
627 views.append(str(p) + ".*")
628 elif isinstance(p, Reference):
629 views.append(p.name + ".*")
630 else:
631 views.extend(re.split("(?:,?\s+|,)", str(p)))
632
633 views = map(self.prefix_path, views)
634
635 views_to_add = []
636 for view in views:
637 if view.endswith(".*"):
638 view = re.sub("\.\*$", "", view)
639 scd = self.get_subclass_dict()
640 def expand(p, level, id_only=False):
641 if level > 0:
642 path = self.model.make_path(p, scd)
643 cd = path.end_class
644 add_f = lambda x: p + "." + x.name
645 vs = [p + ".id"] if id_only and cd.has_id else map(add_f, cd.attributes)
646 next_level = level - 1
647 rs_and_cs = cd.references + cd.collections
648 for r in rs_and_cs:
649 rp = add_f(r)
650 if next_level:
651 self.outerjoin(rp)
652 vs.extend(expand(rp, next_level, self.prefetch_id_only))
653 return vs
654 else:
655 return []
656 depth = self.prefetch_depth
657 views_to_add.extend(expand(view, depth))
658 else:
659 views_to_add.append(view)
660
661 if self.do_verification:
662 self.verify_views(views_to_add)
663
664 self.views.extend(views_to_add)
665
666 return self
667
669 if self.root is None:
670 if self.do_verification:
671 if path.endswith(".*"):
672 trimmed = re.sub("\.\*$", "", path)
673 else:
674 trimmed = path
675 self.root = self.model.make_path(trimmed, self.get_subclass_dict()).root
676 return path
677 else:
678 if path.startswith(self.root.name):
679 return path
680 else:
681 return self.root.name + "." + path
682
684 """
685 Clear the output column list
686 ============================
687
688 Deletes all entries currently in the view list.
689 """
690 self.views = []
691
693 """
694 Check to see if the views given are valid
695 =========================================
696
697 This method checks to see if the views:
698 - are valid according to the model
699 - represent attributes
700
701 @see: L{intermine.model.Attribute}
702
703 @raise intermine.model.ModelError: if the paths are invalid
704 @raise ConstraintError: if the paths are not attributes
705 """
706 if views is None: views = self.views
707 for path in views:
708 path = self.model.make_path(path, self.get_subclass_dict())
709 if not path.is_attribute():
710 raise ConstraintError("'" + str(path)
711 + "' does not represent an attribute")
712
714 """
715 Add a constraint (filter on records)
716 ====================================
717
718 example::
719
720 query.add_constraint("Gene.symbol", "=", "zen")
721
722 This method will try to make a constraint from the arguments
723 given, trying each of the classes it knows of in turn
724 to see if they accept the arguments. This allows you
725 to add constraints of different types without having to know
726 or care what their classes or implementation details are.
727 All constraints derive from intermine.constraints.Constraint,
728 and they all have a path attribute, but are otherwise diverse.
729
730 Before adding the constraint to the query, this method
731 will also try to check that the constraint is valid by
732 calling Query.verify_constraint_paths()
733
734 @see: L{intermine.constraints}
735
736 @rtype: L{intermine.constraints.Constraint}
737 """
738 if len(args) == 1 and len(kwargs) == 0:
739 if isinstance(args[0], tuple):
740 con = self.constraint_factory.make_constraint(*args[0])
741 else:
742 try:
743 con = self.constraint_factory.make_constraint(*args[0].vargs, **args[0].kwargs)
744 except AttributeError:
745 con = args[0]
746 else:
747 if len(args) == 0 and len(kwargs) == 1:
748 k, v = kwargs.items()[0]
749 d = {"path": k}
750 if v in constraints.UnaryConstraint.OPS:
751 d["op"] = v
752 else:
753 d["op"] = "="
754 d["value"] = v
755 kwargs = d
756
757 if len(args) and args[0] in self.constraint_factory.reference_ops:
758 args = [self.root] + list(args)
759
760 con = self.constraint_factory.make_constraint(*args, **kwargs)
761
762 con.path = self.prefix_path(con.path)
763 if self.do_verification: self.verify_constraint_paths([con])
764 if hasattr(con, "code"):
765 self.constraint_dict[con.code] = con
766 else:
767 self.uncoded_constraints.append(con)
768
769 return con
770
def where(self, *cons, **kwargs):
    """
    Return a new query like this one but with an additional constraint
    ==================================================================

    Unlike add_constraint, this method leaves the Query it is
    invoked on untouched: the constraints are added to a clone,
    and the clone is returned.

    Each positional argument is first treated as a set of constraint
    nodes (iterable, each node having vargs and kwargs, the set
    providing as_logic), and keyword arguments as path = value
    equality constraints. If that raises an AttributeError, all the
    arguments are handed straight to add_constraint instead.

    Also available as Query.filter
    """
    new_query = self.clone()
    try:
        for constraint_set in cons:
            existing = new_query.coded_constraints
            if existing:
                logic_prefix = str(new_query.get_logic()) + " AND "
                # The new codes carry on from the last one already in use.
                first_code = chr(ord(existing[-1].code) + 1)
            else:
                logic_prefix = ""
                first_code = 'A'
            for node in constraint_set:
                new_query.add_constraint(*node.vargs, **node.kwargs)
            try:
                new_query.set_logic(logic_prefix + constraint_set.as_logic(start = first_code))
            except constraints.EmptyLogicError:
                pass
        for path, value in kwargs.items():
            new_query.add_constraint(path, "=", value)
    except AttributeError:
        # Plain arguments (eg. path, op, value) rather than constraint sets.
        new_query.add_constraint(*cons, **kwargs)
    return new_query
799
801 """
802 Return a Column object suitable for using to construct constraints with
803 =======================================================================
804
805 This method is part of the SQLAlchemy style API.
806
807 Also available as Query.c
808 """
809 return self.model.column(self.prefix_path(str(col)), self.get_subclass_dict(), self)
810
812 """
813 Check that the constraints are valid
814 ====================================
815
816 This method will check the path attribute of each constraint.
817 In addition it will:
818 - Check that BinaryConstraints and MultiConstraints have an Attribute as their path
819 - Check that TernaryConstraints have a Reference as theirs
820 - Check that SubClassConstraints have a correct subclass relationship
821 - Check that LoopConstraints have a valid loopPath, of a compatible type
822 - Check that ListConstraints refer to an object
823 - Don't even try to check RangeConstraints: these have variable semantics
824
825 @param cons: The constraints to check (defaults to all constraints on the query)
826
827 @raise ModelError: if the paths are not valid
828 @raise ConstraintError: if the constraints do not satisfy the above rules
829
830 """
831 if cons is None: cons = self.constraints
832 for con in cons:
833 pathA = self.model.make_path(con.path, self.get_subclass_dict())
834 if isinstance(con, constraints.RangeConstraint):
835 pass
836 elif isinstance(con, constraints.IsaConstraint):
837 if pathA.get_class() is None:
838 raise ConstraintError("'" + str(pathA) + "' does not represent a class, or a reference to a class")
839 for c in con.values:
840 if c not in self.model.classes:
841 raise ConstraintError("Illegal constraint: " + repr(con) + " '" + str(c) + "' is not a class in this model")
842 elif isinstance(con, constraints.TernaryConstraint):
843 if pathA.get_class() is None:
844 raise ConstraintError("'" + str(pathA) + "' does not represent a class, or a reference to a class")
845 elif isinstance(con, constraints.BinaryConstraint) or isinstance(con, constraints.MultiConstraint):
846 if not pathA.is_attribute():
847 raise ConstraintError("'" + str(pathA) + "' does not represent an attribute")
848 elif isinstance(con, constraints.SubClassConstraint):
849 pathB = self.model.make_path(con.subclass, self.get_subclass_dict())
850 if not pathB.get_class().isa(pathA.get_class()):
851 raise ConstraintError("'" + con.subclass + "' is not a subclass of '" + con.path + "'")
852 elif isinstance(con, constraints.LoopConstraint):
853 pathB = self.model.make_path(con.loopPath, self.get_subclass_dict())
854 for path in [pathA, pathB]:
855 if not path.get_class():
856 raise ConstraintError("'" + str(path) + "' does not refer to an object")
857 (classA, classB) = (pathA.get_class(), pathB.get_class())
858 if not classA.isa(classB) and not classB.isa(classA):
859 raise ConstraintError("the classes are of incompatible types: " + str(classA) + "," + str(classB))
860 elif isinstance(con, constraints.ListConstraint):
861 if not pathA.get_class():
862 raise ConstraintError("'" + str(pathA) + "' does not refer to an object")
863
864 @property
866 """
867 Returns the constraints of the query
868 ====================================
869
870 Query.constraints S{->} list(intermine.constraints.Constraint)
871
872 Constraints are returned in the order of their code (normally
873 the order they were added to the query) and with any
subclass constraints at the end.
875
876 @rtype: list(Constraint)
877 """
878 ret = sorted(self.constraint_dict.values(), key=lambda con: con.code)
879 ret.extend(self.uncoded_constraints)
880 return ret
881
883 """
884 Returns the constraint with the given code
885 ==========================================
886
Returns the constraint with the given code, if it exists.
888 If no such constraint exists, it throws a ConstraintError
889
890 @return: the constraint corresponding to the given code
891 @rtype: L{intermine.constraints.CodedConstraint}
892 """
893 if code in self.constraint_dict:
894 return self.constraint_dict[code]
895 else:
896 raise ConstraintError("There is no constraint with the code '"
897 + code + "' on this query")
898
900 """
901 Add a join statement to the query
902 =================================
903
904 example::
905
906 query.add_join("Gene.proteins", "OUTER")
907
908 A join statement is used to determine if references should
909 restrict the result set by only including those references
910 exist. For example, if one had a query with the view::
911
912 "Gene.name", "Gene.proteins.name"
913
914 Then in the normal case (that of an INNER join), we would only
915 get Genes that also have at least one protein that they reference.
916 Simply by asking for this output column you are placing a
917 restriction on the information you get back.
918
919 If in fact you wanted all genes, regardless of whether they had
920 proteins associated with them or not, but if they did
921 you would rather like to know _what_ proteins, then you need
922 to specify this reference to be an OUTER join::
923
924 query.add_join("Gene.proteins", "OUTER")
925
926 Now you will get many more rows of results, some of which will
927 have "null" values where the protein name would have been,
928
929 This method will also attempt to validate the join by calling
930 Query.verify_join_paths(). Joins must have a valid path, the
931 style can be either INNER or OUTER (defaults to OUTER,
932 as the user does not need to specify inner joins, since all
933 references start out as inner joins), and the path
934 must be a reference.
935
936 @raise ModelError: if the path is invalid
937 @raise TypeError: if the join style is invalid
938
939 @rtype: L{intermine.pathfeatures.Join}
940 """
941 join = Join(*args, **kwargs)
942 join.path = self.prefix_path(join.path)
943 if self.do_verification: self.verify_join_paths([join])
944 self.joins.append(join)
945 return self
946
948 """Alias for add_join(column, "OUTER")"""
949 return self.add_join(str(column), "OUTER")
950
952 """
953 Check that the joins are valid
954 ==============================
955
956 Joins must have valid paths, and they must refer to references.
957
958 @raise ModelError: if the paths are invalid
959 @raise QueryError: if the paths are not references
960 """
961 if joins is None: joins = self.joins
962 for join in joins:
963 path = self.model.make_path(join.path, self.get_subclass_dict())
964 if not path.is_reference():
965 raise QueryError("'" + join.path + "' is not a reference")
966
968 """
969 Add a path description to the query
970 ===================================
971
972 example::
973
974 query.add_path_description("Gene.proteins.proteinDomains", "Protein Domain")
975
976 This allows you to alias the components of long paths to
977 improve the way they display column headers in a variety of circumstances.
978 In the above example, if the view included the unwieldy path
979 "Gene.proteins.proteinDomains.primaryIdentifier", it would (depending on the
980 mine) be displayed as "Protein Domain > DB Identifer". These
981 setting are taken into account by the webservice when generating
982 column headers for flat-file results with the columnheaders parameter given, and
983 always supplied when requesting jsontable results.
984
985 @rtype: L{intermine.pathfeatures.PathDescription}
986
987 """
988 path_description = PathDescription(*args, **kwargs)
989 path_description.path = self.prefix_path(path_description.path)
990 if self.do_verification: self.verify_pd_paths([path_description])
991 self.path_descriptions.append(path_description)
992 return path_description
993
995 """
996 Check that the path of the path description is valid
997 ====================================================
998
999 Checks for consistency with the data model
1000
1001 @raise ModelError: if the paths are invalid
1002 """
1003 if pds is None: pds = self.path_descriptions
1004 for pd in pds:
1005 self.model.validate_path(pd.path, self.get_subclass_dict())
1006
1007 @property
1009 """
1010 Returns the list of constraints that have a code
1011 ================================================
1012
1013 Query.coded_constraints S{->} list(intermine.constraints.CodedConstraint)
1014
1015 This returns an up to date list of the constraints that can
1016 be used in a logic expression. The only kind of constraint
1017 that this excludes, at present, is SubClassConstraints
1018
1019 @rtype: list(L{intermine.constraints.CodedConstraint})
1020 """
1021 return sorted(self.constraint_dict.values(), key=lambda con: con.code)
1022
1024 """
1025 Returns the logic expression for the query
1026 ==========================================
1027
1028 This returns the up to date logic expression. The default
1029 value is the representation of all coded constraints and'ed together.
1030
1031 If the logic is empty and there are no constraints, returns an
1032 empty string.
1033
1034 The LogicGroup object stringifies to a string that can be parsed to
1035 obtain itself (eg: "A and (B or C or D)").
1036
1037 @rtype: L{intermine.constraints.LogicGroup}
1038 """
1039 if self._logic is None:
1040 if len(self.coded_constraints) > 0:
1041 return reduce(lambda x, y: x+y, self.coded_constraints)
1042 else:
1043 return ""
1044 else:
1045 return self._logic
1046
1048 """
1049 Sets the Logic given the appropriate input
1050 ==========================================
1051
1052 example::
1053
1054 Query.set_logic("A and (B or C)")
1055
1056 This sets the logic to the appropriate value. If the value is
1057 already a LogicGroup, it is accepted, otherwise
1058 the string is tokenised and parsed.
1059
1060 The logic is then validated with a call to validate_logic()
1061
1062 raise LogicParseError: if there is a syntax error in the logic
1063 """
1064 if isinstance(value, constraints.LogicGroup):
1065 logic = value
1066 else:
1067 try:
1068 logic = self._logic_parser.parse(value)
1069 except constraints.EmptyLogicError:
1070 if self.coded_constraints:
1071 raise
1072 else:
1073 return self
1074 if self.do_verification: self.validate_logic(logic)
1075 self._logic = logic
1076 return self
1077
1079 """
1080 Validates the query logic
1081 =========================
1082
1083 Attempts to validate the logic by checking
1084 that every coded_constraint is included
1085 at least once
1086
1087 @raise QueryError: if not every coded constraint is represented
1088 """
1089 if logic is None: logic = self._logic
1090 logic_codes = set(logic.get_codes())
1091 for con in self.coded_constraints:
1092 if con.code not in logic_codes:
1093 raise QueryError("Constraint " + con.code + repr(con)
1094 + " is not mentioned in the logic: " + str(logic))
1095
1097 """
1098 Gets the sort order when none has been specified
1099 ================================================
1100
1101 This method is called to determine the sort order if
1102 none is specified
1103
1104 @raise QueryError: if the view is empty
1105
1106 @rtype: L{intermine.pathfeatures.SortOrderList}
1107 """
1108 try:
1109 v0 = self.views[0]
1110 for j in self.joins:
1111 if j.style == "OUTER":
1112 if v0.startswith(j.path):
1113 return ""
1114 return SortOrderList((self.views[0], SortOrder.ASC))
1115 except IndexError:
1116 raise QueryError("Query view is empty")
1117
1119 """
1120 Return a sort order for the query
1121 =================================
1122
1123 This method returns the sort order if set, otherwise
1124 it returns the default sort order
1125
1126 @raise QueryError: if the view is empty
1127
1128 @rtype: L{intermine.pathfeatures.SortOrderList}
1129 """
1130 if self._sort_order_list.is_empty():
1131 return self.get_default_sort_order()
1132 else:
1133 return self._sort_order_list
1134
1136 """
1137 Adds a sort order to the query
1138 ==============================
1139
1140 example::
1141
1142 Query.add_sort_order("Gene.name", "DESC")
1143
1144 This method adds a sort order to the query.
1145 A query can have multiple sort orders, which are
1146 assessed in sequence.
1147
1148 If a query has two sort-orders, for example,
1149 the first being "Gene.organism.name asc",
1150 and the second being "Gene.name desc", you would have
1151 the list of genes grouped by organism, with the
1152 lists within those groupings in reverse alphabetical
1153 order by gene name.
1154
1155 This method will try to validate the sort order
1156 by calling validate_sort_order()
1157
1158 Also available as Query.order_by
1159 """
1160 so = SortOrder(str(path), direction)
1161 so.path = self.prefix_path(so.path)
1162 if self.do_verification: self.validate_sort_order(so)
1163 self._sort_order_list.append(so)
1164 return self
1165
1167 """
1168 Check the validity of the sort order
1169 ====================================
1170
1171 Checks that the sort order paths are:
1172 - valid paths
1173 - in the view
1174
1175 @raise QueryError: if the sort order is not in the view
1176 @raise ModelError: if the path is invalid
1177
1178 """
1179 if not so_elems:
1180 so_elems = self._sort_order_list
1181 from_paths = self._from_paths()
1182 for so in so_elems:
1183 p = self.model.make_path(so.path, self.get_subclass_dict())
1184 if p.prefix() not in from_paths:
1185 raise QueryError("Sort order element %s is not in the query" % so.path)
1186
1197
1199 """
1200 Return the current mapping of class to subclass
1201 ===============================================
1202
1203 This method returns a mapping of classes used
1204 by the model for assessing whether certain paths are valid. For
1205 intance, if you subclass MicroArrayResult to be FlyAtlasResult,
1206 you can refer to the .presentCall attributes of fly atlas results.
1207 MicroArrayResults do not have this attribute, and a path such as::
1208
1209 Gene.microArrayResult.presentCall
1210
1211 would be marked as invalid unless the dictionary is provided.
1212
1213 Users most likely will not need to ever call this method.
1214
1215 @rtype: dict(string, string)
1216 """
1217 subclass_dict = {}
1218 for c in self.constraints:
1219 if isinstance(c, constraints.SubClassConstraint):
1220 subclass_dict[c.path] = c.subclass
1221 return subclass_dict
1222
def results(self, row="object", start=0, size=None, summary_path=None):
    """
    Return an iterator over result rows
    ===================================

    Usage::

      >>> query = service.model.Gene.select("symbol", "length")
      >>> total = 0
      >>> for gene in query.results():
      ...    print gene.symbol                # handle strings
      ...    total += gene.length             # handle numbers
      >>> for row in query.results(row="rr"):
      ...    print row["symbol"]              # handle strings by dict index
      ...    total += row["length"]           # handle numbers by dict index
      ...    print row["Gene.symbol"]         # handle strings by full dict index
      ...    total += row["Gene.length"]      # handle numbers by full dict index
      ...    print row[0]                     # handle strings by list index
      ...    total += row[1]                  # handle numbers by list index
      >>> for d in query.results(row="dict"):
      ...    print d["Gene.symbol"]           # handle strings
      ...    total += d["Gene.length"]        # handle numbers
      >>> for l in query.results(row="list"):
      ...    print l[0]                       # handle strings
      ...    total += l[1]                    # handle numbers
      >>> import csv
      >>> csv_reader = csv.reader(query.results(row="csv"), delimiter=",", quotechar='"')
      >>> for row in csv_reader:
      ...    print row[0]                     # handle strings
      ...    total += int(row[1])             # handle numbers
      >>> tsv_reader = csv.reader(query.results(row="tsv"), delimiter="\\t")
      >>> for row in tsv_reader:
      ...    print row[0]                     # handle strings
      ...    total += int(row[1])             # handle numbers

    This is the general method that allows access to any of the available
    result formats. The example above shows the ways these differ in terms
    of accessing fields of the rows, as well as dealing with different
    data types. Results can either be retrieved as typed values (jsonobjects,
    rr ['ResultRows'], dict, list), or as lists of strings (csv, tsv) which then require
    further parsing. The default format for this method is "object", where
    information is grouped by its relationships. The other main format is
    "rr", which stands for 'ResultRows', and can be accessed directly through
    the L{rows} method.

    Note that when requesting object based results (the default), if your query
    contains any kind of collection, it is highly likely that start and size won't do what
    you think, as they operate only on the underlying
    rows used to build up the returned objects. If you want rows
    back, you are recommended to use the simpler rows method.

    If no views have been specified, the root class is added as the view
    of the query that is actually run.

    The query itself is never modified: a clone is adjusted and run.

    @param row: The format for each result. One of "object", "rr",
                "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
    @type row: string
    @param start: the index of the first result to return (default = 0)
    @type start: int
    @param size: The maximum number of results to return (default = all).
                 A false value (None or 0) sends no size limit.
    @type size: int
    @param summary_path: A column name to optionally summarise. Specifying a path
                         will force "jsonrows" format, and return an iterator over a list
                         of dictionaries. Use this when you are interested in processing
                         a summary in order of greatest count to smallest.
    @type summary_path: str or L{intermine.model.Path}

    @rtype: L{intermine.webservice.ResultIterator}

    @raise WebserviceError: if the request is unsuccessful
    """

    # All adjustments below are made on a copy, so the caller's query
    # keeps exactly the views it was given.
    to_run = self.clone()

    if len(to_run.views) == 0:
        to_run.add_view(to_run.root)

    if "object" in row:
        # Object formats ("object", "jsonobjects"): make sure every
        # constrained path has at least one selected column beneath it.
        for c in self.coded_constraints:
            p = to_run.column(c.path)._path
            from_p = p if p.end_class is not None else p.prefix()
            from_prefix = str(from_p)
            # any() stops at the first match and, unlike testing the
            # result of filter(), does not depend on filter returning a list.
            if not any(v.startswith(from_prefix) for v in to_run.views):
                if p.is_attribute():
                    to_run.add_view(p)
                else:
                    to_run.add_view(p.append("id"))

    path = to_run.get_results_path()
    params = to_run.to_query_params()
    params["start"] = start
    if size:
        params["size"] = size
    if summary_path:
        # Summaries are always requested in the "jsonrows" format,
        # overriding whatever format the caller asked for.
        params["summaryPath"] = to_run.prefix_path(summary_path)
        row = "jsonrows"

    view = to_run.views
    cld = to_run.root
    return to_run.service.get_results(path, params, row, view, cld)
1322
def rows(self, start=0, size=None):
    """
    Return the results as rows of data
    ==================================

    Convenience wrapper: delegates to L{results} with the row
    format fixed to "rr" (ResultRows).

    Usage::

      >>> for row in query.rows(start=10, size=10):
      ...     print row["proteins.name"]

    @param start: the index of the first result to return (default = 0)
    @type start: int
    @param size: The maximum number of results to return (default = all)
    @type size: int
    @rtype: iterable<intermine.webservice.ResultRow>
    """
    options = {"row": "rr", "start": start, "size": size}
    return self.results(**options)
1342
def summarise(self, summary_path, **kwargs):
    """
    Return a summary of the results for this column.
    ================================================

    Usage::

      >>> query = service.select("Gene.*", "organism.*").where("Gene", "IN", "my-list")
      >>> print query.summarise("length")["average"]
      ... 12345.67890
      >>> print query.summarise("organism.name")["Drosophila simulans"]
      ... 98

    This method allows you to get statistics summarising the information
    from just one column of a query. For numerical columns you get dictionary with
    four keys ('average', 'stdev', 'max', 'min'), and for non-numerical
    columns you get a dictionary where each item is a key and the values
    are the number of occurrences of this value in the column.

    Any key word arguments will be passed to the underlying results call -
    so you can limit the result size to the top 100 items by passing "size = 100"
    as part of the call.

    This method is sugar for particular combinations of calls to L{results}.

    @see: L{intermine.query.Query.results}

    @param summary_path: The column to summarise (either in long or short form)
    @type summary_path: str or L{intermine.model.Path}

    @rtype: dict
    """
    # The path is resolved only to learn the type of the summarised column.
    p = self.model.make_path(self.prefix_path(summary_path), self.get_subclass_dict())
    results = self.results(summary_path = summary_path, **kwargs)
    if p.end.type_name in Model.NUMERIC_TYPES:
        # Numeric summaries: only the first result row is read, and every
        # value in it is converted to float. The built-in next() and
        # items() are used rather than the Python 2-only .next() and
        # .iteritems().
        return dict((k, float(v)) for k, v in next(results).items())
    else:
        # Non-numeric summaries: one row per distinct item, with its count.
        return dict((r["item"], r["count"]) for r in results)
1379
def one(self, row="jsonobjects"):
    """
    Return one result, and raise an error if the result size is not 1

    The row count is requested first. If it is exactly 1, the result of
    first(row) is returned. Otherwise, for any format other than
    "jsonobjects" an error is raised straight away; for "jsonobjects"
    the results are iterated and exactly one result must be produced.

    @raise QueryError: if there is no result, or more than one
    """
    row_count = self.count()
    if row_count == 1:
        return self.first(row)

    if row != "jsonobjects":
        raise QueryError("Result size is not one: got %d results" % (row_count))

    # The row count was not 1: check the iterated results instead.
    only = None
    for candidate in self.results():
        if only is None:
            only = candidate
            continue
        raise QueryError("More than one result received")
    if only is None:
        raise QueryError("No results received")
    return only
1402
def first(self, row="jsonobjects", start=0, **kw):
    """
    Return the first result, or None if the results are empty

    For every format except "jsonobjects" the request is limited to a
    single result (size=1). For "jsonobjects" no size limit is sent.
    NOTE(review): presumably because size applies to the underlying rows
    rather than to whole objects (see the note in L{results}) - confirm.

    @param row: the result format, passed on to L{results}
    @type row: string
    @param start: the index of the first result to return (default = 0)
    @type start: int
    @param kw: any further keyword arguments are passed on to L{results}

    @return: the first result, or None if there are none
    """
    if row == "jsonobjects":
        size = None
    else:
        size = 1
    # The built-in next() with a default replaces the Python 2-only
    # .next() method call and the StopIteration handler around it.
    return next(self.results(row, start=start, size=size, **kw), None)
1413
1415 """
1416 Get a list of result rows
1417 =========================
1418
1419 This method is a shortcut so that you do not have to
1420 do a list comprehension yourself on the iterator that
1421 is normally returned. If you have a very large result
1422 set (and these can get up to 100's of thousands or rows
1423 pretty easily) you will not want to
1424 have the whole list in memory at once, but there may
1425 be other circumstances when you might want to keep the whole
1426 list in one place.
1427
1428 It takes all the same arguments and parameters as Query.results
1429
1430 Also available as Query.all
1431
1432 @see: L{intermine.query.Query.results}
1433
1434 """
1435 rows = self.results(*args, **kwargs)
1436 return [r for r in rows]
1437
1440
1442 """
1443 Return the total number of rows this query returns
1444 ==================================================
1445
1446 Obtain the number of rows a particular query will
1447 return, without having to fetch and parse all the
1448 actual data. This method makes a request to the server
1449 to report the count for the query, and is sugar for a
1450 results call.
1451
1452 Also available as Query.size
1453
1454 @rtype: int
1455 @raise WebserviceError: if the request is unsuccessful.
1456 """
1457 count_str = ""
1458 for row in self.results(row = "count"):
1459 count_str += row
1460 try:
1461 return int(count_str)
1462 except ValueError:
1463 raise ResultError("Server returned a non-integer count: " + count_str)
1464
1466 """
1467 Returns the uri to use to create a list from this query
1468 =======================================================
1469
1470 Query.get_list_upload_uri() -> str
1471
1472 This method is used internally when performing list operations
1473 on queries.
1474
1475 @rtype: str
1476 """
1477 return self.service.root + self.service.QUERY_LIST_UPLOAD_PATH
1478
1480 """
1481 Returns the uri to use to create a list from this query
1482 =======================================================
1483
1484 Query.get_list_append_uri() -> str
1485
1486 This method is used internally when performing list operations
1487 on queries.
1488
1489 @rtype: str
1490 """
1491 return self.service.root + self.service.QUERY_LIST_APPEND_PATH
1492
1493
1495 """
1496 Returns the path section pointing to the REST resource
1497 ======================================================
1498
1499 Query.get_results_path() -> str
1500
1501 Internally, this just calls a constant property
1502 in intermine.service.Service
1503
1504 @rtype: str
1505 """
1506 return self.service.QUERY_PATH
1507
1508
1510 """
1511 Returns the child objects of the query
1512 ======================================
1513
1514 This method is used during the serialisation of queries
1515 to xml. It is unlikely you will need access to this as a whole.
1516 Consider using "path_descriptions", "joins", "constraints" instead
1517
1518 @see: Query.path_descriptions
1519 @see: Query.joins
1520 @see: Query.constraints
1521
1522 @return: the child element of this query
1523 @rtype: list
1524 """
1525 return sum([self.path_descriptions, self.joins, self.constraints], [])
1526
1528 """
1529 Implementation of trait that allows use of these objects as queries (casting).
1530 """
1531 return self
1532
1534 """
1535 Implementation of trait that allows use of these objects in list constraints
1536 """
1537 l = self.service.create_list(self)
1538 return ConstraintNode(path, op, l.name)
1539
1541 """
1542 Returns the parameters to be passed to the webservice
1543 =====================================================
1544
1545 The query is responsible for producing its own query
1546 parameters. These consist simply of:
1547 - query: the xml representation of the query
1548
1549 @rtype: dict
1550
1551 """
1552 xml = self.to_xml()
1553 params = {'query' : xml }
1554 return params
1555
1557 """
1558 Returns a DOM node representing the query
1559 =========================================
1560
1561 This is an intermediate step in the creation of the
1562 xml serialised version of the query. You probably
1563 won't need to call this directly.
1564
1565 @rtype: xml.minidom.Node
1566 """
1567 impl = getDOMImplementation()
1568 doc = impl.createDocument(None, "query", None)
1569 query = doc.documentElement
1570
1571 query.setAttribute('name', self.name)
1572 query.setAttribute('model', self.model.name)
1573 query.setAttribute('view', ' '.join(self.views))
1574 query.setAttribute('sortOrder', str(self.get_sort_order()))
1575 query.setAttribute('longDescription', self.description)
1576 if len(self.coded_constraints) > 1:
1577 query.setAttribute('constraintLogic', str(self.get_logic()))
1578
1579 for c in self.children():
1580 element = doc.createElement(c.child_type)
1581 for name, value in c.to_dict().items():
1582 if isinstance(value, (set, list)):
1583 for v in value:
1584 subelement = doc.createElement(name)
1585 text = doc.createTextNode(v)
1586 subelement.appendChild(text)
1587 element.appendChild(subelement)
1588 else:
1589 element.setAttribute(name, value)
1590 query.appendChild(element)
1591 return query
1592
1594 """
1595 Return an XML serialisation of the query
1596 ========================================
1597
1598 This method serialises the current state of the query to an
1599 xml string, suitable for storing, or sending over the
1600 internet to the webservice.
1601
1602 @return: the serialised xml string
1603 @rtype: string
1604 """
1605 n = self.to_Node()
1606 return n.toxml()
1607
1622
1624 """
1625 Performs a deep clone
1626 =====================
1627
1628 This method will produce a clone that is independent,
1629 and can be altered without affecting the original,
1630 but starts off with the exact same state as it.
1631
1632 The only shared elements should be the model
1633 and the service, which are shared by all queries
1634 that refer to the same webservice.
1635
1636 @return: same class as caller
1637 """
1638 newobj = self.__class__(self.model)
1639 for attr in ["joins", "views", "_sort_order_list", "_logic", "path_descriptions", "constraint_dict", "uncoded_constraints"]:
1640 setattr(newobj, attr, deepcopy(getattr(self, attr)))
1641
1642 for attr in ["name", "description", "service", "do_verification", "constraint_factory", "root"]:
1643 setattr(newobj, attr, getattr(self, attr))
1644 return newobj
1645
1647 """
1648 A Class representing a predefined query
1649 =======================================
1650
1651 Templates are ways of saving queries
1652 and allowing others to run them
1653 simply. They are the main interface
1654 to querying in the webapp
1655
1656 SYNOPSIS
1657 --------
1658
1659 example::
1660
1661 service = Service("http://www.flymine.org/query/service")
1662 template = service.get_template("Gene_Pathways")
1663 for row in template.results(A={"value":"eve"}):
1664 process_row(row)
1665 ...
1666
1667 A template is a subclass of query that comes predefined. They
1668 are typically retrieved from the webservice and run by specifying
1669 the values for their existing constraints. They are a concise
1670 and powerful way of running queries in the webapp.
1671
1672 Being subclasses of query, everything is true of them that is true
1673 of a query. They are just less work, as you don't have to design each
1674 one. Also, you can store your own templates in the web-app, and then
1675 access them as a private webservice method, from anywhere, making them
1676 a kind of query in the cloud - for this you will need to authenticate
1677 by providing log in details to the service.
1678
1679 The most significant difference is how constraint values are specified
1680 for each set of results.
1681
1682 @see: L{Template.results}
1683
1684 """
1686 """
1687 Constructor
1688 ===========
1689
1690 Instantiation is identical that of queries. As with queries,
1691 these are best obtained from the intermine.webservice.Service
1692 factory methods.
1693
1694 @see: L{intermine.webservice.Service.get_template}
1695 """
1696 super(Template, self).__init__(*args, **kwargs)
1697 self.constraint_factory = constraints.TemplateConstraintFactory()
1698 @property
1700 """
1701 Return the list of constraints you can edit
1702 ===========================================
1703
1704 Template.editable_constraints -> list(intermine.constraints.Constraint)
1705
1706 Templates have a concept of editable constraints, which
1707 is a way of hiding complexity from users. An underlying query may have
1708 five constraints, but only expose the one that is actually
1709 interesting. This property returns this subset of constraints
1710 that have the editable flag set to true.
1711 """
1712 isEditable = lambda x: x.editable
1713 return filter(isEditable, self.constraints)
1714
def to_query_params(self):
    """
    Returns the query parameters needed for the webservice
    ======================================================

    Template.to_query_params() -> dict(string, string)

    Overrides the method of the same name in query to provide the
    parameters needed by the templates results service. These
    are slightly more complex:
        - name: The template's name
        - for each editable constraint that is switched on
          (where [i] is an integer, starting at 1, incremented for each
          constraint that is included):
            - constraint[i]: the path
            - op[i]:         the operator
            - value[i]:      the value
            - code[i]:       the code
            - extra[i]:      the extra value for ternary constraints (optional)

    Editable constraints that are switched off are left out entirely.

    @rtype: dict
    """
    p = {'name' : self.name}
    i = 1
    for c in self.editable_constraints:
        # This line used to read "next", which is a bare name lookup and
        # skips nothing; "continue" really leaves the constraint out.
        if not c.switched_on:
            continue
        for k, v in c.to_dict().items():
            # Rename the two keys that differ in the parameter names
            # this method emits.
            if k == "extraValue": k = "extra"
            if k == "path": k = "constraint"
            p[k + str(i)] = v
        i += 1
    return p
1746
1748 """
1749 Returns the path section pointing to the REST resource
1750 ======================================================
1751
1752 Template.get_results_path() S{->} str
1753
1754 Internally, this just calls a constant property
1755 in intermine.service.Service
1756
1757 This overrides the method of the same name in Query
1758
1759 @return: the path to the REST resource
1760 @rtype: string
1761 """
1762 return self.service.TEMPLATEQUERY_PATH
1763
1765 """
1766 Gets a template to run
1767 ======================
1768
1769 Template.get_adjusted_template(con_values) S{->} Template
1770
1771 When templates are run, they are first cloned, and their
1772 values are changed to those desired. This leaves the original
1773 template unchanged so it can be run again with different
1774 values. This method does the cloning and changing of constraint
1775 values
1776
1777 @raise ConstraintError: if the constraint values specify values for a non-editable constraint.
1778
1779 @rtype: L{Template}
1780 """
1781 clone = self.clone()
1782 for code, options in con_values.items():
1783 con = clone.get_constraint(code)
1784 if not con.editable:
1785 raise ConstraintError("There is a constraint '" + code
1786 + "' on this query, but it is not editable")
1787 try:
1788 for key, value in options.items():
1789 setattr(con, key, value)
1790 except AttributeError:
1791 setattr(con, "value", options)
1792 return clone
1793
def results(self, row="object", start=0, size=None, **con_values):
    """
    Get an iterator over result rows
    ================================

    This method returns the same values with the
    same options as the method of the same name in
    Query (see intermine.query.Query). The main difference is in the
    arguments.

    The template result methods also accept a key-word pair
    set of arguments that are used to supply values
    to the editable constraints. eg::

      template.results(
        A = {"value": "eve"},
        B = {"op": ">", "value": 5000}
      )

    The keys should be codes for editable constraints (you can inspect these
    with Template.editable_constraints) and the values should be a dictionary
    of constraint properties to replace. You can replace the values for
    "op" (operator), "value", and "extra_value" and "values" in the case of
    ternary and multi constraints.

    The template itself is left as it is: the values are applied to an
    adjusted copy obtained from get_adjusted_template, and that copy is run.

    @rtype: L{intermine.webservice.ResultIterator}
    """
    adjusted = self.get_adjusted_template(con_values)
    # Run the parent class implementation on the adjusted copy.
    run_query = super(Template, adjusted).results
    return run_query(row, start, size)
1823
1825 """
1826 Get a list of result rows
1827 =========================
1828
1829 This method performs the same as the method of the
1830 same name in Query, and it shares the semantics of
1831 Template.results().
1832
1833 @see: L{intermine.query.Query.get_results_list}
1834 @see: L{intermine.query.Template.results}
1835
1836 @rtype: list
1837
1838 """
1839 clone = self.get_adjusted_template(con_values)
1840 return super(Template, clone).get_results_list(row, start, size)
1841
1846
1847 - def rows(self, start=0, size=None, **con_values):
1851
def count(self, **con_values):
    """
    Return the total number of rows this template returns
    =====================================================

    Obtain the number of rows a particular query will
    return, without having to fetch and parse all the
    actual data. This method makes a request to the server
    to report the count for the query, and is sugar for a
    results call.

    Any keyword arguments are constraint values; they are applied to an
    adjusted copy obtained from get_adjusted_template before counting.

    @rtype: int
    @raise WebserviceError: if the request is unsuccessful.
    """
    adjusted = self.get_adjusted_template(con_values)
    # Count with the parent class implementation, on the adjusted copy.
    count_rows = super(Template, adjusted).count
    return count_rows()
1868
1872
1875
1878
1881