1 from xml.dom import minidom
2 import urllib
3 from urlparse import urlparse
4 import base64
5 import UserDict
6
7
8
9
10
11
12
13
14
15
16 try:
17 import simplejson as json
18 except ImportError:
19 try:
20 import json
21 except ImportError:
22 raise ImportError("Could not find any JSON module to import - "
23 + "please install simplejson or jsonlib to continue")
24
25
26 from intermine.query import Query, Template
27 from intermine.model import Model, Attribute, Reference, Collection, Column
28 from intermine.lists.listmanager import ListManager
29 from intermine.errors import ServiceError, WebserviceError
30 from intermine.results import InterMineURLOpener, ResultIterator
31 from intermine import idresolution
32
33 """
34 Webservice Interaction Routines for InterMine Webservices
35 =========================================================
36
37 Classes for dealing with communication with an InterMine
38 RESTful webservice.
39
40 """
41
# Module metadata.
__author__ = "Alex Kalderimis"
__organization__ = "InterMine"
__license__ = "LGPL"
__contact__ = "dev@intermine.org"
46
47 -class Registry(object, UserDict.DictMixin):
48 """
49 A Class representing an InterMine registry.
50 ===========================================
51
52 Registries are web-services that mines can automatically register
53 themselves with, and thus enable service discovery by clients.
54
55 SYNOPSIS
56 --------
57
58 example::
59
60 from intermine.webservice import Registry
61
62 # Connect to the default registry service
63 # at www.intermine.org/registry
64 registry = Registry()
65
66 # Find all the available mines:
67 for name, mine in registry.items():
68 print name, mine.version
69
70 # Dict-like interface for accessing mines.
71 flymine = registry["flymine"]
72
73 # The mine object is a Service
74 for gene in flymine.select("Gene.*").results():
75 process(gene)
76
77 This class is meant to aid with interoperation between
78 mines by allowing them to discover one-another, and
79 allow users to always have correct connection information.
80 """
81
82 MINES_PATH = "/mines.json"
83
84 - def __init__(self, registry_url="http://www.intermine.org/registry"):
85 self.registry_url = registry_url
86 opener = InterMineURLOpener()
87 data = opener.open(registry_url + Registry.MINES_PATH).read()
88 mine_data = json.loads(data)
89 self.__mine_dict = dict(( (mine["name"], mine) for mine in mine_data["mines"]))
90 self.__synonyms = dict(( (name.lower(), name) for name in self.__mine_dict.keys() ))
91 self.__mine_cache = {}
92
94 return name.lower() in self.__synonyms
95
97 lc = name.lower()
98 if lc in self.__synonyms:
99 if lc not in self.__mine_cache:
100 self.__mine_cache[lc] = Service(self.__mine_dict[self.__synonyms[lc]]["webServiceRoot"])
101 return self.__mine_cache[lc]
102 else:
103 raise KeyError("Unknown mine: " + name)
104
106 raise NotImplementedError("You cannot add items to a registry")
107
109 raise NotImplementedError("You cannot remove items from a registry")
110
112 return self.__mine_dict.keys()
113
115 """
116 A class representing connections to different InterMine WebServices
117 ===================================================================
118
119 The intermine.webservice.Service class is the main interface for the user.
120 It will provide access to queries and templates, as well as doing the
121 background task of fetching the data model, and actually requesting
122 the query results.
123
124 SYNOPSIS
125 --------
126
127 example::
128
129 from intermine.webservice import Service
130 service = Service("http://www.flymine.org/query/service")
131
132 template = service.get_template("Gene_Pathways")
133 for row in template.results(A={"value":"zen"}):
134 do_something_with(row)
135 ...
136
137 query = service.new_query()
138 query.add_view("Gene.symbol", "Gene.pathway.name")
139 query.add_constraint("Gene", "LOOKUP", "zen")
140 for row in query.results():
141 do_something_with(row)
142 ...
143
144 new_list = service.create_list("some/file/with.ids", "Gene")
145 list_on_server = service.get_list("On server")
146 in_both = new_list & list_on_server
147 in_both.name = "Intersection of these lists"
148 for row in in_both:
149 do_something_with(row)
150 ...
151
152 OVERVIEW
153 --------
    The three methods the user will be most concerned with are:
155 - L{Service.new_query}: constructs a new query to query a service with
156 - L{Service.get_template}: gets a template from the service
157 - L{ListManager.create_list}: creates a new list on the service
158
159 For list management information, see L{ListManager}.
160
161 TERMINOLOGY
162 -----------
163 X{Query} is the term for an arbitrarily complex structured request for
164 data from the webservice. The user is responsible for specifying the
165 structure that determines what records are returned, and what information
166 about each record is provided.
167
168 X{Template} is the term for a predefined "Query", ie: one that has been
169 written and saved on the webservice you will access. The definition
170 of the query is already done, but the user may want to specify the
171 values of the constraints that exist on the template. Templates are accessed
172 by name, and while you can easily introspect templates, it is assumed
173 you know what they do when you use them
174
175 X{List} is a saved result set containing a set of objects previously identified
176 in the database. Lists can be created and managed using this client library.
177
178 @see: L{intermine.query}
179 """
    # Resource paths. The ones used in this file (VERSION_PATH,
    # RELEASE_PATH, TEMPLATES_PATH, MODEL_PATH, IDS_PATH) are appended
    # to the service root url to build the request url.
    QUERY_PATH = '/query/results'
    LIST_ENRICHMENT_PATH = '/list/enrichment'
    WIDGETS_PATH = '/widgets'
    QUERY_LIST_UPLOAD_PATH = '/query/tolist'
    QUERY_LIST_APPEND_PATH = '/query/append/tolist'
    MODEL_PATH = '/model'
    TEMPLATES_PATH = '/templates/xml'
    TEMPLATEQUERY_PATH = '/template/results'
    LIST_PATH = '/lists'
    LIST_CREATION_PATH = '/lists'
    LIST_RENAME_PATH = '/lists/rename'
    LIST_APPENDING_PATH = '/lists/append'
    LIST_TAG_PATH = '/list/tags'
    SAVEDQUERY_PATH = '/savedqueries/xml'
    VERSION_PATH = '/version/ws'
    RELEASE_PATH = '/version/release'
    # NOTE(review): SCHEME is not referenced in the visible code; the
    # constructor prepends a literal "http://" instead.
    SCHEME = 'http://'
    SERVICE_RESOLUTION_PATH = "/check/"
    IDS_PATH = "/ids"
199
200 - def __init__(self, root,
201 username=None, password=None, token=None,
202 prefetch_depth=1, prefetch_id_only=False):
203 """
204 Constructor
205 ===========
206
207 Construct a connection to a webservice::
208
209 url = "http://www.flymine.org/query/service"
210
211 # An unauthenticated connection - access to all public data
212 service = Service(url)
213
214 # An authenticated connection - access to private and public data
215 service = Service(url, token="ABC123456")
216
217
218 @param root: the root url of the webservice (required)
219 @param username: your login name (optional)
220 @param password: your password (required if a username is given)
221 @param token: your API access token(optional - used in preference to username and password)
222
223 @raise ServiceError: if the version cannot be fetched and parsed
224 @raise ValueError: if a username is supplied, but no password
225
226 There are two alternative authentication systems supported by InterMine
227 webservices. The first is username and password authentication, which
228 is supported by all webservices. Newer webservices (version 6+)
229 also support API access token authentication, which is the recommended
230 system to use. Token access is more secure as you will never have
231 to transmit your username or password, and the token can be easily changed
232 or disabled without changing your webapp login details.
233
234 """
235 o = urlparse(root)
236 if not o.scheme: root = "http://" + root
237 if not root.endswith("/service"): root = root + "/service"
238
239 self.root = root
240 self.prefetch_depth = prefetch_depth
241 self.prefetch_id_only = prefetch_id_only
242
243 self._templates = None
244 self._model = None
245 self._version = None
246 self._release = None
247 self._widgets = None
248 self._list_manager = ListManager(self)
249 self.__missing_method_name = None
250 if token:
251 self.opener = InterMineURLOpener(token=token)
252 elif username:
253 if token:
254 raise ValueError("Both username and token credentials supplied")
255
256 if not password:
257 raise ValueError("Username given, but no password supplied")
258
259 self.opener = InterMineURLOpener((username, password))
260 else:
261 self.opener = InterMineURLOpener()
262
263 try:
264 self.version
265 except WebserviceError, e:
266 raise ServiceError("Could not validate service - is the root url (%s) correct? %s" % (root, e))
267
268 if token and self.version < 6:
269 raise ServiceError("This service does not support API access token authentication")
270
271
272 self.query = self.new_query
273
274
275
    # NOTE(review): presumably the ListManager method names that can
    # also be called directly on a Service (the class docstring calls
    # service.create_list and service.get_list) - confirm against the
    # attribute-lookup code, which is not visible here.
    LIST_MANAGER_METHODS = frozenset(["get_list", "get_all_lists",
        "get_all_list_names",
        "create_list", "get_list_count", "delete_lists", "l"])
279
281 """
282 Get a new ListManager to use with this service.
283 ===============================================
284
285 This method is primarily useful as a context manager
286 when creating temporary lists, since on context exit all
287 temporary lists will be cleaned up::
288
289 with service.list_manager() as manager:
290 temp_a = manager.create_list(file_a, "Gene")
291 temp_b = manager.create_list(file_b, "Gene")
292 for gene in (temp_a & temp_b):
293 print gene.primaryIdentifier, "is in both"
294
295 @rtype: ListManager
296 """
297 return ListManager(self)
298
301
307
309 try:
310 self._list_manager.delete_temporary_lists()
311 except ReferenceError:
312 pass
313
314 @property
316 """
317 Returns the webservice version
318 ==============================
319
        The version specifies what capabilities a
        specific webservice provides. For example, this class
        requires version 6 or later for API access token
        authentication, and version 10 or later for identifier
        resolution.
323
324 may raise ServiceError: if the version cannot be fetched
325
326 @rtype: int
327 """
328 if self._version is None:
329 try:
330 url = self.root + self.VERSION_PATH
331 self._version = int(self.opener.open(url).read())
332 except ValueError, e:
333 raise ServiceError("Could not parse a valid webservice version: " + str(e))
334 return self._version
335
340
341 @property
343 """
344 Returns the datawarehouse release
345 =================================
346
347 Service.release S{->} string
348
349 The release is an arbitrary string used to distinguish
350 releases of the datawarehouse. This usually coincides
351 with updates to the data contained within. While a string,
352 releases usually sort in ascending order of recentness
353 (eg: "release-26", "release-27", "release-28"). They can also
354 have less machine readable meanings (eg: "beta")
355
356 @rtype: string
357 """
358 if self._release is None:
359 self._release = urllib.urlopen(self.root + self.RELEASE_PATH).read()
360 return self._release
361
363 """
364 Construct a new Query object for the given webservice
365 =====================================================
366
367 This is the standard method for instantiating new Query
368 objects. Queries require access to the data model, as well
369 as the service itself, so it is easiest to access them through
370 this factory method.
371
372 @return: L{intermine.query.Query}
373 """
374 return Query.from_xml(xml, self.model, root=root)
375
376 - def select(self, *columns, **kwargs):
377 """
378 Construct a new Query object with the given columns selected.
379 =============================================================
380
381 As new_query, except that instead of a root class, a list of
382 output column expressions are passed instead.
383 """
384 if "xml" in kwargs:
385 return self.load_query(kwargs["xml"])
386 if len(columns) == 1:
387 view = columns[0]
388 if isinstance(view, Attribute):
389 return Query(self.model, self).select("%s.%s" % (view.declared_in.name, view))
390 if isinstance(view, Reference):
391 return Query(self.model, self).select("%s.%s.*" % (view.declared_in.name, view))
392 elif not isinstance(view, Column) and not str(view).endswith("*"):
393 path = self.model.make_path(view)
394 if not path.is_attribute():
395 return Query(self.model, self).select(str(view) + ".*")
396 return Query(self.model, self).select(*columns)
397
398 new_query = select
399
401 """
402 Returns a template of the given name
403 ====================================
404
405 Tries to retrieve a template of the given name
406 from the webservice. If you are trying to fetch
407 a private template (ie. one you made yourself
408 and is not available to others) then you may need to authenticate
409
410 @see: L{intermine.webservice.Service.__init__}
411
412 @param name: the template's name
413 @type name: string
414
415 @raise ServiceError: if the template does not exist
416 @raise QueryParseError: if the template cannot be parsed
417
418 @return: L{intermine.query.Template}
419 """
420 try:
421 t = self.templates[name]
422 except KeyError:
423 raise ServiceError("There is no template called '"
424 + name + "' at this service")
425 if not isinstance(t, Template):
426 t = Template.from_xml(t, self.model, self)
427 self.templates[name] = t
428 return t
429
430 @property
453
454 - def resolve_ids(self, data_type, identifiers, extra = '', case_sensitive = False, wildcards = False):
455 """
456 Submit an Identifier Resolution Job
457 ===================================
458
459 Request that a set of identifiers be resolved to objects in
460 the data store.
461
462 @param data_type: The type of these identifiers (eg. 'Gene')
463 @type data_type: String
464
465 @param identifiers: The ids to resolve (eg. ['eve', 'zen', 'pparg'])
466 @type identifiers: iterable of string
467
468 @param extra: A disambiguating value (eg. "Drosophila melanogaster")
469 @type extra: String
470
471 @param case_sensitive: Whether to treat IDs case sensitively.
472 @type case_sensitive: Boolean
473
474 @param wildcards: Whether or not to interpret wildcards (eg: "eve*")
475 @type wildcards: Boolean
476
477 @return: {idresolution.Job} The job.
478 """
479 if self.version < 10:
480 raise ServiceError("This feature requires API version 10+")
481 if not data_type:
482 raise ServiceError("No data-type supplied")
483 if not identifiers:
484 raise ServiceError("No identifiers supplied")
485
486 data = json.dumps({
487 "type": data_type,
488 "identifiers": list(identifiers),
489 "extra": extra,
490 "caseSensitive": case_sensitive,
491 "wildCards": wildcards
492 })
493 text = self.opener.post_content(self.root + self.IDS_PATH, data, InterMineURLOpener.JSON)
494 ret = json.loads(text)
495 if ret['error'] is not None:
496 raise ServiceError(ret['error'])
497 if ret['uid'] is None:
498 raise Exception("No uid found in " + ret)
499
500 return idresolution.Job(self, ret['uid'])
501
503 """
504 Flushes any cached data.
505 """
506 self._templates = None
507 self._model = None
508 self._version = None
509 self._release = None
510 self._widgets = None
511
512 @property
514 """
515 The dictionary of templates from the webservice
516 ===============================================
517
518 Service.templates S{->} dict(intermine.query.Template|string)
519
520 For efficiency's sake, Templates are not parsed until
521 they are required, and until then they are stored as XML
522 strings. It is recommended that in most cases you would want
523 to use L{Service.get_template}.
524
        You can, however, use this property to test for template existence::
526
527 if name in service.templates:
528 template = service.get_template(name)
529
530 @rtype: dict
531
532 """
533 if self._templates is None:
534 sock = self.opener.open(self.root + self.TEMPLATES_PATH)
535 dom = minidom.parse(sock)
536 sock.close()
537 templates = {}
538 for e in dom.getElementsByTagName('template'):
539 name = e.getAttribute('name')
540 if name in templates:
541 raise ServiceError("Two templates with same name: " + name)
542 else:
543 templates[name] = e.toxml()
544 self._templates = templates
545 return self._templates
546
547 @property
549 """
550 The data model for the webservice you are querying
551 ==================================================
552
553 Service.model S{->} L{intermine.model.Model}
554
555 This is used when constructing queries to provide them
556 with information on the structure of the data model
557 they are accessing. You are very unlikely to want to
558 access this object directly.
559
560 raises ModelParseError: if the model cannot be read
561
562 @rtype: L{intermine.model.Model}
563
564 """
565 if self._model is None:
566 model_url = self.root + self.MODEL_PATH
567 self._model = Model(model_url, self)
568 return self._model
569
570 - def get_results(self, path, params, rowformat, view, cld=None):
571 """
572 Return an Iterator over the rows of the results
573 ===============================================
574
575 This method is called internally by the query objects
576 when they are called to get results. You will not
577 normally need to call it directly
578
579 @param path: The resource path (eg: "/query/results")
580 @type path: string
581 @param params: The query parameters for this request as a dictionary
582 @type params: dict
583 @param rowformat: One of "rr", "object", "count", "dict", "list", "tsv", "csv", "jsonrows", "jsonobjects"
584 @type rowformat: string
585 @param view: The output columns
586 @type view: list
587
588 @raise WebserviceError: for failed requests
589
590 @return: L{intermine.webservice.ResultIterator}
591 """
592 return ResultIterator(self, path, params, rowformat, view, cld)
593