# Source code for gravityspy.table.events

# -*- coding: utf-8 -*-
# Copyright (C) Scott Coughlin (2017-)
#
# This file is part of gravityspy.
#
# gravityspy is free software: you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation, either version 3 of the License, or
# (at your option) any later version.
#
# gravityspy is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with gravityspy.  If not, see <http://www.gnu.org/licenses/>.

from gwtrigfind import find_trigger_files
from gwpy.segments import DataQualityFlag
from gwpy.table import GravitySpyTable
from gwpy.utils import mp as mp_utils
from sklearn.cluster import KMeans

from ..utils import log
from ..utils import utils
from ..api.project import GravitySpyProject
from ..ml.train_classifier import make_model

import panoptes_client
import numpy
import pandas
import subprocess
import string
import random
import os

class Events(GravitySpyTable):
    """A table of event triggers for the Gravity Spy project.

    Extends `gwpy.table.GravitySpyTable` with methods for reading and
    filtering triggers, classifying them with a CNN, and uploading them
    to the Zooniverse citizen-science platform.
    """
[docs] @classmethod def read(cls, *args, **kwargs): """Classify triggers in this table Parameters: `gwpy.table.GravitySpyTable` Returns: `Events` table """ etg = kwargs.pop('etg', 'OMICRON') tab = super(Events, cls).read(*args, **kwargs) tab = tab.to_pandas() if 'gravityspy_id' not in tab.columns: tab['gravityspy_id'] = tab.apply(id_generator, 1) tab['image_status'] = 'testing' tab['data_quality'] = 'no_flag' tab['upload_flag'] = 0 tab['citizen_score'] = 0.0 tab['links_subjects'] = 0 tab['url1'] = '' tab['url2'] = '' tab['url3'] = '' tab['url4'] = '' if etg == 'OMICRON': tab['event_id'] = tab['event_id'].apply(int) tab['process_id'] = tab['process_id'].apply(int) tab = cls.from_pandas(tab) if etg == 'OMICRON': tab['event_time'] = (tab['peak_time'] + (0.000000001)*tab['peak_time_ns']) tab['event_time'].format = '%.9f' else: raise ValueError("No trigger reading has " "been defined for this ETG") return tab
[docs] @classmethod def fetch(cls, *args, **kwargs): tab = super(Events, cls).fetch(*args, **kwargs) return cls(tab)
[docs] def classify(self, path_to_cnn, **kwargs): """Classify triggers in this table Parameters: path_to_cnn: file name of the CNN you would like to use **kwargs: nproc : number of parallel event times to be processing at once Returns: `Events` table """ if 'event_time' not in self.keys(): raise ValueError("This method only works if you have defined " "a column event_time for your " "Event Trigger Generator.") # Parse key word arguments config = kwargs.pop('config', utils.GravitySpyConfigFile()) plot_directory = kwargs.pop('plot_directory', 'plots') timeseries = kwargs.pop('timeseries', None) source = kwargs.pop('source', None) channel_name = kwargs.pop('channel_name', None) frametype = kwargs.pop('frametype', None) # make a list of event times inputs = zip(self['event_time'], self['ifo'], self['gravityspy_id']) inputs = ((etime, ifo, gid, config, plot_directory, timeseries, source, channel_name, frametype) for etime, ifo, gid in inputs) # calculate maximum number of processes nproc = kwargs.pop('nproc', 1) # make q_scans output = mp_utils.multiprocess_with_queues(nproc, _make_single_qscan, inputs) qvalues = [] # raise exceptions (from multiprocessing, single process raises inline) for f, x in output: if isinstance(x, Exception): x.args = ('Failed to make q scan at time %s: %s' % (f, str(x)),) raise x else: qvalues.append(x) self['q_value'] = qvalues results = utils.label_q_scans(plot_directory=plot_directory, path_to_cnn=path_to_cnn, **kwargs) results = results.to_pandas() results['Filename1'] = results['Filename1'].apply(lambda x, y : os.path.join(y, x), args=(plot_directory,)) results['Filename2'] = results['Filename2'].apply(lambda x, y : os.path.join(y, x), args=(plot_directory,)) results['Filename3'] = results['Filename3'].apply(lambda x, y : os.path.join(y, x), args=(plot_directory,)) results['Filename4'] = results['Filename4'].apply(lambda x, y : os.path.join(y, x), args=(plot_directory,)) results = Events.from_pandas(results.merge(self.to_pandas(), 
on=['gravityspy_id'])) return results
[docs] def to_sql(self, table='glitches_v2d0', engine=None, **kwargs): """Obtain omicron triggers to run gravityspy on Parameters: table (str): name of SQL table """ from sqlalchemy.engine import create_engine # connect if needed if engine is None: conn_kw = {} for key in ('db', 'host', 'user', 'passwd'): try: conn_kw[key] = kwargs.pop(key) except KeyError: pass engine = create_engine(get_connection_str(**conn_kw)) self.to_pandas().to_sql(table, engine, index=False, if_exists='append') return
[docs] def update_sql(self, table='glitches_v2d0', engine=None): """Obtain omicron triggers to run gravityspy on Parameters: table (str): name of SQL tabl """ from sqlalchemy.engine import create_engine # connect if needed if engine is None: conn_kw = {} for key in ('db', 'host', 'user', 'passwd'): try: conn_kw[key] = kwargs.pop(key) except KeyError: pass engine = create_engine(get_connection_str(**conn_kw)) column_dict = self.to_pandas().to_dict(orient='records')[0] sql_command = 'UPDATE {0} SET '.format(table) for column_name in column_dict: if isinstance(column_dict[column_name], str): sql_command = sql_command + '''\"{0}\" = \'{1}\', '''.format(column_name, column_dict[column_name]) else: sql_command = sql_command + '''\"{0}\" = {1}, '''.format(column_name, column_dict[column_name]) sql_command = sql_command[:-2] + ' WHERE \"gravityspy_id\" = \'' + self['gravityspy_id'].iloc[0] + "'" engine.execute(sql_command) return
[docs] def upload_to_zooniverse(self, subject_set_id=None): """Obtain omicron triggers to run gravityspy on Parameters: subject_set_id (optional, int) : subject set id to upload to Returns: `Events` table """ # First filter out images that have already been uploaded tab = self[self['upload_flag'] == 1] if subject_set_id is None: subset_ids = numpy.unique(tab['subjectset']) else: subset_ids = numpy.atleast_1d(numpy.array(subject_set_id)) panoptes_client.Panoptes.connect() project = panoptes_client.Project.find(slug='zooniverse/gravity-spy') for subset_id in subset_ids: subjectset = panoptes_client.SubjectSet.find(subset_id) subjects = [] if subject_set_id is None: tab1 = tab[tab['subjectset'] == subset_id] for fn1, fn2, fn3, fn4, gid in tab1['Filename1', 'Filename2', 'Filename3', 'Filename4', 'gravityspy_id']: subject = panoptes_client.Subject() subject.links.project = project subject.add_location(str(fn1)) subject.add_location(str(fn2)) subject.add_location(str(fn3)) subject.add_location(str(fn4)) subject.metadata['date'] = '20180825' subject.metadata['subject_id'] = str(gid) subject.metadata['Filename1'] = fn1.split('/')[-1] subject.metadata['Filename2'] = fn2.split('/')[-1] subject.metadata['Filename3'] = fn3.split('/')[-1] subject.metadata['Filename4'] = fn4.split('/')[-1] subject.save() subjects.append(subject) self[self['gravityspy_id'] == gid]['links_subject'] = int(subject.id) self[self['gravityspy_id'] == gid]['url1'] = subject.raw['locations'][0]['image/png'].split('?')[0] self[self['gravityspy_id'] == gid]['url2'] = subject.raw['locations'][1]['image/png'].split('?')[0] self[self['gravityspy_id'] == gid]['url3'] = subject.raw['locations'][2]['image/png'].split('?')[0] self[self['gravityspy_id'] == gid]['url4'] = subject.raw['locations'][3]['image/png'].split('?')[0] self['upload_flag'][self['gravityspy_id'] == gid] = 1 subjectset.add(subjects) return self
[docs] def update_scores(self, path_to_cnn, nproc=1, **kwargs): """Obtain omicron triggers to run gravityspy on Parameters: path_to_cnn (str): filename of model Returns: `Events` table with columns containing new scores """ if not all(elem in self.keys() for elem in ['Filename1', 'Filename2', 'Filename3', 'Filename4']): raise ValueError("This method only works if the file paths " "of the images of the images are known.") results = utils.label_select_images(filename1=self['Filename1'], filename2=self['Filename2'], filename3=self['Filename3'], filename4=self['Filename4'], path_to_cnn=path_to_cnn, **kwargs) return Events(results)
[docs] def determine_workflow_and_subjectset(self, project_info_pickle): """Obtain omicron triggers to run gravityspy on Parameters: path_to_cnn (str): filename of file with Gravity Spy project info Returns: `Events` table with columns workflow and subjectset """ if 'ml_confidence' not in self.keys() or 'ml_label' not in self.keys(): raise ValueError("This method only works if the confidence and label " "of the image in known.") gspyproject = GravitySpyProject.load_project_from_cache( project_info_pickle ) workflows_for_each_class = gspyproject.get_level_structure(IDfilter='O2') # Determine subject set and workflow this should go to. level_of_images = [] subjectset_of_images = [] for label, confidence in zip(self['ml_label'], self['ml_confidence']): for iworkflow in ['1610', '1934', '1935', '7765', '7766', '7767']: if label in workflows_for_each_class[iworkflow].keys(): if workflows_for_each_class[iworkflow][label][2][1] <= \ confidence <= \ workflows_for_each_class[iworkflow][label][2][0]: level_of_images.append(int(workflows_for_each_class[iworkflow][label][0])) subjectset_of_images.append(workflows_for_each_class[iworkflow][label][1]) break self["workflow"] = level_of_images self["subjectset"] = subjectset_of_images return self
[docs] def create_collection(self, name=None, private=True, default_subject=None): """Obtain omicron triggers to run gravityspy on Parameters: name (str, optional): name of collection private (bool, optional): would you like this collection to be private or public default_subject (int, optional): subject id to be the cover image of collection Returns: `str` url link to the created collection """ if name is None: # will name it after the label of event table name = self['Label'][0] if default_subject is None: default_subject = self['links_subjects'][0] collection_url = ('https://www.zooniverse.org/' 'projects/zooniverse/gravity-spy/collections/') with panoptes_client.Panoptes() as client: client.connect() collection = panoptes_client.Collection() collection.links.project = '1104' collection.display_name = '{0}'.format(name) collection.private = private urltmp = collection.save() collection_url = collection_url + urltmp['collections'][0]['slug'] collection.add(list(self['links_subjects'])) collection.set_default_subject(default_subject) return collection_url
[docs] def cluster(self, nclusters, random_state=30): """Create new clusters from feature space vectors Parameters: nclusters (int): how many clusters to try to group these triggers into Returns: `Events` table """ if '0' not in self.columns: raise ValueError("You are trying to cluster but you do not have " "the feature space information in this table.") features = self.to_pandas().values[:, 0:200] kmeans_1 = KMeans(nclusters, random_state=random_state).fit(features) clusters = kmeans_1.labels_ self['clusters'] = clusters return self
[docs] @classmethod def get_triggers(cls, start, end, channel, dqflag, verbose=True, **kwargs): """Obtain omicron triggers to run gravityspy on Parameters: start (int): start of time to look for triggers end (int): end time to look for triggers channel (str): channel to look for triggers dqflag (str): name of segment during which to keep triggers Returns: `Events` table """ duration_max = kwargs.pop('duration_max', None) duration_min = kwargs.pop('duration_min', None) frequency_max = kwargs.pop('frequency_max', 2048) frequency_min = kwargs.pop('frequency_min', 10) snr_max = kwargs.pop('snr_max', None) snr_min = kwargs.pop('snr_min', 7.5) detector = channel.split(':')[0] logger = log.Logger('Gravity Spy: Fetching Omicron Triggers') # Obtain segments that are analysis ready analysis_ready = DataQualityFlag.query('{0}:{1}'.format(detector, dqflag), float(start), float(end)) # Display segments for which this flag is true logger.info("Segments for which the {0} Flag " "is active: {1}".format(dqflag, analysis_ready.active)) # get Omicron triggers files = find_trigger_files(channel,'Omicron', float(start),float(end)) triggers = cls.read(files, tablename='sngl_burst', format='ligolw') logger.info("Number of triggers " "before any filtering: {0}".format(len(triggers))) masks = numpy.ones(len(triggers), dtype=bool) logger.info("duration filter " "[{0}, {1}]".format(duration_min, duration_max)) logger.info("frequency filter " "[{0}, {1}]".format(frequency_min, frequency_max)) logger.info("snr filter " "[{0}, {1}]".format(snr_min, snr_max)) if not duration_max is None: masks &= (triggers['duration'] <= duration_max) if not duration_min is None: masks &= (triggers['duration'] >= duration_min) if not frequency_max is None: masks &= (triggers['peak_frequency'] <= frequency_max) if not frequency_min is None: masks &= (triggers['peak_frequency'] >= frequency_min) if not snr_max is None: masks &= (triggers['snr'] <= snr_max) if not snr_min is None: masks &= (triggers['snr'] >= 
snr_min) triggers = triggers[masks] # Set peakGPS logger.info("Number of triggers after " "snr, frequency, and duration filters " "cuts but before {0} flag filtering: " "{1}".format(dqflag, len(triggers))) # Filter the raw omicron triggers against the ANALYSIS READY flag. vetoed = triggers['event_time'].in_segmentlist(analysis_ready.active) triggers = triggers[vetoed] logger.info("Final trigger length: {0}".format(len(triggers))) return triggers
def id_generator(x, size=10,
                 chars=(string.ascii_uppercase + string.digits
                        + string.ascii_lowercase)):
    """Generate a random alphanumeric identifier.

    Parameters:
        x: ignored row argument, present so this function can be passed
            directly to `pandas.DataFrame.apply`
        size (int, optional): number of characters in the identifier
        chars (str, optional): alphabet to draw characters from

    Returns:
        str: a ``size``-character cryptographically random string
    """
    rng = random.SystemRandom()
    return ''.join(rng.choice(chars) for _ in range(size))
def get_connection_str(db='gravityspy',
                       host='gravityspy.ciera.northwestern.edu',
                       user=None, passwd=None):
    """Create string to pass to create_engine

    Parameters:
        db (str, optional): database name
        host (str, optional): database host
        user (str, optional): database user; falls back to the
            ``GRAVITYSPY_DATABASE_USER`` environment variable
        passwd (str, optional): database password; falls back to the
            ``GRAVITYSPY_DATABASE_PASSWD`` environment variable

    Returns:
        str: a postgresql connection URL

    Raises:
        ValueError: if either credential is still missing after
            consulting the environment
    """
    # BUG FIX: fill only the *missing* credential from the environment.
    # Previously, if either one was absent, BOTH were overwritten, so an
    # explicitly passed user (or password) was clobbered by the
    # environment (possibly with None).
    if not user:
        user = os.getenv('GRAVITYSPY_DATABASE_USER', None)
    if not passwd:
        passwd = os.getenv('GRAVITYSPY_DATABASE_PASSWD', None)

    if (not user) or (not passwd):
        raise ValueError('Remember to either pass '
                         'or export GRAVITYSPY_DATABASE_USER '
                         'and export GRAVITYSPY_DATABASE_PASSWD in order '
                         'to access the Gravity Spy Data: '
                         'https://secrets.ligo.org/secrets/144/'
                         ' description is username and secret is password.')

    return 'postgresql://{0}:{1}@{2}:5432/{3}'.format(user, passwd,
                                                      host, db)
# define multiprocessing method
def _make_single_qscan(inputs):
    """Make and save the q-scan images for a single event time.

    Parameters:
        inputs (tuple): ``(event_time, ifo, gravityspy_id, config,
            plot_directory, timeseries, source, channel_name,
            frametype)`` -- exactly the tuple built by
            `Events.classify`.

    Returns:
        tuple: ``(event_time, q_value)`` on success, or
        ``(event_time, exception)`` on failure; the caller decides
        whether to re-raise.
    """
    (event_time, ifo, gid, config, plot_directory,
     timeseries, source, channel_name, frametype) = inputs

    # Parse Ini File
    plot_time_ranges = config.plot_time_ranges
    plot_normalized_energy_range = config.plot_normalized_energy_range

    try:
        # exactly one of timeseries/source/channel_name is expected to
        # be set; if several are supplied, the last one wins
        if timeseries is not None:
            specsgrams, q_value = utils.make_q_scans(event_time=event_time,
                                                     config=config,
                                                     timeseries=timeseries)
        if source is not None:
            specsgrams, q_value = utils.make_q_scans(event_time=event_time,
                                                     config=config,
                                                     source=source)
        if channel_name is not None:
            specsgrams, q_value = utils.make_q_scans(
                event_time=event_time, config=config,
                channel_name=channel_name, frametype=frametype)
        utils.save_q_scans(plot_directory, specsgrams,
                           plot_normalized_energy_range, plot_time_ranges,
                           ifo, event_time, id_string=gid)
        return event_time, q_value
    except Exception as exc:  # pylint: disable=broad-except
        # BUG FIX: the previous code read `if nproc == 1: raise`, but
        # `nproc` is a local variable of Events.classify and is
        # undefined in this scope, so every failure raised NameError
        # instead of the real error.  Always hand the exception back;
        # Events.classify re-raises it with context after collecting
        # results.
        return event_time, exc