Source code for redback.get_data.swift

from __future__ import annotations

import os
import time
from typing import Union
import urllib
import urllib.request

import astropy.io.ascii
import numpy as np
import pandas as pd
import requests

import redback.get_data.directory
import redback.get_data.utils
import redback.redback_errors
from redback.get_data.getter import GRBDataGetter
from redback.utils import fetch_driver, check_element
from redback.utils import logger

# Directory containing this module; used by `SwiftDataGetter.get_swift_id_from_grb`
# to build the path of the Swift BAT summary table.
dirname = os.path.dirname(__file__)


class SwiftDataGetter(GRBDataGetter):
    """Data getter that downloads Swift GRB data and converts it into a processed .csv file.

    Depending on `transient_type`, `data_mode` and `instrument`, the raw data is either
    downloaded directly from a URL or obtained by clicking through the Swift burst analyser
    website with the browser driver returned by `redback.utils.fetch_driver`.
    """

    VALID_TRANSIENT_TYPES = ["afterglow", "prompt"]
    VALID_DATA_MODES = ['flux', 'flux_density', 'prompt']
    VALID_INSTRUMENTS = ['BAT+XRT', 'XRT']

    XRT_DATA_KEYS = ['Time [s]', "Pos. time err [s]", "Neg. time err [s]", "Flux [erg cm^{-2} s^{-1}]",
                     "Pos. flux err [erg cm^{-2} s^{-1}]", "Neg. flux err [erg cm^{-2} s^{-1}]"]
    INTEGRATED_FLUX_KEYS = ["Time [s]", "Pos. time err [s]", "Neg. time err [s]", "Flux [erg cm^{-2} s^{-1}]",
                            "Pos. flux err [erg cm^{-2} s^{-1}]", "Neg. flux err [erg cm^{-2} s^{-1}]",
                            "Instrument"]
    FLUX_DENSITY_KEYS = ['Time [s]', "Pos. time err [s]", "Neg. time err [s]",
                         'Flux [mJy]', 'Pos. flux err [mJy]', 'Neg. flux err [mJy]']
    PROMPT_DATA_KEYS = ["Time [s]", "flux_15_25 [counts/s/det]", "flux_15_25_err [counts/s/det]",
                        "flux_25_50 [counts/s/det]", "flux_25_50_err [counts/s/det]",
                        "flux_50_100 [counts/s/det]", "flux_50_100_err [counts/s/det]",
                        "flux_100_350 [counts/s/det]", "flux_100_350_err [counts/s/det]",
                        "flux_15_350 [counts/s/det]", "flux_15_350_err [counts/s/det]"]
    SWIFT_PROMPT_BIN_SIZES = ['1s', '2ms', '8ms', '16ms', '64ms', '256ms']

    def __init__(
            self, grb: str, transient_type: str, data_mode: str,
            instrument: str = 'BAT+XRT', bin_size: Union[str, None] = None) -> None:
        """Constructor class for a data getter. The instance will be able to download the specified Swift data.

        :param grb: Name of the GRB, e.g., 'GRB140903A' or '140903A' are valid inputs.
        :type grb: str
        :param transient_type: Type of the transient. Should be 'prompt' or 'afterglow'.
        :type transient_type: str
        :param data_mode: Data mode must be from `redback.get_data.swift.SwiftDataGetter.VALID_DATA_MODES`.
        :type data_mode: str
        :param instrument: Instrument(s) to use.
                           Must be from `redback.get_data.swift.SwiftDataGetter.VALID_INSTRUMENTS`.
        :type instrument: str
        :param bin_size: Bin size. Must be from `redback.get_data.swift.SwiftDataGetter.SWIFT_PROMPT_BIN_SIZES`.
        :type bin_size: str, None
        :raises ValueError: If `data_mode` or `instrument` is not one of the valid options.
        """
        super().__init__(grb=grb, transient_type=transient_type)
        self.grb = grb
        self.instrument = instrument
        self.data_mode = data_mode
        self.bin_size = bin_size
        # The directory structure depends on all attributes set above, so it must come last.
        self.directory_path, self.raw_file_path, self.processed_file_path = self.create_directory_structure()

    @property
    def data_mode(self) -> str:
        """Ensures the data mode to be from `SwiftDataGetter.VALID_DATA_MODES`.

        :return: The data mode
        :rtype: str
        """
        return self._data_mode

    @data_mode.setter
    def data_mode(self, data_mode: str) -> None:
        """
        :param data_mode: The data mode.
        :type data_mode: str
        :raises ValueError: If `data_mode` is not in `SwiftDataGetter.VALID_DATA_MODES`.
        """
        if data_mode not in self.VALID_DATA_MODES:
            # Report the rejected value itself; the stored attribute may not exist yet.
            raise ValueError(f"Swift does not have {data_mode} data")
        self._data_mode = data_mode

    @property
    def instrument(self) -> str:
        """Ensures the instrument to be from `SwiftDataGetter.VALID_INSTRUMENTS`.

        :return: The instrument
        :rtype: str
        """
        return self._instrument

    @instrument.setter
    def instrument(self, instrument: str) -> None:
        """
        :param instrument: The instrument
        :type instrument: str
        :raises ValueError: If `instrument` is not in `SwiftDataGetter.VALID_INSTRUMENTS`.
        """
        if instrument not in self.VALID_INSTRUMENTS:
            # Report the rejected value itself; the stored attribute may not exist yet.
            raise ValueError(f"Swift does not have {instrument} instrument mode")
        self._instrument = instrument

    @property
    def trigger(self) -> str:
        """Gets the trigger number based on the GRB name.

        :return: The trigger number.
        :rtype: str
        """
        logger.info('Getting trigger number')
        return redback.get_data.utils.get_trigger_number(self.stripped_grb)

    def get_swift_id_from_grb(self) -> str:
        """Gets the Swift ID from the GRB number.

        The ID is looked up in the `tables/summary_general_swift_bat.txt` table that sits next to
        the `get_data` package directory, and is zero-padded to eleven characters.

        :return: The Swift ID
        :rtype: str
        :raises ValueError: If the GRB name is not listed in the table.
        """
        # `dirname` is the `get_data` directory; the tables live one level up.
        table_path = os.path.join(os.path.dirname(dirname), 'tables', 'summary_general_swift_bat.txt')
        data = astropy.io.ascii.read(table_path)
        triggers = list(data['col2'])
        event_names = list(data['col1'])
        swift_id = triggers[event_names.index(self.grb)]
        if len(swift_id) == 6:
            swift_id += "000"
        swift_id = swift_id.zfill(11)
        return swift_id

    @property
    def grb_website(self) -> str:
        """
        :return: The GRB website depending on the data mode and instrument.
        :rtype: str
        """
        if self.transient_type == 'prompt':
            return f"https://swift.gsfc.nasa.gov/results/batgrbcat/{self.grb}/data_product/" \
                   f"{self.get_swift_id_from_grb()}-results/lc/{self.bin_size}_lc_ascii.dat"
        if self.instrument == 'BAT+XRT':
            return f'http://www.swift.ac.uk/burst_analyser/00{self.trigger}/'
        elif self.instrument == 'XRT':
            return f'https://www.swift.ac.uk/xrt_curves/00{self.trigger}/flux.qdp'

    def get_data(self) -> pd.DataFrame:
        """Downloads the raw data and produces a processed .csv file.

        Logs a warning about the limitations of the chosen instrument before delegating
        to the parent class.

        :return: The processed data
        :rtype: pandas.DataFrame
        """
        if self.instrument == "BAT+XRT":
            logger.warning(
                "You are downloading BAT and XRT data, "
                "you will need to truncate the data for some models.")
        elif self.instrument == "XRT":
            logger.warning(
                "You are only downloading XRT data, you may not capture"
                " the tail of the prompt emission.")
        return super().get_data()

    def create_directory_structure(self) -> redback.get_data.directory.DirectoryStructure:
        """
        :return: A namedtuple with the directory path, raw file path, and processed file path.
        :rtype: redback.get_data.directory.DirectoryStructure
        """
        if self.transient_type == 'afterglow':
            return redback.get_data.directory.afterglow_directory_structure(
                grb=self.grb, data_mode=self.data_mode, instrument=self.instrument)
        elif self.transient_type == 'prompt':
            return redback.get_data.directory.swift_prompt_directory_structure(
                grb=self.grb, bin_size=self.bin_size)

    def collect_data(self) -> None:
        """Downloads the data from the Swift website and saves it into the raw file path.

        Does nothing if the raw file already exists.

        :raises redback.redback_errors.WebsiteExist: If the website reports that no light curve is available.
        """
        if os.path.isfile(self.raw_file_path):
            logger.warning('The raw data file already exists. Returning.')
            return

        response = requests.get(self.grb_website)
        if 'No Light curve available' in response.text:
            raise redback.redback_errors.WebsiteExist(
                f'Problem loading the website for GRB{self.stripped_grb}. '
                f'Are you sure GRB {self.stripped_grb} has Swift data?')

        # XRT-only and prompt data are plain files; BAT+XRT afterglow data needs the browser driver.
        if self.instrument == 'XRT' or self.transient_type == "prompt":
            self.download_directly()
        elif self.transient_type == 'afterglow':
            if self.data_mode == 'flux':
                self.download_integrated_flux_data()
            elif self.data_mode == 'flux_density':
                self.download_flux_density_data()

    def download_flux_density_data(self) -> None:
        """Downloads flux density data from the Swift website.

        Uses the browser driver returned by `fetch_driver` to click through the website.
        Failures are logged as warnings rather than raised. Properly quits the driver.
        """
        driver = fetch_driver()
        try:
            driver.get(self.grb_website)
            driver.find_element("xpath", "//select[@name='xrtsub']/option[text()='no']").click()
            time.sleep(20)
            driver.find_element("id", "xrt_DENSITY_makeDownload").click()
            time.sleep(20)
            grb_url = driver.current_url

            # scrape the data
            urllib.request.urlretrieve(url=grb_url, filename=self.raw_file_path)
            logger.info(f'Congratulations, you now have raw data for {self.grb}')
        except Exception as e:
            logger.warning(f'Cannot load the website for {self.grb} \n'
                           f'Failed with exception: \n'
                           f'{e}')
        finally:
            # Close the driver and all opened windows
            driver.quit()
            urllib.request.urlcleanup()

    def download_integrated_flux_data(self) -> None:
        """Downloads integrated flux data from the Swift website.

        Uses the browser driver returned by `fetch_driver` to click through the website.
        Failures are logged as warnings rather than raised. Properly quits the driver.
        """
        driver = fetch_driver()
        try:
            driver.get(self.grb_website)

            # select option for BAT bin_size
            bat_binning = 'batxrtbin'
            if check_element(driver, bat_binning):
                driver.find_element("xpath", "//select[@name='batxrtbin']/option[text()='SNR 4']").click()

            # select option for subplot
            subplot = "batxrtsub"
            if check_element(driver, subplot):
                driver.find_element("xpath", "//select[@name='batxrtsub']/option[text()='no']").click()

            # Select option for flux density
            flux_density1 = "batxrtband1"
            flux_density0 = "batxrtband0"
            if (check_element(driver, flux_density1)) and (check_element(driver, flux_density0)):
                driver.find_element("xpath", ".//*[@id='batxrtband1']").click()
                driver.find_element("xpath", ".//*[@id='batxrtband0']").click()

            # Generate data file
            driver.find_element("xpath", ".//*[@id='batxrt_XRTBAND_makeDownload']").click()
            time.sleep(20)

            grb_url = driver.current_url
            # The driver is quit exactly once, in the `finally` clause below.
            urllib.request.urlretrieve(grb_url, self.raw_file_path)
            logger.info(f'Congratulations, you now have raw data for {self.grb}')
        except Exception as e:
            logger.warning(f'Cannot load the website for {self.grb} \n'
                           f'Failed with exception: \n'
                           f'{e}')
        finally:
            # Close the driver and all opened windows
            driver.quit()
            urllib.request.urlcleanup()

    def download_directly(self) -> None:
        """Downloads prompt or XRT data directly from `grb_website` without using the browser driver.

        Failures are logged as warnings rather than raised.
        """
        try:
            urllib.request.urlretrieve(self.grb_website, self.raw_file_path)
            logger.info(f'Congratulations, you now have raw {self.instrument} {self.transient_type} '
                        f'data for {self.grb}')
        except Exception as e:
            logger.warning(f'Cannot load the website for {self.grb} \n'
                           f'Failed with exception: \n'
                           f'{e}')
        finally:
            urllib.request.urlcleanup()

    def convert_raw_data_to_csv(self) -> Union[pd.DataFrame, None]:
        """Converts the raw data into processed data and saves it into the processed file path.

        If the processed file already exists it is read back and returned unchanged.

        :return: The processed data
        :rtype: pandas.DataFrame
        """
        if os.path.isfile(self.processed_file_path):
            logger.warning('The processed data file already exists. Returning.')
            return pd.read_csv(self.processed_file_path)

        if self.instrument == 'XRT':
            return self.convert_xrt_data_to_csv()
        elif self.transient_type == 'afterglow':
            return self.convert_raw_afterglow_data_to_csv()
        elif self.transient_type == 'prompt':
            return self.convert_raw_prompt_data_to_csv()

    def convert_xrt_data_to_csv(self) -> pd.DataFrame:
        """Converts the raw XRT data into processed data and saves it into the processed file path.
        The column names are in `SwiftDataGetter.XRT_DATA_KEYS`

        Rows whose positive flux error is exactly zero are dropped.

        :return: The processed data.
        :rtype: pandas.DataFrame
        """
        raw = np.loadtxt(self.raw_file_path, comments=['!', 'READ', 'NO'])
        data = pd.DataFrame({key: raw[:, i] for i, key in enumerate(self.XRT_DATA_KEYS)})
        data = data[data["Pos. flux err [erg cm^{-2} s^{-1}]"] != 0.]
        data.to_csv(self.processed_file_path, index=False, sep=',')
        return data

    def convert_raw_afterglow_data_to_csv(self) -> pd.DataFrame:
        """Converts the raw afterglow data into processed data and saves it into the processed file path.

        Dispatches on the data mode ('flux' or 'flux_density').

        :return: The processed data.
        :rtype: pandas.DataFrame
        """
        if self.data_mode == 'flux':
            return self.convert_integrated_flux_data_to_csv()
        if self.data_mode == 'flux_density':
            return self.convert_flux_density_data_to_csv()

    def convert_raw_prompt_data_to_csv(self) -> pd.DataFrame:
        """Converts the raw prompt data into processed data and saves it into the processed file path.
        The column names are in `SwiftDataGetter.PROMPT_DATA_KEYS`

        :return: The processed data.
        :rtype: pandas.DataFrame
        """
        data = np.loadtxt(self.raw_file_path)
        df = pd.DataFrame(data=data, columns=self.PROMPT_DATA_KEYS)
        df.to_csv(self.processed_file_path, index=False, sep=',')
        return df

    def convert_integrated_flux_data_to_csv(self) -> pd.DataFrame:
        """Converts the flux data into processed data and saves it into the processed file path.
        The column names are in `SwiftDataGetter.INTEGRATED_FLUX_KEYS`

        Lines before the first line starting with 'NO NO NO' are ignored. After that, a line
        starting with '!' names the instrument of the rows that follow, and every line starting
        with a digit or '-' is parsed as a tab-separated data row labelled with that instrument.

        :return: The processed data.
        :rtype: pandas.DataFrame
        """
        data = {key: [] for key in self.INTEGRATED_FLUX_KEYS}
        # 'Instrument' is not part of the raw rows; it is filled from the section headers.
        value_keys = [key for key in self.INTEGRATED_FLUX_KEYS if key != 'Instrument']
        instrument = None
        with open(self.raw_file_path) as f:
            started = False
            for line in f:
                if line.startswith('NO NO NO'):
                    started = True
                if not started:
                    continue
                if line.startswith('!'):
                    instrument = line[2:].replace('\n', '')
                if line[0].isnumeric() or line[0] == '-':
                    line_items = line.split('\t')
                    for key, item in zip(value_keys, line_items):
                        data[key].append(item.replace('\n', ''))
                    # One label per row, so each row keeps the instrument of its own section.
                    data['Instrument'].append(instrument)

        df = pd.DataFrame(data=data)
        df.to_csv(self.processed_file_path, index=False, sep=',')
        return df

    def convert_flux_density_data_to_csv(self) -> pd.DataFrame:
        """Converts the flux data into processed data and saves it into the processed file path.
        The column names are in `SwiftDataGetter.FLUX_DENSITY_KEYS`

        Lines before the first line starting with 'NO NO NO' are ignored; afterwards every line
        starting with a digit or '-' is parsed as a tab-separated data row. The flux and flux
        error columns are multiplied by 1000 before saving.

        :return: The processed data.
        :rtype: pandas.DataFrame
        """
        data = {key: [] for key in self.FLUX_DENSITY_KEYS}
        with open(self.raw_file_path) as f:
            started = False
            for line in f:
                if line.startswith('NO NO NO'):
                    started = True
                if not started:
                    continue
                if line[0].isnumeric() or line[0] == '-':
                    line_items = line.split('\t')
                    for key, item in zip(self.FLUX_DENSITY_KEYS, line_items):
                        data[key].append(item.replace('\n', ''))

        # NOTE(review): the factor 1000 presumably converts Jy to mJy - confirm against the raw file units.
        for key in ('Flux [mJy]', 'Pos. flux err [mJy]', 'Neg. flux err [mJy]'):
            data[key] = [float(x) * 1000 for x in data[key]]

        df = pd.DataFrame(data=data)
        df.to_csv(self.processed_file_path, index=False, sep=',')
        return df