Commit 7b6248db authored by Bharath Ramsundar's avatar Bharath Ramsundar Committed by GitHub
Browse files

Merge pull request #859 from Dgelemi/untargz_function

Uncompress .tar.gz file with python function
parents 2031c49d 8fce9969
Loading
Loading
Loading
Loading
+2 −2
Original line number Diff line number Diff line
@@ -24,8 +24,8 @@ def load_qm9(featurizer='CoulombMatrix', split='random', reload=True):
      deepchem.utils.download_url(
          'http://deepchem.io.s3-website-us-west-1.amazonaws.com/datasets/gdb9.tar.gz'
      )
      os.system('tar -zxvf ' + os.path.join(data_dir, 'gdb9.tar.gz') + ' -C ' +
                data_dir)
      deepchem.utils.untargz_file(
          os.path.join(data_dir, 'gdb9.tar.gz'), data_dir)
  else:
    dataset_file = os.path.join(data_dir, "qm9.csv")
    if not os.path.exists(dataset_file):
+21 −0
Original line number Diff line number Diff line
@@ -11,6 +11,8 @@ import numpy as np
import os
import pandas as pd
import tempfile
import tarfile
import sys

from rdkit import Chem
from rdkit.Chem.Scaffolds import MurckoScaffold
@@ -83,6 +85,25 @@ def download_url(url, dest_dir=get_data_dir(), name=None):
  urlretrieve(url, os.path.join(dest_dir, name))


def untargz_file(file, dest_dir=get_data_dir(), name=None):
  """Untar and unzip a .tar.gz file to disk.
  
  Parameters
  ----------
  file: str
    the filepath to decompress
  dest_dir: str
    the directory to save the file in
  name: str
    the file name to save it as.  If omitted, it will use the file name 
  """
  if name is None:
    name = file
  tar = tarfile.open(name)
  tar.extractall(path=dest_dir)
  tar.close()


class ScaffoldGenerator(object):
  """
  Generate molecular scaffolds.