snorkel-team
diff --git a/‎.gitignore‎
Lines changed: 1 addition & 0 deletions b/‎.gitignore‎
Lines changed: 1 addition & 0 deletions
diff --git a/‎.gitmodules‎
Lines changed: 0 additions & 4 deletions b/‎.gitmodules‎
Lines changed: 0 additions & 4 deletions
diff --git a/‎.travis.yml‎
Lines changed: 41 additions & 74 deletions b/‎.travis.yml‎
Lines changed: 41 additions & 74 deletions
diff --git a/‎MANIFEST.in‎
Lines changed: 6 additions & 0 deletions b/‎MANIFEST.in‎
Lines changed: 6 additions & 0 deletions
diff --git a/‎README.md‎
Lines changed: 114 additions & 42 deletions b/‎README.md‎
Lines changed: 114 additions & 42 deletions
diff --git a/‎docs/README.md‎
Lines changed: 1 addition & 1 deletion b/‎docs/README.md‎
Lines changed: 1 addition & 1 deletion
@@ -11,6 +11,7 @@ downloads/
 **/checkpoint
 **/checkpoints/
 __pycache__
+*.egg-info/
 *corenlp.log
 
 # Sphinx
 
@@ -3,101 +3,68 @@
 
 dist: trusty
 sudo: false  # to use container-based infra, see: http://docs.travis-ci.com/user/migrating-from-legacy/
-
-language:
-  - python
-python:
-  - "2.7"
-  - "3.6"
-jdk:
-  - oraclejdk8
+language: generic
+env:
+  matrix:
+    - PYTHON_VERSION=2.7
+    - PYTHON_VERSION=3.6
 
 cache:
   directories:
     - download
-    - $HOME/.cache/pip
-    - $HOME/miniconda/envs/test      # to avoid repetitively setting up Ana/Miniconda environment
-    - parser                         # to avoid repetitively downloading CoreNLP
-
-addons:
-  apt:
-    packages:
-    # CoreNLP needs Java 8
-    - oracle-java8-installer
 
-# Following trick is necessary to get a binary distribution of numpy, scipy, etc. which takes too long to build every time
-# See: http://stackoverflow.com/q/30588634
-# See: https://github.com/Theano/Theano/blob/master/.travis.yml (for caching)
-# See: http://conda.pydata.org/docs/travis.html
 before_install:
-  - deactivate  # leaving Travis' virtualenv first since otherwise Jupyter/IPython gets confused with conda inside a virtualenv (See: https://github.com/ipython/ipython/issues/8898)
-  - mkdir -p download
-  - cd download
-  - rm -rf ~/miniconda
-  - if [[ "$TRAVIS_PYTHON_VERSION" == "2.7" ]]; then  
-      travis_retry wget -c https://repo.continuum.io/miniconda/Miniconda2-latest-Linux-x86_64.sh -O miniconda.sh;
-    else
-      travis_retry wget -c https://repo.continuum.io/miniconda/Miniconda3-latest-Linux-x86_64.sh -O miniconda.sh;
-    fi
-  - chmod +x miniconda.sh
-  - bash miniconda.sh -b -f -p ~/miniconda
-  - cd ..
-  - export PATH=~/miniconda/bin:$PATH
-  - conda update --yes conda
-
-  # Make sure Java 8 is used
-  - export PATH="/usr/lib/jvm/java-8-oracle/bin:$PATH"
-  - export JAVA_HOME=/usr/lib/jvm/java-8-oracle
-  - java -version
-
-  # Set environment variables
-  - source set_env.sh
+  - travis_retry
+    wget https://repo.continuum.io/miniconda/Miniconda3-4.5.1-Linux-x86_64.sh
+    --output-document=miniconda.sh
+  - bash miniconda.sh -b -p $HOME/miniconda
+  - source $HOME/miniconda/etc/profile.d/conda.sh
+  - conda config --set always_yes yes --set changeps1 no
+  - conda info --all
 
 install:
-  # Install binary distribution of scientific python modules
-  - test -e ~/miniconda/envs/test/bin/activate || ( rm -rf ~/miniconda/envs/test; conda create --yes -n test python=$TRAVIS_PYTHON_VERSION )
-  - source activate test
-  - conda install --yes numpy scipy matplotlib pip
-
-  # Install Numba
-  - conda install --yes numba
-
-  # Install all remaining dependencies as per our README
-  - pip install -r python-package-requirement.txt
-  - test -e parser/corenlp.sh || ./install-parser.sh
-
-  # Use runipy to run Jupyter/IPython notebooks from command-line
-  - pip install runipy
+  - sed --in-place 's/- python/- python='"$PYTHON_VERSION"'/' environment.yml
+  - conda env create --quiet --file=environment.yml
+  - conda activate snorkel
+  - pip install .
+  - conda install --quiet tensorflow   # Installs Tensorflow to test optional components
+  - conda list
 
 script:
 
-  # Run test modules
+  # Run generative model test modules
   - python test/learning/test_gen_learning.py
   - python test/learning/test_supervised.py
   - python test/learning/test_categorical.py
-  - runipy test/learning/test_TF_notebook.ipynb
-  - runipy test/learning/test_parallel_grid_search.ipynb
+
+  # Run PyTorch test modules
+  - python test/learning/pytorch/test_lstm.py
+  - python test/learning/pytorch/test_model_reloading.py
+  - python test/learning/pytorch/test_determinism.py
+
+  # Run Tensorflow test modules
+  - runipy test/learning/tensorflow/test_TF_notebook.ipynb
+  - runipy test/learning/tensorflow/test_parallel_grid_search.ipynb
 
   # Runs intro tutorial notebooks
-  - cd tutorials
-  - runipy intro/Intro_Tutorial_1.ipynb
-  - runipy intro/Intro_Tutorial_2.ipynb
-  - runipy intro/Intro_Tutorial_3.ipynb
+  - runipy tutorials/intro/Intro_Tutorial_1.ipynb
+  - runipy tutorials/intro/Intro_Tutorial_2.ipynb
+  - runipy tutorials/intro/Intro_Tutorial_3.ipynb
 
   # Run advanced notebooks
-  - runipy advanced/Categorical_Classes.ipynb
-  - runipy advanced/Structure_Learning.ipynb
+  - runipy tutorials/advanced/Categorical_Classes.ipynb
+  - runipy tutorials/advanced/Structure_Learning.ipynb
 
   # Run CDR tutorials
-  - runipy cdr/CDR_Tutorial_1.ipynb
-  - runipy cdr/CDR_Tutorial_2.ipynb
-  - runipy cdr/CDR_Tutorial_3.ipynb
+  - runipy tutorials/cdr/CDR_Tutorial_1.ipynb
+  - runipy tutorials/cdr/CDR_Tutorial_2.ipynb
+  - runipy tutorials/cdr/CDR_Tutorial_3.ipynb
 
   # TODO check outputs, upload results, etc.
   # for more ideas, see: https://github.com/rossant/ipycache/issues/7
 
-after_success:
-  - killall java
-
-after_failure:
-  - killall java
+  # Build Sphinx documentation
+  # # Disabled due to the following error:
+  # # make: *** docs: No such file or directory.  Stop.
+  # - conda install --channel=conda-forge sphinx=1.7.4
+  # - make --directory=docs html
@@ -0,0 +1,6 @@
+include README.md
+include LICENSE
+include environment.yml
+include snorkel/vis/tree-chart.html
+include snorkel/vis/tree-chart.js
+recursive-include snorkel/viewer *
@@ -1,7 +1,7 @@
 <img src="figs/logo_01.png" width="150"/>
 
 
-**_v0.6.3_**
+**_v0.7.0-beta_**
 
 [![Build Status](https://travis-ci.org/HazyResearch/snorkel.svg?branch=master)](https://travis-ci.org/HazyResearch/snorkel)
 [![Documentation](https://readthedocs.org/projects/snorkel/badge/)](http://snorkel.readthedocs.io/en/master/)
@@ -14,8 +14,8 @@
 
 ## Getting Started
 
-* Installation instructions [below](#installation)
-* Get started with the tutorials [below](#learning-how-to-use-snorkel)
+* Get set up quickly [below](#quick-start)
+* Try the tutorials with [these instructions](#tutorials)
 * Documentation [here](http://snorkel.readthedocs.io/en/master/)
 
 ## Motivation
@@ -48,74 +48,142 @@ However, **_Snorkel is very much a work in progress_**, so we're eager for any a
 * _[Learning to Compose Domain-Specific Transformations for Data Augmentation](https://arxiv.org/abs/1709.01643)_ (NIPS 2017)
 * _[Gaussian Quadrature for Kernel Features](https://arxiv.org/abs/1709.02605)_ (NIPS 2017)
 
-## Learning how to use Snorkel
-The [introductory tutorial](https://github.com/HazyResearch/snorkel/tree/master/tutorials/intro) covers the entire Snorkel workflow, showing how to extract spouse relations from news articles.
-The tutorial is available in the following directory:
-```
-tutorials/intro
+## Quick Start
+
+This section has the commands to quickly get started running Snorkel.
+For more detailed installation instructions, see the [Installation section](#installation) below.
+These instructions assume that you already have [conda](https://conda.io/) installed.
+
+First, download and extract a copy of the Snorkel directory from a [GitHub release](https://github.com/HazyResearch/snorkel/releases) (version 0.7.0 or greater).
+Then navigate to the root of the `snorkel` directory in a terminal and run the following:
+
+```sh
+# Install the environment
+conda env create --file=environment.yml
+
+# Activate the environment
+conda activate snorkel
+
+# Install snorkel in the environment
+pip install .
+
+# Activate jupyter widgets
+jupyter nbextension enable --py widgetsnbextension
+
+# Initiate a jupyter notebook server
+jupyter notebook
 ```
-You can also check out all the great **[materials](https://simtk.org/frs/?group_id=1263)** from the recent Mobilize Center-hosted [Snorkel workshop](http://mobilize.stanford.edu/events/snorkelworkshop2017/)!
 
-Then, for more content, check out the other tutorials avaliable [here](https://github.com/HazyResearch/snorkel/tree/master/tutorials).
+Then a Jupyter notebook tab will open in your browser. From here you can run existing Snorkel notebooks or create your own.
+
+### Tutorials
+
+From within the Jupyter browser, navigate to the [`tutorials`](tutorials) directory and try out one of the existing notebooks!
+
+The [introductory tutorial](tutorials/intro) in `tutorials/intro` covers the entire Snorkel workflow, showing how to extract spouse relations from news articles.
+You can also check out all the great [materials](https://simtk.org/frs/?group_id=1263) from the recent Mobilize Center-hosted [Snorkel workshop](http://mobilize.stanford.edu/events/snorkelworkshop2017/)!
 
 ## Release Notes
+
+### Major changes in v0.7:
+* [PyTorch](https://pytorch.org/) classifiers
+* Installation now via [Conda](https://conda.io/) and `pip`
+* Now [spaCy](https://spacy.io/) is the default parser (v1), with support for v2
+* And many more fixes, additions, and new material!
+
+### Older versions
+
+<details>
+
 ### Major changes in v0.6:
+
 * Support for categorical classification, including "dynamically-scoped" or _blocked_ categoricals (see [tutorial](tutorials/advanced/Categorical_Classes.ipynb))
 * Support for structure learning (see [tutorial](tutorials/advanced/Structure_Learning.ipynb), ICML 2017 paper)
 * Support for labeled data in generative model
 * Refactor of TensorFlow bindings; fixes grid search and model saving / reloading issues (see `snorkel/learning`)
 * New, simplified Intro tutorial ([here](tutorials/intro))
-* Refactored parser class and support for [spaCy](https://spacy.io/) as new default parser
+* Refactored parser class and support for [spaCy](https://spacy.io/) as new parser
 * Support for easy use of the [BRAT annotation tool](http://brat.nlplab.org/) (see [tutorial](tutorials/advanced/BRAT_Annotations.ipynb))
 * Initial Spark integration, for scale out of LF application (see [tutorial](tutorials/snark/Snark%20Tutorial.ipynb))
 * Tutorial on using crowdsourced data [here](tutorials/crowdsourcing/Crowdsourced_Sentiment_Analysis.ipynb)
 * Integration with [Apache Tika](http://tika.apache.org/) via the [Tika Python](http://github.com/chrismattmann/tika-python.git) binding.
 * And many more fixes, additions, and new material!
 
+</details>
+
 ## Installation
-Snorkel uses Python 2.7 or Python 3 and requires [a few python packages](python-package-requirement.txt) which can be installed using [`conda`](https://www.continuum.io/downloads) and `pip`.
 
-### Setting Up Conda
-Installation is easiest if you download and install [`conda`](https://www.continuum.io/downloads).
-You can create a new conda environment with e.g.:
-```
-conda create -n py2Env python=2.7 anaconda
-```
-And then run the correct environment:
-```
-source activate py2Env
-```
+Starting with version 0.7.0, Snorkel should be installed as a Python package using `pip`.
+However, installing Snorkel via `pip` will not install dependencies, which are required for Snorkel to run.
+To manage its dependencies, Snorkel uses [conda](https://conda.io/), which allows specifying an environment via an `environment.yml` file.
 
-### Installing dependencies
-First install [NUMBA](https://numba.pydata.org/), a package for high-performance numeric computing in Python via Conda:
-```bash
-conda install numba
-```
+This documentation covers two common cases (usage and development) for setting up conda environments for Snorkel.
+In both cases, the environment can be activated using `conda activate snorkel` and deactivated using `conda deactivate`
+(for versions of conda prior to 4.4, replace `conda` with `source` in these commands).
+Users just looking to try out a Snorkel tutorial notebook should see the quick-start instructions above.
 
-Then install the remaining package requirements:
-```bash
-pip install --requirement python-package-requirement.txt
-```
+### Using Snorkel as a Package
+
+This setup is intended for users who would like to use Snorkel in their own applications by importing the package.
+In such cases, users should define a custom `environment.yml` to manage their project's dependencies.
+We recommend starting with the [`environment.yml`](environment.yml) in this repository.
+The below modifications can help customize it for your needs:
+
+<details>
+
+1. Specifying versions for the listed packages, such as changing `python` to `python=3.6.5`.
+Versioned specification of your environment is critical to reproducibility and ensuring dependency updates do not break your pipeline.
+When first setting your package versions, you likely want to start with the latest versions available on the [conda-forge](https://anaconda.org/conda-forge/) channel, unless you have a reason to do otherwise.
+2. Adding other packages to your environment as required by your use case.
+Consider maintaining alphabetical sorting of packages in `environment.yml` to assist with maintainability.
+In addition, we recommend installing packages via pip only if they are not available in the conda-forge channel.
+3. Adding the `snorkel` package installation to your `environment.yml`, under the `- pip` section.
+Of course, we suggest versioning snorkel, which you can do via a release number or commit hash (to access more bleeding-edge functionality):
+  ```yml
+    # Versioned via release tag
+    - git+https://github.com/HazyResearch/snorkel@v0.7.0-beta
+    # Versioned via commit hash (commit hash below is fake to ensure you change it)
+    - git+https://github.com/HazyResearch/snorkel@7eb7076f70078c06bef9752f22acf92fd86e616a
+  ```
+Finally, consider versioning the `numbskull` and `treedlib` pip dependencies by changing `master` to their latest commit hash on GitHub.
+
+</details>
+
+### Development Environment
+
+This setup is intended for users who have cloned this repository and would like to access the environment for development.
+This approach installs the `snorkel` package in development mode, meaning that changes you make to the source code will automatically be applied to the `snorkel` package in the environment.
+
+```sh
+# From the root directory of this repo run the following command.
+conda env create --file=environment.yml
+
+# Activate the conda environment (if using a version of conda below 4.4, use "source" instead of "conda")
+conda activate snorkel
 
-Finally, enable `ipywidgets`:
-```bash
-jupyter nbextension enable --py widgetsnbextension --sys-prefix
+# Install snorkel in development mode
+pip install --editable .
 ```
 
-_Note: If you are using conda and experience issues with `lxml`, try running `conda install libxml2`._
+### Additional installation notes
 
-_Note: Currently the `Viewer` is supported on the following versions:_
-* `jupyter`: 4.1
-* `jupyter notebook`: 4.2
+<details>
 
-In some tutorials, etc. we also use [Stanford CoreNLP](http://stanfordnlp.github.io/CoreNLP/) for pre-processing text; you will be prompted to install this when you run `run.sh`.
+Snorkel can be installed directly from its GitHub repository via:
 
-## Running
-After installing, just run:
 ```
-./run.sh
+# WARNING: read installation section before running this command! This command
+# does not install any dependencies. It installs the latest master version but
+# you can change master to a tag or commit hash
+pip install git+https://github.com/HazyResearch/snorkel@master
 ```
 
+_Note: Currently the `Viewer` is supported on the following versions:_
+* `jupyter`: 4.1
+* `jupyter notebook`: 4.2
+
+</details>
+
 ## Q & A
 **Many questions about Snorkel get answered in the issues section--along with general discussions and conversations of interest.
 We tag these all as "Q&A" and save them [here](https://github.com/HazyResearch/snorkel/issues?utf8=%E2%9C%93&q=is%3Aissue+label%3A%22Q%26A%22+)**
@@ -130,6 +198,8 @@ If submitting an issue about a bug, however, **please provide a pointer to a not
 
 Snorkel is built specifically with usage in **Jupyter/IPython notebooks** in mind; an incomplete set of best practices for the notebooks:
 
+<details>
+
 It's usually most convenient to write most code in an external `.py` file, and load as a module that's automatically reloaded; use:
 ```python
 %load_ext autoreload
@@ -140,3 +210,5 @@ A more convenient option is to add these lines to your IPython config file, in `
 c.InteractiveShellApp.extensions = ['autoreload']     
 c.InteractiveShellApp.exec_lines = ['%autoreload 2']
 ```
+
+</details>
@@ -11,4 +11,4 @@ make html
 
 **Note: Most problems are caused by dependence on libraries that readthedocs can't
 load (ones that rely on C libs) like `numpy` or `scipy`; just add these (and all
-submodules loaded) to the `MOCK_MODULES` array in `conf.py`.**
+submodules loaded) to the `MOCK_MODULES` array in `conf.py`.**