Difference between revisions of "Team:EPF Lausanne/Software"

Line 932: Line 932:
  
 
</html>
 
</html>
{{:Team:EPF_Lausanne/Test/footer}}
+
{{:Team:EPF_Lausanne/Footer}}

Revision as of 23:37, 16 September 2015

EPFL 2015 iGEM bioLogic Logic Orthogonal gRNA Implemented Circuits EPFL 2015 iGEM bioLogic Logic Orthogonal gRNA Implemented Circuits

Software

The following table summarizes the programs written by members of our team. All the software is released under the GNU General Public License (GPLv3). This means that the software is free, open source and can be modified and redistributed under the terms of the license.

Original source code is downloadable directly from the following table.

Name Description
code2html Script that automatically generates HTML and CSS code from source files in Python, C++ or BASH. Download
ODE Solver Class solving a system of non-linear ODEs given the initial condition. Download
ODE Fit Class fitting the parameter of a system of ODEs to experimental data. Download
Human Blaster Script blasting gRNAs versus the human genome. Download


code2html

The following Python script generates HTML (and CSS) code from source files written in C++, Python or Bash. It is based on Pygments, a Python syntax highlighter. All code in our Wiki is formatted using this script.

This script accepts two command line arguments: the first argument is the name of the file to convert, the second one (optional) is to ask for separate HTML and CSS files.

The style is hard-coded, but it can be changed easily by modifying the style string. The Pygments documentation lists the available themes and explains how to create new ones.


"""
Copyright (C) 2015 iGEM Team EPF_Lausanne

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>



Command:

######################################

    python code2html INPUTFILE [CSS]

######################################

INPUTFILE: name (with path) of the file to convert to html
CSS: write "true" (or "t", "yes", "y") in order to obtain separate .html and .css files ("false" by default)
"""

from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.lexers import CppLexer
from pygments.lexers import BashLexer
from pygments.formatters import HtmlFormatter

# Name of the Pygments style handed to the HTML formatter
style = "monokai"

# File extensions (without the leading dot) recognised for each language
py = ["py"]                        # Python
cpp = ["cpp", "cxx", "cc", "h"]    # C++ sources and headers
bash = ["sh", "bash"]              # Bash

def load_file_as_sting(fname):
    """
    Return the whole content of the file FNAME as one unformatted string
    """

    # The context manager closes the file as soon as the content is read
    with open(fname, 'r') as source:
        return source.read()

def save_string_as_file(fname,string):
    """
    Write STRING to the file FNAME, replacing any previous content
    """

    # Mode 'w' truncates an existing file; the file is closed on exit
    with open(fname, 'w') as target:
        target.write(string)

def lexer_formatter(language,css=False):
    """
    Build the lexer matching LANGUAGE together with the HTML formatter

    LANGUAGE is a file extension; it is looked up in the module-level
    extension lists py, cpp and bash (in that order).

    When CSS is False the formatter produces a full, standalone HTML
    document; when it is True only the highlighted fragment is produced.

    Raises NameError if LANGUAGE is not in any of the extension lists.
    """

    # (known extensions, lexer class) pairs, tested in order
    known_lexers = (
        (py, PythonLexer),
        (cpp, CppLexer),
        (bash, BashLexer),
    )

    for extensions, lexer_class in known_lexers:
        if language in extensions:
            lexer = lexer_class()
            break
    else:
        # No extension list contains LANGUAGE
        raise NameError("Invalid language.")

    formatter = HtmlFormatter(full=not css, style=style)

    return lexer, formatter


def code_to_htmlcss(code,language):
    """
    Convert CODE into an HTML fragment and a separate CSS style sheet

    Returns the pair (html, css); the CSS rules are generated for the
    '.highlight' selector.
    """

    lexer, formatter = lexer_formatter(language, css=True)

    # Highlight first, then ask the same formatter for its style rules
    markup = highlight(code, lexer, formatter)
    stylesheet = formatter.get_style_defs('.highlight')

    return markup, stylesheet

def code_to_html(code,language):
    """
    Convert CODE into a single standalone HTML document (style included)
    """

    # css defaults to False, so the formatter emits a full HTML page
    lexer, formatter = lexer_formatter(language)

    return highlight(code, lexer, formatter)

import sys

if __name__ == "__main__":
    """
    Command:

    ######################################

        python code2html INPUTFILE [CSS]

    ######################################

    INPUTFILE: name (with path) of the file to convert to html
    CSS: write "true" (ot "t", "yes", "y") in order to obtain separate .html and .css files ("false" by default)
    """

    # Command line arguments
    args = sys.argv

    # Check command line arguments
    ncla = len(args) # number of command line arguments

    if ncla != 2 and ncla != 3 :
        raise TypeError("Invalid number of command line arguments.")

    css_bool = False

    if ncla == 3 and args[-1].lower() in ["true",'t',"yes",'y']:
        css_bool = True # Export css separately

    # Input file
    fname_code = sys.argv[1] # Name of the file containing the code to convert in html

    # Input file extension
    language = fname_code.split('.')[-1]

    # Output files
    fname_html = fname_code.split('.')[0] + ".html" # Name of the file where the html code will be stored
    fname_css = fname_code.split('.')[0] + ".css" # Name of the file where the css code will be stored

    # Save code into a unformatted string
    code = load_file_as_sting(fname_code)

    if css_bool == False: # Convert to standalone html
        html = code_to_html(code,language)
    else: # Convert to html and css separately
        html,css = code_to_htmlcss(code,language)

    # Save html
    save_string_as_file(fname_html,html)

    if css_bool == True:
        # Save css
        save_string_as_file(fname_css,css)

ODE Solver

Our kinetic model leads to a system of coupled first-order, nonlinear, ordinary differential equations (ODEs). In order to solve this system we used an explicit Runge-Kutta method of order 4(5) with adaptive step size control and dense output due to Dormand and Prince, implemented by E. Hairer and G. Wanner [1] and available in the SciPy Python library. To facilitate the use of this integrator, we created a utility class suited to our needs.

The Solver class needs the function defining the system of ODEs we want to solve, an initial condition and the interval on which we want to integrate. Note that the time step \(\Delta t\) (which is also an argument of the constructor of the Solver class) is not the discretization step, because our algorithm is adaptive: \(\Delta t\) is the maximal allowed step and defines the points where the solution of the ODE system will be computed.


"""
Copyright (C) 2015 iGEM Team EPF_Lausanne

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>
"""

import numpy as np
from scipy.integrate import *

class Solver:
    """
    Class that allows the solution of a system of non-linear ODEs. The system is specified by the function fun

        dy/dt = fun(t,y)

    where t is a number and y and dy/dt are numpy arrays or lists.

    The solution is performed with the dopri5 method, an explicit Runge-Kutta method of order (4)5.
    The method is due to Dormand & Prince, and is implemented by E. Hairer and G. Wanner.

    See
        http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.integrate.ode.html
    for more details.

    NOTE:
    Our Solver can take a function of the form

        f(t,y,pars)

    where PARS are parameters. PARS can optionally be passed to the constructor of the Solver;
    they are unpacked and forwarded to the function as extra positional arguments.
    """

    def __init__(self,dt,fun,t0,T,y0,pars=None):
        """
        Store the description of the problem to solve.

        dt   : spacing between the output points produced by solve()
        fun  : function fun(t,y,*pars) returning dy/dt
        t0   : initial time
        T    : final time
        y0   : initial condition
        pars : extra arguments forwarded to fun (none by default)
        """
        self.dt = dt # Time step
        self.fun = fun # Function representing the ODE
        self.t0 = t0 # Initial time
        self.T = T # Final time
        self.y0 = y0 # Initial condition

        # Parameters of the system of ODEs. A None default avoids sharing
        # a single mutable list between all Solver instances.
        self.pars = [] if pars is None else pars

    def _new_integrator(self):
        """
        Return a dopri5 integrator initialized with y0, t0 and the parameters.
        """
        # Choose integrator type
        r = ode(self.fun).set_integrator('dopri5')

        # Initialize the integrator
        r.set_initial_value(self.y0, self.t0)

        # Set parameters for the ODE function
        r.set_f_params(*self.pars)

        return r

    def solve(self):
        """
        Solve the system of ODEs

            dy/dt = fun(t,y)

        starting from the initial condition y(t0)=y0 and advancing by dt until T is reached.

        Returns two numpy arrays, time (1-D) and solution (2-D, one row per time point),
        in this order. The first entries are t0 and y0.

        The stepping stops when the integrator reports a failure or when the current time
        is no longer smaller than T; since the test is made before each step of size dt,
        the last time point can exceed T by up to dt.
        """

        r = self._new_integrator()

        # Collect the points in lists and convert once at the end:
        # growing numpy arrays inside the loop copies them at every step
        time = [self.t0]
        solution = [np.asarray(self.y0)]

        while r.successful() and r.t < self.T:
            r.integrate(r.t + self.dt) # Perform one integration step, i.e. obtain the solution y at time t+dt

            time.append(r.t) # Append the new time
            solution.append(np.array(r.y)) # Append a copy of the new solution

        return np.asarray(time), np.vstack(solution) # Return time and solution vectors

    def solve_for_t(self,t):
        """
        Solve the system of ODEs

            dy/dt = fun(t,y)

        starting from the initial condition y(t0)=y0.

        Returns two numpy arrays, time and solution (in this order), containing the
        time points and the solution at these time points.

        The solution is computed at the points specified in t, i.e. the time step dt is ignored.
        The success of the integrator is not checked between points.
        """

        r = self._new_integrator()

        # Initialize solution list and time points list
        solution = []
        time = []

        for tt in t:
            r.integrate(tt) # Integrate up to the requested time

            time.append(tt) # Append the new time
            solution.append(np.array(r.y)) # Append a copy of the new solution

        return np.asarray(time), np.asarray(solution) # Return time and solution vectors


if __name__ == "__main__":
    # Demonstration using our test functions:
    #    rapid_equilibrium (standard function)
    #    rapid_equilibrium_from_string() (returns a function compiled from a string)

    import matplotlib.pylab as plt
    from test import * # Import test functions for the ODE integrator

    # Output spacing, integration interval and initial condition
    dt = 0.1
    t0, T = 0, 100
    y0 = [1,0,0]

    # Store the function compiled from a string
    rapid_equilibrium_s = rapid_equilibrium_from_string()

    # One solver per variant of the test function
    mysolver = Solver(dt, rapid_equilibrium, t0, T, y0)
    mysolver_string = Solver(dt, rapid_equilibrium_s, t0, T, y0)

    # Regularly spaced output, output at 10 chosen points, and the
    # regularly spaced output of the string-compiled function
    t, y = mysolver.solve()
    tt, yt = mysolver.solve_for_t(np.linspace(t0, T, 10))
    ts, ys = mysolver_string.solve()

    # Plot the three results in the same figure (chosen points as crosses)
    for curve in ((t, y), (tt, yt, 'x'), (ts, ys)):
        plt.plot(*curve)

    plt.show()

References

[1] E. Hairer et al., Solving Ordinary Differential Equations, 2nd edition, Springer-Verlag, 1993.

ODE Fit

The system of first-order, nonlinear, ordinary differential equations (ODEs) which represents the kinetic model of our system contains a large number of parameters. The majority of them were found in the literature, while others were simply estimated. Some parameters, however, are so specific to our system that they need to be fitted to experimental data.

Fitting the parameters of a system of ODEs to experimental curves is a non-trivial task: the system of ODEs needs to be solved for each value of the parameters during the optimization. In order to solve the system of ODEs for a fixed set of parameters we used our ODE Solver. Parameter optimization is performed by minimizing the squared difference between the experimental points and the numerical solution.

Our ODEFit class needs the function defining the system of ODEs we want to optimize, the experimental data, and initial guesses for the initial conditions and for the parameters. Note that, in order to keep the code general, the initial conditions are also free parameters, since experimental data do not allow for their exact determination.


"""
Copyright (C) 2015 iGEM Team EPF_Lausanne

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>
"""

import numpy as np
import warnings as w
from integrator import Solver
from scipy.optimize import minimize

class ODEFit:
    """
    Class that allows the fit of the solution of a system of ODEs

        dy/dt = fun(t,y,pars)

    to experimental data, where PARS are the parameters to fit.

    This class relies on the ODE Solver class in order to solve the system of ODEs.
    """

    def __init__(self,fun,t,y,y0,pars0):
        """
        The constructor needs the function FUN defining the system of ODEs.

        t and y are experimental data; the last axis of y indexes the species
        (one column per ODE of the system).

        y0 is the initial guess for the initial conditions.
        pars0 is the initial guess for the parameters.

        y0 and pars0 can be any sequences (lists, tuples or numpy arrays).


        NOTE:
        The initial condition y0 is free, i.e. it is a parameter as well.
        """

        # Function defining the system of ODEs
        self.fun = fun

        # Merge all parameters to optimize in a single list. Converting to
        # lists first makes "+" a concatenation even for numpy arrays
        # (for which "+" would be an element-wise sum).
        self.pars0 = list(pars0) + list(y0)

        # Data to fit (convert to Numpy arrays)
        self.t = np.asarray(t)
        self.y = np.asarray(y)

        # Number of ODEs composing the system
        self.n = self.y.shape[-1]

        # Fitted parameters (initialized to a copy of the initial guess)
        self.pars = list(self.pars0)

        # Index of separation between real parameters and (free) initial conditions
        self.index = len(self.pars) - self.n

        # Keep track of FIT routine call (if True, self.pars contains the optimized parameters)
        self.fitted = False

    def _solve_for_pars(self,pars):
        """
        Solve the system of ODEs for a given set of parameters.

        Solution is computed at experimental data points.

        Returns the pair (t, y) of numpy arrays.
        """

        # pars[:self.index] : fun parameters
        # pars[self.index:] : initial conditions (last entries)
        # The time step (0.1) is not used by solve_for_t
        mysolver = Solver(0.1,self.fun,self.t[0],self.t[-1],pars[self.index:],pars[:self.index])

        # Solve the system of ODEs at experimental data points
        t,y = mysolver.solve_for_t(self.t)

        return np.asarray(t),np.asarray(y)

    def _resid(self,y_model,y_data):
        """
        Compute the sum of the squared differences between the ODE solution and the data to fit.
        """
        return ((y_model-y_data)**2).sum()

    def _score(self,pars):
        """
        The score is defined as the sum of residuals for each ODE composing the system.

        The score function is minimized in order to optimize the parameters (least squares of residuals).
        """

        # Solve the system of ODEs for a given set of parameters
        t,y_model = self._solve_for_pars(pars)

        # The score is the sum of residuals for each ODE (one column each)
        return sum(self._resid(y_model[:,i],self.y[:,i]) for i in range(self.n))

    def fit(self):
        """
        Fit data in order to obtain the best parameter estimation.

        The best parameter set is obtained by minimizing the score function, i.e. the sum of residuals.

        Returns the pair (parameters, initial conditions) found by the optimizer.

        Raises RuntimeError (with the optimizer message) if the minimization
        does not succeed; self.pars is updated with the last estimate and
        self.fitted is set in that case as well.
        """

        opt = minimize(self._score,self.pars0)

        # Extract the optimized parameters
        self.pars = opt.x

        # The fit function has been called
        self.fitted = True

        if not opt.success: # Check the success of the minimization process
            raise RuntimeError(opt.message)

        return opt.x[:self.index], opt.x[self.index:]

    def solve_fitted(self,dt=0.1):
        """
        Solve the system of ODEs with the current parameters on a regular grid.

        dt is the spacing between output points, which can be changed manually.

        A warning is issued if fit() has never been called, since the
        parameters are then still the initial guess.

        Returns the pair (time, solution) produced by Solver.solve().
        """

        if not self.fitted: # Warning: the FIT function has never been called
            w.warn("Warning: model not fitted, parameters are the initial ones.")

        # Solve for the current set of parameters
        mysolver = Solver(dt,self.fun,self.t[0],self.t[-1],self.pars[self.index:],self.pars[:self.index])

        # Solve for a dense grid, not for data points
        return mysolver.solve()

if __name__ == "__main__":
    from integrator import *
    from test import *
    import numpy as np
    from numpy.random import randn
    import matplotlib.pylab as plt

    def randomize(lst,scale=100.):
        """
        Add to every entry of lst, in place, a random draw from randn()
        divided by scale (displacement along y axis); lst is also returned
        """
        for position, _ in enumerate(lst):
            lst[position] += randn() / scale

        return lst

    def random_data(dt,fun,t0,T,y0,sel=10):
        """
        Create noisy data from the numerical solution of the system of ODEs

        Only one point out of SEL is kept from the solution.
        """

        # Solve the system of ODEs
        t,y = Solver(dt,fun,t0,T,y0).solve()

        # Indices of the retained points (one out of SEL)
        kept = range(0, len(y), sel)

        t_stripped = [t[i] for i in kept]

        # Randomize the retained points (mimics experimental noise)
        y_random = [randomize(y[i]) for i in kept]

        return t_stripped, y_random

    # Test function representing the system of ODEs
    test_function = rapid_equilibrium

    # Output spacing, integration interval and initial condition
    dt = 0.1
    t0, T = 0, 100
    y0 = [1,0,0]

    # Creates noisy data
    t,y = random_data(dt,test_function,t0,T,y0)
    y = np.asarray(y)

    # Initial guess for the parameters
    pars0 = [1,0.01]

    # Create an ODEFit object: data points are the noisy solution
    myfit = ODEFit(test_function,t,y,y0,pars0)

    # Fit noisy data points; entry 0 holds the real parameters, the
    # fitted initial conditions are discarded
    pars = myfit.fit()[0]

    # Print parameters
    print(pars)

    # Solve the system of ODEs using optimal parameters
    tt,yy = myfit.solve_fitted()

    plt.plot(t,y,'x') # Plot noisy data
    plt.plot(tt,yy) # Plot fitting function
    plt.show()

Human Blaster

The Human blaster allows the user to check if a gRNA complementary sequence is present in the human genome.

This program can help iGEM teams that seek to implement the use of gRNAs in their project.


"""
Copyright (C) 2015 iGEM Team EPF_Lausanne

This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.

This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
GNU General Public License for more details.

You should have received a copy of the GNU General Public License
along with this program.  If not, see <http://www.gnu.org/licenses/>



HumanBlaster.py -- blasts gRNAs against human genome

Requirements :
    - Install selenium on your computer:
        http://selenium-python.readthedocs.org/en/latest/installation.html
    - Save the gRNAs in a file named gRNAs like this : name:sequence
"""

from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os

def skip(inp, nb):
    """
    Read and discard NB lines from the open file INP.
    """
    for _discarded in range(nb):
        inp.readline()

def seqReader(inp, initSkip):
    """
    Analyse the blast results of one query.

    INITSKIP lines are skipped first; the next line must have the form
    "label: name" and provides the name of the query. Four more lines are
    skipped, then tab-separated result lines are read until a line starting
    with "#" or the end of the file.

    For every result line satisfying the selection condition, this function
    prints the hit together with its percentage of similarity and its
    number of mismatches.

    You can change the sensitivity of the script by changing the conditions
        cond = (int(k[6]) == 1 and int(k[7]) >= 23)
        cond = (int(k[7]) == 23 and int(k[6]) <= 1)
    (columns 6 and 7 are presumably the start and end of the alignment on
    the query in the BLAST hit table -- to be confirmed against the format
    of the downloaded file).

    Returns the line that stopped the reading (a line starting with "#"),
    or "" when the end of the file is reached, when no query name can be
    read, or when a result line has fewer than 8 columns.
    """

    skip(inp, initSkip) # Skip initial lines

    line = inp.readline()

    header = line.split(":")
    if len(header) < 2:
        # End of file, or not a "label: name" line: nothing left to analyse
        return ""

    # Drop the space following ":" and the trailing newline
    name = str(header[1][1:-1])

    skip(inp, 4) # Skip four lines

    l = inp.readline()

    while l != "" and l[0] != "#":
        k = l.split("\t")
        if len(k) < 8: # Columns up to index 7 are needed below
            return ""
        if (name[:2] == "CC"):
            # Queries whose name starts with "CC" (motif placed before the sequence)
            cond = (int(k[6]) == 1 and int(k[7]) >= 23)
        else:
            cond = (int(k[7]) == 23 and int(k[6]) <= 1)

        if cond:
            print (name + ": " + k[1].split("|")[3] + " with " + k[2] + "% similarity, " + k[4] + " mismatches.")

        l = inp.readline()

    return l

# Three-letter motifs appended to / prepended to every gRNA sequence
# (presumably the NGG PAM and its reverse complement CCN -- to be confirmed)
motifs_after = ["TGG", "GGG", "AGG", "CGG"]
motifs_before = ["CCG", "CCC", "CCA", "CCT"]

# Build one FASTA record per gRNA and per motif
records = []
with open("gRNAs", 'r') as input1:
    for i in input1:
        entry = i.strip() # Also handles a last line without newline
        if not entry:
            continue # Ignore blank lines

        fields = entry.split(":")
        if len(fields) < 2:
            raise ValueError("Invalid line in gRNAs (expected name:sequence): " + repr(i))
        name, seq = fields[0], fields[1]

        for motif in motifs_after:
            records.append(">" + name + "_" + motif + "\n" + seq + motif + "\n")
        for motif in motifs_before:
            records.append(">" + motif + "_" + name + "\n" + motif + seq + "\n")

# The same text is saved on disk and submitted to BLAST
text = "".join(records)

with open("FASTA_gRNAs", 'w') as output:
    output.write(text)

driver = webdriver.Firefox() # Will open Firefox
driver.get("http://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastn&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome") #Address
assert "Nucleotide BLAST: Search nucleotide databases using a nucleotide query" in driver.title
elem = driver.find_element(By.ID, "seq") #Selects the place where the sequences will be written
elem.send_keys(text) #Writes them
elem = driver.find_element(By.ID, "qtitle")
elem.send_keys("Our Blast") #Change here if you want to have another name for the BLAST
driver.find_element(By.ID, "Rhc").click() #Chooses human genome
driver.find_element(By.ID, "b1").click() #Starts the BLAST

#Will wait up to 500000 seconds (about 139 hours) for the download page to be ready
try:
    WebDriverWait(driver, 500000).until(EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[2]/div[3]/div[2]/form/table/tbody/tr[2]/td[1]/div[2]/a[5]")))
except Exception:
    # Close the browser only on failure: it is still needed for the download
    driver.close()
    raise

driver.find_element(By.ID, "showDownload").click() #Will start the download
driver.find_element(By.ID, "hitText").click()

# The result file has to be saved by hand before it can be analysed
print("You need to save the file in this folder using the name resultat.txt")
continuation = input('When you are done press 1\n')

if continuation:
    with open("resultat.txt", 'r') as report:
        # The first query is preceded by two lines to skip, the following
        # ones by a single line; seqReader returns the line that ended
        # the previous query (or "" when there is nothing left)
        pending = seqReader(report, 2)
        while pending.startswith("#"):
            pending = seqReader(report, 1)
EPFL 2015 iGEM bioLogic Logic Orthogonal gRNA Implemented Circuits