Difference between revisions of "Team:EPF Lausanne/Software"
Line 932: | Line 932: | ||
</html> | </html> | ||
− | {{:Team:EPF_Lausanne/ | + | {{:Team:EPF_Lausanne/Footer}} |
Revision as of 23:37, 16 September 2015
The following table summarizes the programs written by members of our team. All the software is released under the GNU General Public License (GPLv3). This means that the software is free, open source and can be modified and redistributed under the terms of the license.
Original source code is downloadable directly from the following table.
Name | Description | |
---|---|---|
code2html | Script that automatically generates HTML and CSS code from source files in Python, C++ or BASH. | Download |
ODE Solver | Class solving a system of non-linear ODEs given the initial condition. | Download |
ODE Fit | Class fitting the parameter of a system of ODEs to experimental data. | Download |
Human Blaster | Script blasting gRNAs versus the human genome. | Download |
code2html
The following Python script generates HTML (and CSS) code from source files written in C++, Python or Bash. It is based on Pygments, a Python syntax highlighter. All code in our Wiki is formatted using this script.
This script accepts two command line arguments: the first argument is the name of the file to convert, the second one (optional) is to ask for separate HTML and CSS files.
The style is hard-coded, but it can be changed easily by modifying the style string. The Pygments documentation lists the available themes and explains how to create new ones.
"""
Copyright (C) 2015 iGEM Team EPF_Lausanne
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
Command:
######################################
python code2html INPUTFILE [CSS]
######################################
INPUTFILE: name (with path) of the file to convert to html
CSS: write "true" (or "t", "yes", "y") in order to obtain separate .html and .css files ("false" by default)
"""
from pygments import highlight
from pygments.lexers import PythonLexer
from pygments.lexers import CppLexer
from pygments.lexers import BashLexer
from pygments.formatters import HtmlFormatter
# Code formatting style
style = "monokai"
# C++ extensions
cpp = ["cpp","cxx","cc","h"]
# Python extensions
py = ["py"]
# Bash extensions
bash = ["sh","bash"]
def load_file_as_sting(fname):
    """
    Return the whole content of the file FNAME as one unformatted string.

    The file is opened read-only in text mode; the `with` statement closes
    it as soon as the content has been read.
    """
    with open(fname, 'r') as handle:
        return handle.read()
def save_string_as_file(fname,string):
    """
    Write the unformatted string STRING to the file FNAME.

    The file is opened write-only in text mode, so an existing file with
    the same name is overwritten. Nothing is returned.
    """
    with open(fname, 'w') as handle:
        # The handle is closed automatically when the block is left
        handle.write(string)
def lexer_formatter(language,css=False):
    """
    Build the Pygments lexer matching LANGUAGE together with an HTML formatter.

    LANGUAGE is a file extension (without the dot); it is looked up in the
    module-level extension lists `py`, `cpp` and `bash`, in that order.
    When CSS is true the formatter is created with full=False, otherwise
    with full=True. The formatter always uses the module-level `style`.

    Returns the tuple (lexer, formatter).
    Raises NameError when LANGUAGE is in none of the extension lists.
    """
    # (known extensions, lexer class) pairs, probed in order
    candidates = (
        (py, PythonLexer),
        (cpp, CppLexer),
        (bash, BashLexer),
    )

    for extensions, lexer_class in candidates:
        if language in extensions:
            lexer = lexer_class()
            break
    else:
        # No extension list contains LANGUAGE
        raise NameError("Invalid language.")

    formatter = HtmlFormatter(full=not css,style=style)

    return lexer, formatter
def code_to_htmlcss(code,language):
    """
    Highlight CODE and return the HTML and the CSS as two separate strings.

    LANGUAGE is the file extension used to select the lexer (see
    lexer_formatter). The CSS rules are generated for the '.highlight'
    selector.

    Returns the tuple (html, css).
    """
    lexer, formatter = lexer_formatter(language,css=True)

    # HTML markup first, then the style definitions
    markup = highlight(code,lexer,formatter)
    stylesheet = formatter.get_style_defs('.highlight')

    return markup, stylesheet
def code_to_html(code,language):
    """
    Highlight CODE and return a single HTML string.

    LANGUAGE is the file extension used to select the lexer. The formatter
    is requested with the default css=False of lexer_formatter, i.e. it is
    created with full=True.
    """
    lexer, formatter = lexer_formatter(language)

    return highlight(code,lexer,formatter)
import sys
if __name__ == "__main__":
    # Command:
    # ######################################
    # python code2html INPUTFILE [CSS]
    # ######################################
    # INPUTFILE: name (with path) of the file to convert to html
    # CSS: write "true" (or "t", "yes", "y") in order to obtain separate
    #      .html and .css files ("false" by default)

    import os  # Only the command line entry point needs path handling

    # Command line arguments (args[0] is the script name)
    args = sys.argv

    # Check command line arguments
    ncla = len(args) # number of command line arguments
    if ncla != 2 and ncla != 3:
        raise TypeError("Invalid number of command line arguments.")

    # Export css separately only when the optional argument asks for it
    css_bool = ncla == 3 and args[-1].lower() in ["true",'t',"yes",'y']

    # Input file
    fname_code = args[1] # Name of the file containing the code to convert in html

    # Split on the LAST dot of the file name only, so that paths such as
    # "./src/my.module.py" keep their directory and the inner dots
    # (splitting on the first dot would yield ".html" for "./src/a.py").
    base, extension = os.path.splitext(fname_code)

    # Input file extension without the dot; lowercased so "FILE.PY" is accepted
    language = extension[1:].lower()

    # Output files, written next to the input file
    fname_html = base + ".html" # Name of the file where the html code will be stored
    fname_css = base + ".css" # Name of the file where the css code will be stored

    # Save code into a unformatted string
    code = load_file_as_sting(fname_code)

    if css_bool: # Convert to html and css separately
        html,css = code_to_htmlcss(code,language)
    else: # Convert to standalone html
        html = code_to_html(code,language)

    # Save html
    save_string_as_file(fname_html,html)

    if css_bool:
        # Save css
        save_string_as_file(fname_css,css)
ODE Solver
Our kinetic model leads to a system of coupled first-order, nonlinear, ordinary differential equations (ODEs). In order to solve this system we used an explicit Runge-Kutta method of order 4 (5) with adaptive step size control and dense output due to Dormand and Prince, implemented by E. Hairer and G. Wanner [1] and available in the SciPy Python library. To facilitate the use of this integrator, we created a utility class suited to our needs.
The Solver class needs the function defining the system of ODEs we want to solve, an initial condition and the interval on which we want to integrate. Note that the time step \(\Delta t\) (which is also an argument of the constructor of the Solver class) is not the discretization step, because our algorithm is adaptive: \(\Delta t\) is the maximal allowed step and defines the points where the solution of the system of ODEs will be computed.
"""
Copyright (C) 2015 iGEM Team EPF_Lausanne
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
"""
import numpy as np
from scipy.integrate import *
class Solver:
    """
    Class that allows the solution of a system of non-linear ODEs. The system is specified by the function fun
        dy/dt = fun(t,y)
    where t is a number and y and dy/dt are numpy arrays or lists.

    The solution is performed with the dopri5 method, an explicit Runge-Kutta method of order (4)5.
    The method is due to Dormand & Prince, and is implemented by E. Hairer and G. Wanner.
    See
        http://docs.scipy.org/doc/scipy-0.14.0/reference/generated/scipy.integrate.ode.html
    for more details.

    NOTE:
    Our Solver can take a function of the form
        f(t,y,pars)
    where PARS are parameters. PARS can be eventually passed to the constructor of the Solver.
    """

    def __init__(self,dt,fun,t0,T,y0,pars=None):
        """
        Store the description of the problem; nothing is computed here.

        dt   : spacing of the output points used by solve()
        fun  : function fun(t,y,*pars) returning dy/dt
        t0   : initial time
        T    : final time
        y0   : initial condition
        pars : extra positional arguments forwarded to fun (none by default)
        """
        self.dt = dt # Time step
        self.fun = fun # Function representing the ODE
        self.t0 = t0 # Initial time
        self.T = T # Final time
        self.y0 = y0 # Initial condition
        # Parameters of the system of ODEs. A fresh list is created for each
        # instance instead of sharing one mutable default between all of them.
        self.pars = [] if pars is None else pars

    def _new_integrator(self):
        """
        Return a dopri5 integrator initialized at (t0,y0) with the parameters set.
        """
        # Choose integrator type
        r = ode(self.fun).set_integrator('dopri5')
        # Initialize the integrator
        r.set_initial_value(self.y0, self.t0)
        # Set parameters for the ODE function
        r.set_f_params(*self.pars)
        return r

    def solve(self):
        """
        Solve the system of ODEs
            dy/dt = fun(t,y)
        on the interval [t0,T], with the initial condition y(t0)=y0.

        The solution is computed at t0, t0+dt, t0+2*dt, ... and the last
        point is exactly T: the integration never goes beyond T, even when
        T-t0 is not a multiple of dt.

        Returns two numpy arrays, time (1-D) and solution (2-D, one row per
        time point), containing time points and the solution at these time points.
        The first entry is always the initial condition; when T <= t0 it is the only one.
        Integration stops early if the integrator reports a failure.

        Raises ValueError if T > t0 and dt is not positive.
        """
        r = self._new_integrator()

        # Number of output steps. Target times are computed from the step
        # index (t0 + k*dt) rather than accumulated from r.t, so that
        # rounding errors cannot add a spurious step past T.
        span = self.T - self.t0
        if span > 0:
            if self.dt <= 0:
                raise ValueError("The time step dt must be positive.")
            # The small tolerance absorbs rounding in span/dt
            n_steps = max(1, int(np.ceil(span / self.dt - 1e-9)))
        else:
            n_steps = 0

        # Collect the points in lists and build the arrays once at the end
        # (growing numpy arrays inside the loop copies them at every step)
        time = [self.t0]
        solution = [np.atleast_1d(np.asarray(self.y0))]

        for k in range(1, n_steps + 1):
            if not r.successful():
                break
            # The last step lands exactly on T
            target = self.T if k == n_steps else self.t0 + k * self.dt
            r.integrate(target) # Obtain the solution y at the target time
            time.append(r.t) # Append the new time
            solution.append(np.array(r.y)) # Append a copy of the new solution

        return np.asarray(time, dtype=float), np.vstack(solution) # Return time and solution vectors

    def solve_for_t(self,t):
        """
        Solve the system of ODEs
            dy/dt = fun(t,y)
        with the initial condition y(t0)=y0.

        The solution is computed at the points specified in t, i.e. the time step dt
        and the final time T are ignored. The integrator is advanced from one
        requested point to the next, in the order given.

        Returns two numpy arrays, time and solution (in this order), containing
        the requested time points and the solution at these time points.
        """
        r = self._new_integrator()

        # Initialize solution list and time points list
        solution = []
        time = []

        for tt in t:
            # Nothing to integrate when the requested time is the current
            # one (typically t[0] == t0): just record the current state.
            if tt != r.t:
                r.integrate(tt)
            time.append(tt) # Append the requested time
            solution.append(np.array(r.y)) # Append a copy of the solution

        return np.asarray(time), np.asarray(solution) # Return time and solution vectors
if __name__ == "__main__":
    # Demonstration of the Solver class. The test functions come from the
    # `test` module:
    #   rapid_equilibrium                 (standard function)
    #   rapid_equilibrium_from_string()   (returns a function compiled from a string)

    import matplotlib.pylab as plt
    from test import * # Import test functions for the ODE integrator

    # Output spacing, integration interval and initial condition
    dt, t0, T = 0.1, 0, 100
    y0 = [1,0,0]

    # Store the function compiled from a string
    rapid_equilibrium_s = rapid_equilibrium_from_string()

    # One solver per version of the test function
    mysolver = Solver(dt,rapid_equilibrium,t0,T,y0)
    mysolver_string = Solver(dt,rapid_equilibrium_s,t0,T,y0)

    # Regular grid, ten user-chosen points, and the string-compiled function
    t,y = mysolver.solve()
    tt,yt = mysolver.solve_for_t(np.linspace(t0,T,10))
    ts,ys = mysolver_string.solve()

    # Plot the three results on the same axes, in the same order as computed
    for curve in ((t,y), (tt,yt,'x'), (ts,ys)):
        plt.plot(*curve)

    plt.show()
References
[1] E. Hairer et al., Solving Ordinary Differential Equations, 2nd edition, Springer-Verlag, 1993.
ODE Fit
The system of first-order, nonlinear, ordinary differential equations (ODEs) which represents the kinetic model of our system contains a large number of parameters. The majority of them were found in the literature, while others were simply estimated. Some parameters, however, are so specific to our system that they need to be fitted to experimental data.
Fitting the parameters of a system of ODEs to experimental curves is a non-trivial task: the system of ODEs needs to be solved for each value of the parameters during the optimization. In order to solve the system of ODEs for a fixed set of parameters we used our ODE Solver. Parameter optimization is performed by minimizing the squared difference between the experimental points and the numerical solution.
Our ODEFit class needs the function defining the system of ODEs we want to optimize, experimental data and initial guess for the initial conditions and for the parameters. Note that in order to have a general code, initial conditions are also free parameters since experimental data does not allow for their exact determination.
"""
Copyright (C) 2015 iGEM Team EPF_Lausanne
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
"""
import numpy as np
import warnings as w
from integrator import Solver
from scipy.optimize import minimize
class ODEFit:
    """
    Class that allows the fit of the solution (experimental data) of a system of ODEs
        dy/dt = fun(t,y,pars)
    where PARS are the parameters to fit.

    This class relies on the ODE Solver class in order to solve the system of ODEs.
    """

    def __init__(self,fun,t,y,y0,pars0):
        """
        The constructor needs the function FUN defining the system of ODEs.
        t and y are experimental data (y can contain a list with the data of different species:
        one row per time point, one column per species; a 1-D y is treated as a single species).
        y0 is the initial guess for the initial conditions.
        pars0 is the initial guess for the parameters.

        y0 and pars0 can be lists, tuples or numpy arrays.

        NOTE:
        The initial condition y0 is free, i.e. it is a parameter as well.
        """
        # Function defining the system of ODEs
        self.fun = fun

        # Initial guess for parameters and initial conditions.
        # Merge all parameters to optimize in a single list: both inputs are
        # converted to lists first, because "+" on numpy arrays would add
        # them element-wise instead of concatenating them.
        self.pars0 = list(pars0) + list(y0)

        # Data to fit (convert to Numpy arrays)
        self.t = np.asarray(t)
        self.y = np.asarray(y)
        if self.y.ndim == 1:
            # A single species: store it as one column
            self.y = self.y[:, np.newaxis]

        # Number of ODEs composing the system (columns of the data)
        self.n = self.y.shape[-1]

        # Fitted parameters (initialized to initial guess)
        self.pars = self.pars0

        # Index of separation between real parameters and (free) initial conditions
        self.index = len(self.pars)-self.n

        # Keep track of FIT routine call (if True, self.pars contains the result of the optimization)
        self.fitted = False

    def _solve_for_pars(self,pars):
        """
        Solve the system of ODEs for a given set of parameters.
        Solution is computed at experimental data points.

        Returns the tuple (t,y) of numpy arrays.
        """
        # pars[:self.index] : fun parameters
        # pars[self.index:] : initial conditions (last entries)
        # The time step (0.1) is not used by solve_for_t
        mysolver = Solver(0.1,self.fun,self.t[0],self.t[-1],pars[self.index:],pars[:self.index])

        # Solve the system of ODEs at experimental data points
        t,y = mysolver.solve_for_t(self.t)

        return np.asarray(t),np.asarray(y)

    def _resid(self,y_model,y_data):
        """
        Compute the sum of the squared differences between the ODE solution and the data to fit.
        """
        return ((y_model-y_data)**2).sum()

    def _score(self,pars):
        """
        The score is defined as the sum of residuals for each ODE composing the system.
        The score function is minimized in order to optimize the parameters (least squares of residuals).
        """
        # Solve the system of ODEs for a given set of parameters
        t,y_model = self._solve_for_pars(pars)

        # Add up the residuals species by species (column by column)
        return sum(self._resid(self.y[:,i],y_model[:,i]) for i in range(self.n))

    def fit(self):
        """
        Fit data in order to obtain the best parameter estimation.
        The best parameter set is obtained by minimizing the score function, i.e. the sum of residuals.

        Returns the tuple (parameters, initial conditions) found by the optimizer.
        Raises RuntimeError (with the optimizer message) if the minimization
        did not succeed. The last point reached by the optimizer is stored in
        self.pars in both cases, so solve_fitted() can still be used after a failure.
        """
        opt = minimize(self._score,self.pars0)

        # Extract the optimized parameters
        self.pars = opt.x

        # The fit function has been called
        self.fitted = True

        # Check the success of the minimization process
        if not opt.success:
            raise RuntimeError(opt.message)

        return opt.x[:self.index], opt.x[self.index:]

    def solve_fitted(self,dt=0.1):
        """
        Solve the system of ODEs with the optimal parameters with a dense output.
        dt is the spacing of the output points, which can be changed manually.

        Returns the (time, solution) pair computed by Solver.solve().
        A warning is issued if fit() has never been called.
        """
        if not self.fitted: # Warning: the FIT function has never been called
            w.warn("Warning: model not fitted, parameters are the initial ones.")

        # Solve for the optimal set of parameters
        mysolver = Solver(dt,self.fun,self.t[0],self.t[-1],self.pars[self.index:],self.pars[:self.index])

        # Solve for a dense grid, not for data points
        return mysolver.solve()
if __name__ == "__main__":
    # Demonstration of the ODEFit class: noisy data are generated from a
    # known system of ODEs and then fitted.

    from integrator import *
    from test import *
    import numpy as np
    from numpy.random import randn
    import matplotlib.pylab as plt

    def randomize(lst,scale=100.):
        """
        Add a random displacement randn()/SCALE to every entry of LST
        (displacement along y axis). LST is modified in place and returned.
        """
        for position, _ in enumerate(lst):
            lst[position] += randn() / scale
        return lst

    def random_data(dt,fun,t0,T,y0,sel=10):
        """
        Create noisy data for the system of ODEs defined by FUN.

        The system is solved with a Solver, one point out of SEL is kept,
        and the kept solution points are randomized (mimics experimental noise).
        Returns the kept time points and the noisy solution points as two lists.
        """
        # Solve the system of ODEs
        t,y = Solver(dt,fun,t0,T,y0).solve()

        # Indices of the points to keep: one out of SEL
        kept = [i for i in range(len(y)) if i % sel == 0]

        t_stripped = [t[i] for i in kept]
        # Randomize the stripped set of data points, in order
        y_random = [randomize(y[i]) for i in kept]

        return t_stripped, y_random

    # Test function representing the system of ODEs
    test_function = rapid_equilibrium

    # Output spacing, integration interval and initial condition
    dt, t0, T = 0.1, 0, 100
    y0 = [1,0,0]

    # Creates noisy data
    t,y = random_data(dt,test_function,t0,T,y0)
    y = np.asarray(y)

    # Initial guess for the parameters
    pars0 = [1,0.01]

    # Create an ODEFit object: data points are the noisy solution
    myfit = ODEFit(test_function,t,y,y0,pars0)

    # Fit noisy data points; only the real parameters are kept, not the initial conditions
    pars = myfit.fit()[0]

    # Print parameters
    print(pars)

    # Solve the system of ODEs using optimal parameters
    tt,yy = myfit.solve_fitted()

    plt.plot(t,y,'x') # Plot noisy data
    plt.plot(tt,yy) # Plot fitting function
    plt.show()
Human Blaster
The Human blaster allows the user to check if a gRNA complementary sequence is present in the human genome.
This program can help iGEM teams that seek to implement the use of gRNAs in their project.
"""
Copyright (C) 2015 iGEM Team EPF_Lausanne
This program is free software: you can redistribute it and/or modify
it under the terms of the GNU General Public License as published by
the Free Software Foundation, either version 3 of the License, or
(at your option) any later version.
This program is distributed in the hope that it will be useful,
but WITHOUT ANY WARRANTY; without even the implied warranty of
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
GNU General Public License for more details.
You should have received a copy of the GNU General Public License
along with this program. If not, see <http://www.gnu.org/licenses/>
HumanBlaster.py -- blasts gRNAs against human genome
Requirements :
- Install selenium on your computer:
http://selenium-python.readthedocs.org/en/latest/installation.html
- Save the gRNAs in a file named gRNAs like this : name:sequence
"""
from selenium import webdriver
from selenium.webdriver.common.keys import Keys
from selenium.webdriver.support.ui import Select
from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
import os
def skip(inp, nb):
    """
    Skip NB lines on INP file.

    The lines are read with readline() and discarded.
    """
    for _ in range(nb):
        inp.readline()

def seqReader(inp, initSkip):
    """
    Analyse the blast result of one query sequence.

    Starting at the current position of the file INP, this function
      - skips INITSKIP lines,
      - reads the name of the query from the next line (the text after the
        first ":", without its first and last character),
      - skips four more lines,
      - reads tab-separated hit lines until a line starting with "#" or the
        end of the file is reached.
    For every hit satisfying the condition below it displays the percentage
    of similarity with a sequence in the human genome.

    You can change the sensibility of the script by changing the conditions
        cond = (int(k[6]) == 1 and int(k[7]) >= 23)
        cond = (int(k[7]) == 23 and int(k[6]) <= 1)
    The first one is used for queries whose name starts with "CC", the
    second one for all the others.

    Returns the line that ended the hit list (a line starting with "#", or
    "" at the end of the file). Returns "" as well if the query line contains
    no ":" or if a hit line has fewer than 8 fields.
    """
    skip(inp, initSkip) # Skip initial lines
    line = inp.readline()

    # Without a ":" there is no query name to read (e.g. end of the file)
    fields = line.split(":")
    if len(fields) < 2:
        return ""
    name = str(fields[1][1:-1])

    skip(inp, 4) # Skip four lines
    l = inp.readline()
    while l != "" and l[0] != "#":
        k = l.split("\t")
        # Fields 6 and 7 are read below, so at least 8 fields are needed
        if len(k) < 8:
            return ""
        if (name[:2] == "CC"):
            cond = (int(k[6]) == 1 and int(k[7]) >= 23)
        else:
            cond = (int(k[7]) == 23 and int(k[6]) <= 1)
        if cond:
            print (name + ": " + k[1].split("|")[3] + " with " + k[2] + "% similarity, " + k[4] + " mismatches.")
        l = inp.readline()
    return l
# Local import: needed to tell a timeout apart from a successful wait
from selenium.common.exceptions import TimeoutException

# Build the FASTA records: every gRNA is written with the four "NGG"
# endings appended and with the four "CCN" beginnings prepended.
with open("gRNAs", 'r') as input1, open("FASTA_gRNAs", 'w') as output:
    records = []
    for i in input1:
        if not i.strip():
            continue # Ignore empty lines (e.g. at the end of the file)
        parts = i.split(":")
        if len(parts) < 2:
            raise ValueError("Malformed line in gRNAs (expected name:sequence): " + i.strip())
        name = parts[0]
        # strip() removes the end of line without cutting the last base
        # when the last line of the file has no end of line
        seq = parts[1].strip()
        for pam in ("TGG", "GGG", "AGG", "CGG"):
            records.append(">" + name + "_" + pam + "\n" + seq + pam + "\n")
        for pam in ("CCG", "CCC", "CCA", "CCT"):
            records.append(">" + pam + "_" + name + "\n" + pam + seq + "\n")
    # The same text is written to the FASTA file and sent to the BLAST form
    text = "".join(records)
    output.write(text)

driver = webdriver.Firefox() # Will open Firefox
driver.get("http://blast.ncbi.nlm.nih.gov/Blast.cgi?PROGRAM=blastn&PAGE_TYPE=BlastSearch&LINK_LOC=blasthome") #Address
assert "Nucleotide BLAST: Search nucleotide databases using a nucleotide query" in driver.title
elem = driver.find_element(By.ID, "seq") #Selects the place where the sequences will be written
elem.send_keys(text) #Writes them
elem = driver.find_element(By.ID, "qtitle")
elem.send_keys("Our Blast") #Change here if you want to have another name for the BLAST
driver.find_element(By.ID, "Rhc").click() #Chooses human genome
driver.find_element(By.ID, "b1").click() #Starts the BLAST

#Will wait until the download page is ready, for at most 500000 seconds (about 139 hours)
try:
    element = WebDriverWait(driver, 500000).until(EC.presence_of_element_located((By.XPATH, "/html/body/div[1]/div[2]/div[3]/div[2]/form/table/tbody/tr[2]/td[1]/div[2]/a[5]")))
except TimeoutException:
    # Close the browser only if the results never showed up: the browser
    # is still needed below to start the download.
    driver.close()
    raise

driver.find_element(By.ID, "showDownload").click() #Will start the download
driver.find_element(By.ID, "hitText").click()

print("You need to save the file in this folder using the name resultat.txt")
continuation = input('When you are done press 1\n')
if continuation:
    with open("resultat.txt", 'r') as inp:
        # The first query is preceded by two header lines, the following
        # ones by a single line (the other one is returned by seqReader)
        l = seqReader(inp, 2)
        while (l != "" and l[0] == "#"):
            l = seqReader(inp, 1)