Load a graph from a data frame (read from a tab-separated file laid out as follows):
col1 col2 weight
ничего космос 1.0
The first two columns are treated as vertex names, the third as the edge weight.
# Read the tab-separated edge list; the first row of the file is the header.
data <- read.table(
  "/home/bliss/Study/Bauman/Magistr&Disser/Data/dat.txt",
  header = TRUE,
  sep = "\t"
)
# read.table() already returns a data frame, so no as.data.frame() is needed.
# The first two columns give the vertex names of each edge; the third column
# (weight) is attached to the edges.
gr <- graph.data.frame(data, directed = TRUE)
get vertex id by value: which(V(gr)$name=="космос")
get edge weight using vertex names: E(gr, P=c(which(V(gr)$name=="голубой")-1,which(V(gr)$name=="платок")-1))$weight
Vertex indices start from 0, not 1 (in this igraph version)! That is why 1 is subtracted from the result of which() above.
Sunday, November 14, 2010
Tuesday, November 9, 2010
R: working with graphs
use igraph
library(igraph)
create empty graph: graph.empty()
create graph with edges (1,2); (2,3); (5,6): graph(c(1,2,2,3,5,6), directed=TRUE)
get nodes number: vcount(graph)
get edges number: ecount(graph)
add/delete edges: add.edges; delete.edges
Shortest paths: http://igraph.sourceforge.net/doc/R/shortest.paths.html
library(igraph)
create empty graph: graph.empty()
create graph with edges (1,2); (2,3); (5,6): graph(c(1,2,2,3,5,6), directed=TRUE)
get nodes number: vcount(graph)
get edges number: ecount(graph)
add/delete edges: add.edges; delete.edges
Shortest paths: http://igraph.sourceforge.net/doc/R/shortest.paths.html
shortest.paths(graph, v=V(graph), mode = c("all", "out", "in"),
weights = NULL, algorithm = c("automatic", "unweighted",
"dijkstra", "bellman-ford",
"johnson"))
get.shortest.paths(graph, from, to=V(graph), mode = c("all", "out",
"in"), weights = NULL)
get.all.shortest.paths(graph, from, to = V(graph), mode = c("all", "out", "in"))
average.path.length(graph, directed=TRUE, unconnected=TRUE)
path.length.hist (graph, directed = TRUE, verbose = igraph.par("verbose"))
Monday, November 8, 2010
Python Regular Expressions
E.g. we'd like to parse such html-code using regexp:
<tr><td><font color="#bbbbbb">5587 </font></td><td>изумление</td><td>S</td><td>13.98</td><td>20.65</td>
## <td>#N/A</td><td>#N/A</td><td>#N/A</td><td>#N/A</td><td>#N/A</td><td>#N/A</td><td>#N/A</td><td></td></tr>
The code will be:
rows=re.finditer('(\<tr.+?tr\>)',html) ## non-greedy (unlike .+, it does not look for the longest possible string)
for row in rows:
cells=re.finditer('(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)',row.groups()[0])
Round brackets '(' and ')' define a group; you can iterate over groups or name them.
\ - escape character: the following symbol is read literally, as it is
.+ - matches any non-empty string (any symbols); it is greedy, i.e. it finds the string of maximum length, and can take a lot of resources
.+? - matches any non-empty string (any symbols); it is non-greedy, i.e. it finds the shortest possible string, which is better for parsing constructions like
<tr>...</tr>..<tr>...</tr>
'(\<tr.+tr\>)', finds only one match spanning everything: ({<tr>...</tr>..<tr>...</tr>})
'(\<tr.+?tr\>)', finds ({ <tr>...</tr>},{<tr>...</tr>})
Primitive function to remove html tags:
def remove_tags(html):
    """Return ``html`` with every ``<...>`` span removed.

    This is a primitive, regex-based tag stripper: it deletes everything
    from a ``<`` up to the nearest following ``>`` and keeps the rest of the
    text unchanged. It does not parse HTML and does not decode entities.

    :param html: the HTML text to clean.
    :return: the text with the tags removed.
    """
    # Non-greedy, so each tag is matched on its own rather than everything
    # between the first '<' and the last '>'. DOTALL lets '.' match newlines,
    # so a tag whose attributes wrap onto another line is removed as well.
    pattern = re.compile('<.*?>', re.DOTALL)
    return pattern.sub("", html)
Find a string enclosed in braces that doesn't contain a given symbol (here the closing brace '}'):
re.finditer('({[^}]+})', str)
<tr><td><font color="#bbbbbb">5587 </font></td><td>изумление</td><td>S</td><td>13.98</td><td>20.65</td>
## <td>#N/A</td><td>#N/A</td><td>#N/A</td><td>#N/A</td><td>#N/A</td><td>#N/A</td><td>#N/A</td><td></td></tr>
The code will be:
rows=re.finditer('(\<tr.+?tr\>)',html)
for row in rows:
cells=re.finditer('(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)(\<td.+?td\>)',row.groups()[0])
Round brackets '(' and ')' define a group; you can iterate over groups or name them.
\ - escape character: the following symbol is read literally, as it is
.+ - matches any non-empty string (any symbols); it is greedy, i.e. it finds the string of maximum length, and can take a lot of resources
.+? - matches any non-empty string (any symbols); it is non-greedy, i.e. it finds the shortest possible string, which is better for parsing constructions like
<tr>...</tr>..<tr>...</tr>
'(\<tr.+tr\>)', finds only one match spanning everything: ({<tr>...</tr>..<tr>...</tr>})
'(\<tr.+?tr\>)', finds each row separately: ({<tr>...</tr>},{<tr>...</tr>})
Primitive function to remove html tags:
def remove_tags(html):
    """Return ``html`` with every ``<...>`` span removed.

    This is a primitive, regex-based tag stripper: it deletes everything
    from a ``<`` up to the nearest following ``>`` and keeps the rest of the
    text unchanged. It does not parse HTML and does not decode entities.

    :param html: the HTML text to clean.
    :return: the text with the tags removed.
    """
    # Non-greedy, so each tag is matched on its own rather than everything
    # between the first '<' and the last '>'. DOTALL lets '.' match newlines,
    # so a tag whose attributes wrap onto another line is removed as well.
    pattern = re.compile('<.*?>', re.DOTALL)
    return pattern.sub("", html)
Find a string enclosed in braces that doesn't contain a given symbol (here the closing brace '}'):
re.finditer('({[^}]+})', str)
Wednesday, November 3, 2010
Subscribe to:
Comments (Atom)
