Created
June 5, 2020 21:15
-
-
Save danieledler/75118f4446f387b05b2b5c7fb00041da to your computer and use it in GitHub Desktop.
Convert network with string node ids to integer ids with node names in the Pajek format
This file contains hidden or bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
| import pandas as pd | |
def convert_to_integer_pajek(input_file, output_file, columns=('from', 'to', 'weight')):
    """
    Convert a link list with string node ids to Pajek format with integer ids.

    The input is read with pandas as a space-delimited file whose first row
    is a header containing the names given in `columns`. Rows with a missing
    value in any of those three columns are skipped.

    The output has a `*Vertices` section mapping each integer id (starting
    at 1) to the original node name in double quotes, followed by an
    `*Edges` section with one `source_id target_id weight` line per link.
    Ids are assigned in order of first appearance (all source-column values
    first, then unseen target-column values), so repeated runs on the same
    input produce the same file.

    Args:
        input_file: Path (or anything `pandas.read_csv` accepts) of the
            space-delimited link list.
        output_file: Path of the Pajek file to write (overwritten if present).
        columns: Sequence of three header names: source, target and weight.

    Raises:
        KeyError: If a name in `columns` is not a column of the input file.
        IndexError: If `columns` has fewer than three entries.
    """
    source_col, target_col, weight_col = columns[0], columns[1], columns[2]

    # Only the columns we actually use decide whether a link is dropped; a
    # blank in some unrelated extra column must not silently remove a link.
    net = pd.read_csv(input_file, delimiter=' ').dropna(
        subset=[source_col, target_col, weight_col])

    # Plain Python lists: fast to iterate and values keep their column dtype
    # (row-wise iteration over the frame would upcast mixed numeric columns).
    sources = net[source_col].tolist()
    targets = net[target_col].tolist()
    weights = net[weight_col].tolist()

    # dict.fromkeys de-duplicates while preserving first-appearance order,
    # which makes the id assignment deterministic (set order is not).
    nodes = {
        name: node_id
        for node_id, name in enumerate(dict.fromkeys(sources + targets), start=1)
    }

    num_links = len(sources)
    print(f"Convert network with {len(nodes)} nodes and {num_links} links...")

    # pandas reads UTF-8 by default, so write UTF-8 as well instead of the
    # platform locale encoding (which can fail on non-ASCII node names).
    with open(output_file, 'w', encoding='utf-8') as fp:
        fp.write(f"*Vertices {len(nodes)}\n")
        for name, node_id in nodes.items():
            fp.write(f"{node_id} \"{name}\"\n")
        fp.write("*Edges\n")
        # Count rows positionally: the DataFrame index has gaps after dropna,
        # so it cannot be used to compute the fraction done.
        for row_number, (source, target, weight) in enumerate(
                zip(sources, targets, weights)):
            # Report progress once every 1000 links, not on every link.
            if row_number % 1000 == 0:
                print(f"{row_number/num_links:.2%}", end='\r')
            fp.write(f"{nodes[source]} {nodes[target]} {weight}\n")
    print(f"Done! Wrote '{output_file}'.")
Sign up for free
to join this conversation on GitHub.
Already have an account?
Sign in to comment