]> git.openstreetmap.org Git - nominatim.git/blob - utils/imports.php
download and merge wikipedia access logs
[nominatim.git] / utils / imports.php
1 #!/usr/bin/php -Cq
2 <?php
3
// Pull in the shared command-line bootstrap that lives in ../lib.
require_once(dirname(dirname(__FILE__)).'/lib/init-cmd.php');
// Raise the PHP memory ceiling for this run.
ini_set('memory_limit', '800M');

// Option table consumed by getCmdOpt(): a usage headline first, then one
// entry per switch (long name, short name, four numeric limits, value
// type, help text).
// NOTE(review): the exact meaning of the four numeric columns is defined by
// getCmdOpt() in the included library - confirm there before editing them.
$aCMDOptions = array();
$aCMDOptions[] = 'Create and setup nominatim search system';

// Generic switches.
$aCMDOptions[] = array('help', 'h', 0, 1, 0, 0, false, 'Show Help');
$aCMDOptions[] = array('quiet', 'q', 0, 1, 0, 0, 'bool', 'Quiet output');
$aCMDOptions[] = array('verbose', 'v', 0, 1, 0, 0, 'bool', 'Verbose output');

// Import actions; each one takes a directory that is resolved via 'realpath'.
$aCMDOptions[] = array(
        'parse-tiger', '', 0, 1, 1, 1, 'realpath',
        'Convert tiger edge files to nominatim sql import'
);
$aCMDOptions[] = array(
        'parse-tiger-2011', '', 0, 1, 1, 1, 'realpath',
        'Convert tiger edge files to nominatim sql import - datafiles from 2011 or later (source: edges directory of tiger data)'
);

// Parse the script arguments; the parsed switches end up in $aCMDResult.
getCmdOpt($_SERVER['argv'], $aCMDOptions, $aCMDResult, true, true);

// Set to true by each action block below once it has been selected.
$bDidSomething = false;
19
// --parse-tiger: convert 2009-layout TIGER edge archives into one import
// file per county under <base>/data/tiger2009.
//
// Input layout, as implied by the glob patterns below:
//   <dir>/<2-char prefix>_<state>/<5-digit county id>_<county>/tl_2009_<county id>_edges.zip
//
// For every county the archive is unzipped into a scratch directory, the
// shapefile is handed to utils/tigerAddressImport.py, and the resulting
// *.osm1.osm file is copied to data/tiger2009/<county id>.sql.
if (isset($aCMDResult['parse-tiger']))
{
        $bDidSomething = true;

        $sOutputDir = CONST_BasePath.'/data/tiger2009';
        if (!file_exists($sOutputDir)) mkdir($sOutputDir);

        // tempnam() reserves a unique name as a file; swap it for a directory
        // of the same name to get a private scratch area for unzip.
        $sTempDir = tempnam('/tmp', 'tiger');
        unlink($sTempDir);
        mkdir($sTempDir);

        foreach(glob($aCMDResult['parse-tiger'].'/??_*', GLOB_ONLYDIR) as $sStateFolder)
        {
                foreach(glob($sStateFolder.'/?????_*', GLOB_ONLYDIR) as $sCountyFolder)
                {
                        // Restart the execution timer for each county.
                        set_time_limit(30);

                        // The glob accepts any five characters; only folders that
                        // really start with a numeric county id can be processed.
                        if (!preg_match('#([0-9]{5})_(.*)#', basename($sCountyFolder), $aMatch))
                        {
                                echo "Skipping: $sCountyFolder\n";
                                continue;
                        }
                        $sCountyID = $aMatch[1];
                        $sCountyName = str_replace('_', ' ', $aMatch[2]);
                        $sEdgesBase = 'tl_2009_'.$sCountyID.'_edges';
                        $sImportFile = $sCountyFolder.'/'.$sEdgesBase.'.zip';
                        echo "$sCountyID, $sCountyName\n";

                        if (!file_exists($sImportFile))
                        {
                                // Nothing was unpacked, so there is nothing to clean up.
                                echo "Missing: $sImportFile\n";
                                continue;
                        }

                        // Quote both paths so names containing spaces or shell
                        // metacharacters cannot break or alter the command.
                        $sUnzipCmd = 'unzip -d '.escapeshellarg($sTempDir).' '.escapeshellarg($sImportFile);
                        exec($sUnzipCmd);

                        $sShapeFile = $sTempDir.'/'.$sEdgesBase.'.shp';
                        if (!file_exists($sShapeFile))
                        {
                                echo "Failed unzip ($sCountyID)\n";
                        }
                        else
                        {
                                $sParseCmd = escapeshellarg(CONST_BasePath.'/utils/tigerAddressImport.py').' '.escapeshellarg($sShapeFile);
                                exec($sParseCmd);

                                // Success is detected by the presence of this output file.
                                $sOsmFile = $sTempDir.'/'.$sEdgesBase.'.osm1.osm';
                                if (!file_exists($sOsmFile))
                                {
                                        echo "Failed parse ($sCountyID)\n";
                                }
                                else
                                {
                                        copy($sOsmFile, $sOutputDir.'/'.$sCountyID.'.sql');
                                }
                        }

                        // Cleanup: empty the scratch directory before the next county.
                        foreach(glob($sTempDir.'/*') as $sTmpFile)
                        {
                                unlink($sTmpFile);
                        }
                }
        }

        // The scratch directory is emptied after every county; remove it too.
        rmdir($sTempDir);
}
89
90
// --parse-tiger-2011: convert TIGER edge archives (2011 or later layout)
// into one import file per county under <base>/data/tiger2011.
//
// The given directory is expected to contain the archives directly, named
// tl_20??_<5-digit county id>_edges.zip. Each archive is unzipped into a
// scratch directory, the shapefile is handed to utils/tigerAddressImport.py,
// and the resulting *.osm1.osm file is copied to
// data/tiger2011/<county id>.sql.
if (isset($aCMDResult['parse-tiger-2011']))
{
        $bDidSomething = true;

        $sOutputDir = CONST_BasePath.'/data/tiger2011';
        if (!file_exists($sOutputDir)) mkdir($sOutputDir);

        // tempnam() reserves a unique name as a file; swap it for a directory
        // of the same name to get a private scratch area for unzip.
        $sTempDir = tempnam('/tmp', 'tiger');
        unlink($sTempDir);
        mkdir($sTempDir);

        foreach(glob($aCMDResult['parse-tiger-2011'].'/tl_20??_?????_edges.zip', 0) as $sImportFile)
        {
                // Restart the execution timer for each archive.
                set_time_limit(30);

                // The glob accepts any five characters in the id position; skip
                // archives whose name does not carry a numeric county id.
                if (!preg_match('#([0-9]{5})_(.*)#', basename($sImportFile), $aMatch))
                {
                        echo "Skipping: $sImportFile\n";
                        continue;
                }
                $sCountyID = $aMatch[1];
                echo "Processing ".$sCountyID."...\n";

                // Quote both paths so names containing spaces or shell
                // metacharacters cannot break or alter the command.
                $sUnzipCmd = 'unzip -d '.escapeshellarg($sTempDir).' '.escapeshellarg($sImportFile);
                exec($sUnzipCmd);

                // Files inside the archive share the archive's base name.
                $sArchiveBase = basename($sImportFile, '.zip');
                $sShapeFile = $sTempDir.'/'.$sArchiveBase.'.shp';
                if (!file_exists($sShapeFile))
                {
                        echo "Failed unzip ($sImportFile)\n";
                }
                else
                {
                        $sParseCmd = escapeshellarg(CONST_BasePath.'/utils/tigerAddressImport.py').' '.escapeshellarg($sShapeFile);
                        exec($sParseCmd);

                        // Success is detected by the presence of this output file.
                        $sOsmFile = $sTempDir.'/'.$sArchiveBase.'.osm1.osm';
                        if (!file_exists($sOsmFile))
                        {
                                echo "Failed parse ($sImportFile)\n";
                        }
                        else
                        {
                                copy($sOsmFile, $sOutputDir.'/'.$sCountyID.'.sql');
                        }
                }

                // Cleanup: empty the scratch directory before the next archive.
                foreach(glob($sTempDir.'/*') as $sTmpFile)
                {
                        unlink($sTmpFile);
                }
        }

        // The scratch directory is emptied after every archive; remove it too.
        rmdir($sTempDir);
}
135         }