Skip to content
GitLab
Explore
Sign in
Primary navigation
Search or go to…
Project
M
metadata-and-data
Manage
Activity
Members
Labels
Plan
Issues
Issue boards
Milestones
Wiki
Code
Merge requests
Repository
Branches
Commits
Tags
Repository graph
Compare revisions
Snippets
Build
Pipelines
Jobs
Pipeline schedules
Artifacts
Deploy
Releases
Container registry
Model registry
Operate
Environments
Monitor
Incidents
Analyze
Value stream analytics
Contributor analytics
CI/CD analytics
Repository analytics
Model experiments
Help
Help
Support
GitLab documentation
Compare GitLab plans
Community forum
Contribute to GitLab
Provide feedback
Terms and privacy
Keyboard shortcuts
?
Snippets
Groups
Projects
Show more breadcrumbs
web-et-numerique
web-et-numerique-internet
data.grandlyon.com
web-portal
components
indexers
metadata-and-data
Commits
0ed26f1d
Commit
0ed26f1d
authored
6 years ago
by
Alessandro Cerioni
Browse files
Options
Downloads
Patches
Plain Diff
Reducing verbosity. Making the code robust w/ respect to missing geometry.
parent
4fd86f7c
No related branches found
No related tags found
No related merge requests found
Changes
2
Hide whitespace changes
Inline
Side-by-side
Showing
2 changed files
5-pg-field-type-detector.py
+6
-26
6 additions, 26 deletions
5-pg-field-type-detector.py
utils/postgis_helper.py
+14
-12
14 additions, 12 deletions
utils/postgis_helper.py
with
20 additions
and
38 deletions
5-pg-field-type-detector.py
+
6
−
26
View file @
0ed26f1d
...
@@ -114,7 +114,7 @@ def elect_field_type( data ):
...
@@ -114,7 +114,7 @@ def elect_field_type( data ):
found_types
.
remove
(
'
NoneType
'
)
found_types
.
remove
(
'
NoneType
'
)
if
not
all
(
x
==
found_types
[
0
]
for
x
in
found_types
):
# NOT SAME TYPE: WHICH ONE TO CHOOSE?
if
not
all
(
x
==
found_types
[
0
]
for
x
in
found_types
):
# NOT SAME TYPE: WHICH ONE TO CHOOSE?
logging
.
warn
(
'
WARNING - MIXED TYPES %s %s
'
%
(
k
,
uuid
))
logging
.
warn
(
'
WARNING - MIXED TYPES %s %s
'
%
(
k
,
db_schema_table
))
#print('WARNING - MIXED TYPES', parsed_samples)
#print('WARNING - MIXED TYPES', parsed_samples)
logging
.
warn
(
'
WARNING - MIXED TYPES %s
'
%
found_types
)
logging
.
warn
(
'
WARNING - MIXED TYPES %s
'
%
found_types
)
...
@@ -200,27 +200,14 @@ def generate_field_catalog( pg, catalog=None ):
...
@@ -200,27 +200,14 @@ def generate_field_catalog( pg, catalog=None ):
for
table
in
pg
.
get_tables
(
schema_name
):
for
table
in
pg
.
get_tables
(
schema_name
):
# if str(table) != selected_table:
# if str(table) != selected_table:
# continue
# continue
db_schema_table
=
'
%s.%s
'
%
(
pg
.
dbname
,
table
)
logging
.
info
(
'
Analyzing table %s. %i docs analyzed so far.
'
%
(
db_schema_table
,
output
[
'
analyzed_docs
'
]))
# print(table)
# print(table)
# print( pg.count_entries(table) )
for
doc
in
pg
.
get_entries
(
table
):
for
doc
in
pg
.
get_entries
(
table
):
# print(doc)
properties
=
doc
[
'
properties
'
]
properties
=
doc
[
'
properties
'
]
db_schema_table
=
'
%s.%s
'
%
(
pg
.
dbname
,
table
)
#exit(0)
#dataset_uuid = doc['metadata-fr']['geonet:info']['uuid']
#dataset_title = doc['metadata-fr']['title']
flattened_properties
=
flatten_json
(
properties
)
flattened_properties
=
flatten_json
(
properties
)
#print(flattened_properties)
#exit(0)
#logging.INFO("Analyzing dataset %s %s" % (dataset_uuid, dataset_title))
#logging.debug('there')
# ---------------------------------------------------------------------------------------------
# ---------------------------------------------------------------------------------------------
for
k
,
v
in
flattened_properties
.
items
():
for
k
,
v
in
flattened_properties
.
items
():
...
@@ -244,15 +231,10 @@ def generate_field_catalog( pg, catalog=None ):
...
@@ -244,15 +231,10 @@ def generate_field_catalog( pg, catalog=None ):
output
[
'
analyzed_docs
'
]
+=
1
output
[
'
analyzed_docs
'
]
+=
1
#logging.info( "%s/%s" % (output['analyzed_docs'], total) )
logging
.
info
(
"
%s documents analyzed so far...
"
%
(
output
[
'
analyzed_docs
'
])
)
# useful for debugging:
# useful for debugging:
if
cfg
[
'
field_type_detector
'
][
'
debug
'
]
and
output
[
'
analyzed_docs
'
]
>
10
:
if
cfg
[
'
field_type_detector
'
][
'
debug
'
]
and
output
[
'
analyzed_docs
'
]
>
10
:
return
output
return
output
#print(output)
return
output
return
output
...
@@ -270,19 +252,17 @@ def main(cfg):
...
@@ -270,19 +252,17 @@ def main(cfg):
#print("(Some of the) output files are already present, and rewrite is disabled. Exiting!")
#print("(Some of the) output files are already present, and rewrite is disabled. Exiting!")
raise
Exception
(
"
(Some of the) output files are already present, and rewrite is disabled!
"
)
raise
Exception
(
"
(Some of the) output files are already present, and rewrite is disabled!
"
)
# TODO for every DB
# connection_string = ('postgresql+psycopg2://{username}:{password}@{hostname}/{dbname}').format(
# hostname=cfg['postgis']['host'], dbname=cfg['postgis']['databases'][0],
# username=cfg['postgis']['username'], password=cfg['postgis']['password'])
dbnames
=
cfg
[
'
postgis
'
][
'
databases
'
]
dbnames
=
cfg
[
'
postgis
'
][
'
databases
'
]
field_catalog
=
{}
field_catalog
=
{}
logging
.
info
(
'
Building catalog...
'
)
for
dbname
in
dbnames
:
for
dbname
in
dbnames
:
logging
.
info
(
'
Analyzing database %s...
'
%
dbname
)
pg_connection
=
Remote
(
hostname
=
cfg
[
'
postgis
'
][
'
host
'
],
dbname
=
dbname
,
username
=
cfg
[
'
postgis
'
][
'
username
'
],
password
=
cfg
[
'
postgis
'
][
'
password
'
])
pg_connection
=
Remote
(
hostname
=
cfg
[
'
postgis
'
][
'
host
'
],
dbname
=
dbname
,
username
=
cfg
[
'
postgis
'
][
'
username
'
],
password
=
cfg
[
'
postgis
'
][
'
password
'
])
field_catalog
=
generate_field_catalog
(
pg_connection
,
field_catalog
)
field_catalog
=
generate_field_catalog
(
pg_connection
,
field_catalog
)
logging
.
info
(
"
Catalog: built. %i docs were analyzed.
"
%
field_catalog
[
'
analyzed_docs
'
])
# writing results to disk
# writing results to disk
if
not
os
.
path
.
exists
(
working_directory
):
if
not
os
.
path
.
exists
(
working_directory
):
os
.
mkdir
(
working_directory
)
os
.
mkdir
(
working_directory
)
...
...
This diff is collapsed.
Click to expand it.
utils/postgis_helper.py
+
14
−
12
View file @
0ed26f1d
...
@@ -74,26 +74,28 @@ class Remote(object):
...
@@ -74,26 +74,28 @@ class Remote(object):
def
get_entries
(
self
,
table
):
def
get_entries
(
self
,
table
):
columns
,
geom
=
self
.
get_columns
(
table
)
columns
,
geom
=
self
.
get_columns
(
table
)
#
#print(geom)
#
fields
=
[
table
.
c
[
col
.
name
]
for
col
in
columns
]
fields
=
[
table
.
c
[
col
.
name
]
for
col
in
columns
]
if
not
geom
.
type
.
srid
==
4326
:
the_geom
=
table
.
c
[
geom
.
name
].
ST_Transform
(
4326
).
ST_AsGeoJSON
()
# bug
if
geom
is
not
None
:
else
:
if
not
geom
.
type
.
srid
==
4326
:
the_geom
=
table
.
c
[
geom
.
name
].
ST_AsGeoJSON
()
# bug
the_geom
=
table
.
c
[
geom
.
name
].
ST_Transform
(
4326
).
ST_AsGeoJSON
()
# bug
fields
.
append
(
the_geom
)
else
:
the_geom
=
table
.
c
[
geom
.
name
].
ST_AsGeoJSON
()
# bug
fields
.
append
(
the_geom
)
selected
=
select
(
fields
)
selected
=
select
(
fields
)
for
entry
in
self
.
engine
.
execute
(
selected
):
for
entry
in
self
.
engine
.
execute
(
selected
):
items
=
entry
.
items
()
items
=
entry
.
items
()
geometry
=
json
.
loads
(
items
.
pop
()[
1
])
properties
=
dict
(
items
)
properties
=
dict
(
items
)
document
=
{
document
=
{
'
type
'
:
'
Feature
'
,
'
type
'
:
'
Feature
'
,
'
geometry
'
:
geometry
,
'
properties
'
:
properties
'
properties
'
:
properties
}
}
if
geom
is
not
None
:
document
[
'
geometry
'
]
=
json
.
loads
(
items
.
pop
()[
1
])
# except:
# logging.warn("Invalid geometry.")
yield
document
yield
document
def
get_columns
(
self
,
table
):
def
get_columns
(
self
,
table
):
...
...
This diff is collapsed.
Click to expand it.
Preview
0%
Loading
Try again
or
attach a new file
.
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Save comment
Cancel
Please
register
or
sign in
to comment