Skip to content
This repository has been archived by the owner on Jun 3, 2024. It is now read-only.

Latest commit

 

History

History

hana-cloud

Folders and files

NameName
Last commit message
Last commit date

parent directory

..
 
 

SAP HANA Cloud, Data Lake

For more information see: https://help.sap.com/viewer/a896c6a184f21015b5bcf4c7a967df07/LATEST/en-US/228c19ac890046ecbe8e38a540c0cb6b.html

SQL code to be executed in SAP HANA Cloud, Data Lake IQ session

--DROP REMOTE SERVER MY_SOF_SERVER;
CREATE REMOTE SERVER MY_SOF_SERVER CLASS 'FILES_SERVICE' READ ONLY value 'on';

--DROP SCHEMA GDELT IN FILES_SERVICE;
CREATE SCHEMA GDELT IN FILES_SERVICE;

-- DROP TABLE GDELT.MENTIONS_PARTITIONED IN FILES_SERVICE;
CREATE TABLE GDELT.MENTIONS_PARTITIONED (
    GlobalEventID INTEGER,
    EventTimeDate BIGINT,
    MentionTimeDate BIGINT,
    MentionType INTEGER,
    MentionSourceName VARCHAR(1024),
    MentionIdentifier VARCHAR(1024),
    SentenceID INTEGER,
    Actor1CharOffset INTEGER,
    Actor2CharOffset INTEGER,
    ActionCharOffset INTEGER,
    InRawText INTEGER,
    Confidence INTEGER,
    MentionDocLen INTEGER,
    MentionDocTone DOUBLE,
    MentionDocTranslationInfo VARCHAR(1024),
    Extras VARCHAR(1024),
    YEAR INTEGER,
    MONTH INTEGER,
    DAY INTEGER,
    FILE VARCHAR(255)
) 
AUTO REFRESH 
IN FILES_SERVICE;

-- DROP TABLE MENTIONS_PARTITIONED;
CREATE EXISTING TABLE MENTIONS_PARTITIONED
AT 'sof..GDELT.MENTIONS_PARTITIONED';

-- ALTER TABLE GDELT.MENTIONS_PARTITIONED IN FILES_SERVICE DROP DATASOURCE MENTIONS_PARTITIONED;

/* Parquet Directory */
ALTER TABLE GDELT.MENTIONS_PARTITIONED IN FILES_SERVICE
ADD DATASOURCE AS MENTIONS_PARTITIONED
(YEAR FROM DIRECTORY $0, MONTH FROM DIRECTORY $1, DAY FROM DIRECTORY $2)
PARQUET('hdlfs:///gdeltv2/parquet/mentions/')
DATE('YYYYMMDDHHMISS')
ENCODING 'UTF_8';

/* Change the table to manual refresh, if needed */
ALTER TABLE GDELT.MENTIONS_PARTITIONED IN FILES_SERVICE MANUAL REFRESH;
REFRESH TABLE GDELT.MENTIONS_PARTITIONED IN FILES_SERVICE;

/* Query data */
SELECT MentionSourceName, ROUND(AVG(MentionDocTone),2) AS AvgTone
FROM MENTIONS_PARTITIONED
WHERE YEAR=DATEPART( YEAR, TODAY()) AND MONTH=DATEPART( MONTH, TODAY()) AND DAY=DATEPART( DAY, TODAY())
GROUP BY MentionSourceName
ORDER BY AvgTone DESC;