2020
|
Sejdiu, Gezim Efficient Distributed In-Memory Processing of RDF Datasets PhD Thesis Rheinische Friedrich-Wilhelms-Universität Bonn, 2020. Abstract | Links | BibTeX @phdthesis{gezim-sejdiu-2020-phd-thesis,
  author     = {Sejdiu, Gezim},
  title      = {Efficient Distributed In-Memory Processing of RDF Datasets},
  school     = {Rheinische Friedrich-Wilhelms-Universität Bonn},
  year       = {2020},
  date       = {2020-01-01},
  url        = {http://hdl.handle.net/20.500.11811/8735},
  abstract   = {Over the past decade, vast amounts of machine-readable structured information have become available through the automation of research processes as well as the increasing popularity of knowledge graphs and semantic technologies. Today, we count more than 10,000 datasets made available online following Semantic Web standards. A major and yet unsolved challenge that research faces today is to perform scalable analysis of large-scale knowledge graphs in order to facilitate applications in various domains including life sciences, publishing, and the internet of things. The main objective of this thesis is to lay foundations for efficient algorithms performing analytics, i.e. exploration, quality assessment, and querying over semantic knowledge graphs at a scale that has not been possible before. First, we propose a novel approach for statistical calculations of large RDF datasets, which scales out to clusters of machines. In particular, we describe the first distributed in-memory approach for computing 32 different statistical criteria for RDF datasets using Apache Spark. Many applications such as data integration, search, and interlinking, may take full advantage of the data when having a priori statistical information about its internal structure and coverage. However, such applications may suffer from low quality and not being able to leverage the full advantage of the data when the size of data goes beyond the capacity of the resources available. Thus, we introduce a distributed approach of quality assessment of large RDF datasets. It is the first distributed, in-memory approach for computing different quality metrics for large RDF datasets using Apache Spark. We also provide a quality assessment pattern that can be used to generate new scalable metrics that can be applied to big data. Based on the knowledge of the internal statistics of a dataset and its quality, users typically want to query and retrieve large amounts of information.
As a result, it has become difficult to efficiently process these large RDF datasets. Indeed, these processes require both efficient storage strategies and query-processing engines to be able to scale in terms of data size. Therefore, we propose a scalable approach to evaluate SPARQL queries over distributed RDF datasets by translating SPARQL queries into Spark executable code. We conducted several empirical evaluations to assess the scalability, effectiveness, and efficiency of our proposed approaches. More importantly, various use cases, i.e., Ethereum analysis, Mining Big Data Logs, and Scalable Integration of POIs, have been developed and leveraged by our approach. The empirical evaluations and concrete applications provide evidence that our methodology and techniques proposed during this thesis help to effectively analyze and process large-scale RDF datasets. All the proposed approaches during this thesis are integrated into the larger SANSA framework.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {phdthesis}
}
Over the past decade, vast amounts of machine-readable structured information have become available through the automation of research processes as well as the increasing popularity of knowledge graphs and semantic technologies. Today, we count more than 10,000 datasets made available online following Semantic Web standards. A major and yet unsolved challenge that research faces today is to perform scalable analysis of large-scale knowledge graphs in order to facilitate applications in various domains including life sciences, publishing, and the internet of things. The main objective of this thesis is to lay foundations for efficient algorithms performing analytics, i.e. exploration, quality assessment, and querying over semantic knowledge graphs at a scale that has not been possible before. First, we propose a novel approach for statistical calculations of large RDF datasets, which scales out to clusters of machines. In particular, we describe the first distributed in-memory approach for computing 32 different statistical criteria for RDF datasets using Apache Spark. Many applications such as data integration, search, and interlinking, may take full advantage of the data when having a priori statistical information about its internal structure and coverage. However, such applications may suffer from low quality and not being able to leverage the full advantage of the data when the size of data goes beyond the capacity of the resources available. Thus, we introduce a distributed approach of quality assessment of large RDF datasets. It is the first distributed, in-memory approach for computing different quality metrics for large RDF datasets using Apache Spark. We also provide a quality assessment pattern that can be used to generate new scalable metrics that can be applied to big data. Based on the knowledge of the internal statistics of a dataset and its quality, users typically want to query and retrieve large amounts of information. 
As a result, it has become difficult to efficiently process these large RDF datasets. Indeed, these processes require both efficient storage strategies and query-processing engines to be able to scale in terms of data size. Therefore, we propose a scalable approach to evaluate SPARQL queries over distributed RDF datasets by translating SPARQL queries into Spark executable code. We conducted several empirical evaluations to assess the scalability, effectiveness, and efficiency of our proposed approaches. More importantly, various use cases, i.e., Ethereum analysis, Mining Big Data Logs, and Scalable Integration of POIs, have been developed and leveraged by our approach. The empirical evaluations and concrete applications provide evidence that our methodology and techniques proposed during this thesis help to effectively analyze and process large-scale RDF datasets. All the proposed approaches during this thesis are integrated into the larger SANSA framework. |
Jabeen, Hajira; Haziiev, Eskender; Sejdiu, Gezim; Lehmann, Jens DISE: A Distributed in-Memory SPARQL Processing Engine over Tensor Data Inproceedings IEEE 14th International Conference on Semantic Computing, ICSC
2020, San Diego, CA, USA, February 3-5, 2020, pp. 400–407, IEEE, 2020. Links | BibTeX @inproceedings{DBLP:conf/semco/JabeenHS020,
  author     = {Jabeen, Hajira and
                Haziiev, Eskender and
                Sejdiu, Gezim and
                Lehmann, Jens},
  title      = {DISE: A Distributed in-Memory SPARQL Processing Engine over Tensor Data},
  booktitle  = {IEEE 14th International Conference on Semantic Computing, ICSC 2020, San Diego, CA, USA, February 3-5, 2020},
  pages      = {400--407},
  publisher  = {IEEE},
  year       = {2020},
  date       = {2020-01-01},
  doi        = {10.1109/ICSC.2020.00079},
  url        = {https://doi.org/10.1109/ICSC.2020.00079},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Jabeen, Hajira; Graux, Damien; Sejdiu, Gezim Scalable Knowledge Graph Processing Using SANSA Incollection Knowledge Graphs and Big Data Processing, 12072 , pp. 105–121, Springer, 2020. Links | BibTeX @incollection{DBLP:series/lncs/JabeenGS20,
  author     = {Jabeen, Hajira and
                Graux, Damien and
                Sejdiu, Gezim},
  title      = {Scalable Knowledge Graph Processing Using SANSA},
  booktitle  = {Knowledge Graphs and Big Data Processing},
  series     = {Lecture Notes in Computer Science},
  volume     = {12072},
  pages      = {105--121},
  publisher  = {Springer},
  year       = {2020},
  date       = {2020-01-01},
  doi        = {10.1007/978-3-030-53199-7_7},
  url        = {https://doi.org/10.1007/978-3-030-53199-7_7},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {incollection}
}
|
Graux, Damien; Sejdiu, Gezim; Stadler, Claus; Napolitano, Giulio; Lehmann, Jens MINDS: A Translator to Embed Mathematical Expressions Inside SPARQL
Queries Inproceedings Semantic Systems. In the Era of Knowledge Graphs - 16th International
Conference on Semantic Systems, SEMANTiCS 2020, Amsterdam, The Netherlands,
September 7-10, 2020, Proceedings, pp. 104–117, Springer, 2020. Links | BibTeX @inproceedings{DBLP:conf/i-semantics/GrauxSSN020,
  author     = {Graux, Damien and
                Sejdiu, Gezim and
                Stadler, Claus and
                Napolitano, Giulio and
                Lehmann, Jens},
  title      = {MINDS: A Translator to Embed Mathematical Expressions Inside SPARQL Queries},
  booktitle  = {Semantic Systems. In the Era of Knowledge Graphs - 16th International Conference on Semantic Systems, SEMANTiCS 2020, Amsterdam, The Netherlands, September 7-10, 2020, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {12378},
  pages      = {104--117},
  publisher  = {Springer},
  year       = {2020},
  date       = {2020-01-01},
  doi        = {10.1007/978-3-030-59833-4_7},
  url        = {https://doi.org/10.1007/978-3-030-59833-4_7},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
2019
|
Dadwal, Rajjat; Graux, Damien; Sejdiu, Gezim; Jabeen, Hajira; Lehmann, Jens Clustering Pipelines of large RDF POI Data Inproceedings Proceedings of 16th Extended Semantic Web Conference (ESWC 2019), Poster & Demos, 2019. Links | BibTeX @inproceedings{piping-clustering-eswc19-poster,
  author     = {Dadwal, Rajjat and
                Graux, Damien and
                Sejdiu, Gezim and
                Jabeen, Hajira and
                Lehmann, Jens},
  title      = {Clustering Pipelines of large RDF POI Data},
  booktitle  = {Proceedings of 16th Extended Semantic Web Conference (ESWC 2019), Poster \& Demos},
  year       = {2019},
  date       = {2019-01-01},
  url        = {https://dgraux.github.io/publications/PipingClustering_ESWC_2019_Poster.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Sejdiu, Gezim; Rula, Anisa; Lehmann, Jens; Jabeen, Hajira A Scalable Framework for Quality Assessment of RDF Datasets Inproceedings Proceedings of 18th International Semantic Web Conference, 2019. Links | BibTeX @inproceedings{sejdiu-2019-sansa-dist-quality-assessment-iswc,
  author     = {Sejdiu, Gezim and
                Rula, Anisa and
                Lehmann, Jens and
                Jabeen, Hajira},
  title      = {A Scalable Framework for Quality Assessment of RDF Datasets},
  booktitle  = {Proceedings of 18th International Semantic Web Conference},
  year       = {2019},
  date       = {2019-01-01},
  url        = {http://jens-lehmann.org/files/2019/iswc_dist_quality_assessment.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Sejdiu, Gezim; Graux, Damien; Khan, Imran; Lytra, Ioanna; Jabeen, Hajira; Lehmann, Jens Towards a Scalable Semantic-Based Distributed Approach for SPARQL
Query Evaluation Inproceedings Semantic Systems. The Power of AI and Knowledge Graphs - 15th International
Conference, SEMANTiCS 2019, Karlsruhe, Germany, September 9-12, 2019,
Proceedings, pp. 295–309, Springer, 2019. Links | BibTeX @inproceedings{DBLP:conf/i-semantics/SejdiuGKLJ019,
  author     = {Sejdiu, Gezim and
                Graux, Damien and
                Khan, Imran and
                Lytra, Ioanna and
                Jabeen, Hajira and
                Lehmann, Jens},
  title      = {Towards a Scalable Semantic-Based Distributed Approach for SPARQL Query Evaluation},
  booktitle  = {Semantic Systems. The Power of AI and Knowledge Graphs - 15th International Conference, SEMANTiCS 2019, Karlsruhe, Germany, September 9-12, 2019, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {11702},
  pages      = {295--309},
  publisher  = {Springer},
  year       = {2019},
  date       = {2019-01-01},
  doi        = {10.1007/978-3-030-33220-4_22},
  url        = {https://doi.org/10.1007/978-3-030-33220-4_22},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Arndt, Natanael; Zänker, Sebastian; Sejdiu, Gezim; Tramp, Sebastian Jekyll RDF: Template-Based Linked Data Publication with Minimized
Effort and Maximum Scalability Inproceedings Web Engineering - 19th International Conference, ICWE 2019, Daejeon,
South Korea, June 11-14, 2019, Proceedings, pp. 331–346, Springer, 2019. Links | BibTeX @inproceedings{DBLP:conf/icwe/ArndtZST19,
  author     = {Arndt, Natanael and
                Zänker, Sebastian and
                Sejdiu, Gezim and
                Tramp, Sebastian},
  title      = {Jekyll RDF: Template-Based Linked Data Publication with Minimized Effort and Maximum Scalability},
  booktitle  = {Web Engineering - 19th International Conference, ICWE 2019, Daejeon, South Korea, June 11-14, 2019, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {11496},
  pages      = {331--346},
  publisher  = {Springer},
  year       = {2019},
  date       = {2019-01-01},
  doi        = {10.1007/978-3-030-19274-7_24},
  url        = {https://svn.aksw.org/papers/2019/ICWE_JekyllRDF/public.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Stadler, Claus; Sejdiu, Gezim; Graux, Damien; Lehmann, Jens Querying Large-scale RDF Datasets Using the SANSA Framework Inproceedings Proceedings of the ISWC 2019 Satellite Tracks (Posters & Demonstrations,
Industry, and Outrageous Ideas) co-located with 18th International
Semantic Web Conference (ISWC 2019), Auckland, New Zealand, October
26-30, 2019, pp. 285–288, CEUR-WS.org, 2019. Links | BibTeX @inproceedings{DBLP:conf/semweb/StadlerSG019,
  author     = {Stadler, Claus and
                Sejdiu, Gezim and
                Graux, Damien and
                Lehmann, Jens},
  title      = {Querying Large-scale RDF Datasets Using the SANSA Framework},
  booktitle  = {Proceedings of the ISWC 2019 Satellite Tracks (Posters \& Demonstrations, Industry, and Outrageous Ideas) co-located with 18th International Semantic Web Conference (ISWC 2019), Auckland, New Zealand, October 26-30, 2019},
  series     = {CEUR Workshop Proceedings},
  volume     = {2456},
  pages      = {285--288},
  publisher  = {CEUR-WS.org},
  year       = {2019},
  date       = {2019-01-01},
  url        = {http://ceur-ws.org/Vol-2456/paper74.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Stadler, Claus; Sejdiu, Gezim; Graux, Damien; Lehmann, Jens Sparklify: A Scalable Software Component for Efficient Evaluation
of SPARQL Queries over Distributed RDF Datasets Inproceedings The Semantic Web - ISWC 2019 - 18th International Semantic Web Conference,
Auckland, New Zealand, October 26-30, 2019, Proceedings, Part II, pp. 293–308, Springer, 2019. Links | BibTeX @inproceedings{DBLP:conf/semweb/StadlerSG019a,
  author     = {Stadler, Claus and
                Sejdiu, Gezim and
                Graux, Damien and
                Lehmann, Jens},
  title      = {Sparklify: A Scalable Software Component for Efficient Evaluation of SPARQL Queries over Distributed RDF Datasets},
  booktitle  = {The Semantic Web - ISWC 2019 - 18th International Semantic Web Conference, Auckland, New Zealand, October 26-30, 2019, Proceedings, Part II},
  series     = {Lecture Notes in Computer Science},
  volume     = {11779},
  pages      = {293--308},
  publisher  = {Springer},
  year       = {2019},
  date       = {2019-01-01},
  doi        = {10.1007/978-3-030-30796-7_19},
  url        = {http://jens-lehmann.org/files/2019/iswc_sparklify.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Dadwal, Rajjat; Graux, Damien; Sejdiu, Gezim; Jabeen, Hajira; Lehmann, Jens Clustering Pipelines of Large RDF POI Data Inproceedings The Semantic Web: ESWC 2019 Satellite Events - ESWC 2019 Satellite
Events, Portorož, Slovenia, June 2-6, 2019, Revised Selected
Papers, pp. 24–27, Springer, 2019. Links | BibTeX @inproceedings{DBLP:conf/esws/DadwalGSJ019,
  author     = {Dadwal, Rajjat and
                Graux, Damien and
                Sejdiu, Gezim and
                Jabeen, Hajira and
                Lehmann, Jens},
  title      = {Clustering Pipelines of Large RDF POI Data},
  booktitle  = {The Semantic Web: ESWC 2019 Satellite Events - ESWC 2019 Satellite Events, Portoro{\v{z}}, Slovenia, June 2-6, 2019, Revised Selected Papers},
  series     = {Lecture Notes in Computer Science},
  volume     = {11762},
  pages      = {24--27},
  publisher  = {Springer},
  year       = {2019},
  date       = {2019-01-01},
  doi        = {10.1007/978-3-030-32327-1_5},
  url        = {https://doi.org/10.1007/978-3-030-32327-1_5},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Sui, Danning; Sejdiu, Gezim; Graux, Damien; Lehmann, Jens The Hubs and Authorities Transaction Network Analysis using the SANSA
framework Inproceedings Proceedings of the Posters and Demo Track of the 15th International
Conference on Semantic Systems co-located with 15th International
Conference on Semantic Systems (SEMANTiCS 2019), Karlsruhe, Germany,
September 9th - to - 12th, 2019, CEUR-WS.org, 2019. Links | BibTeX @inproceedings{DBLP:conf/i-semantics/SuiSG019,
  author     = {Sui, Danning and
                Sejdiu, Gezim and
                Graux, Damien and
                Lehmann, Jens},
  title      = {The Hubs and Authorities Transaction Network Analysis using the SANSA framework},
  booktitle  = {Proceedings of the Posters and Demo Track of the 15th International Conference on Semantic Systems co-located with 15th International Conference on Semantic Systems (SEMANTiCS 2019), Karlsruhe, Germany, September 9th - to - 12th, 2019},
  series     = {CEUR Workshop Proceedings},
  volume     = {2451},
  publisher  = {CEUR-WS.org},
  year       = {2019},
  date       = {2019-01-01},
  url        = {http://ceur-ws.org/Vol-2451/paper-25.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
2018
|
Jabeen, Hajira; Dadwal, Rajjat; Sejdiu, Gezim; Lehmann, Jens Divided We Stand Out! Forging Cohorts fOr Numeric Outlier Detection
in Large Scale Knowledge Graphs (CONOD) Inproceedings Knowledge Engineering and Knowledge Management - 21st International
Conference, EKAW 2018, Nancy, France, November 12-16, 2018, Proceedings, pp. 534–548, Springer, 2018. Links | BibTeX @inproceedings{DBLP:conf/ekaw/JabeenDSL18,
  author     = {Jabeen, Hajira and
                Dadwal, Rajjat and
                Sejdiu, Gezim and
                Lehmann, Jens},
  title      = {Divided We Stand Out! Forging Cohorts fOr Numeric Outlier Detection in Large Scale Knowledge Graphs (CONOD)},
  booktitle  = {Knowledge Engineering and Knowledge Management - 21st International Conference, EKAW 2018, Nancy, France, November 12-16, 2018, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {11313},
  pages      = {534--548},
  publisher  = {Springer},
  year       = {2018},
  date       = {2018-01-01},
  doi        = {10.1007/978-3-030-03667-6_34},
  url        = {http://jens-lehmann.org/files/2018/ekaw_conod.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Graux, Damien; Sejdiu, Gezim; Jabeen, Hajira; Lehmann, Jens; Sui, Danning; Muhs, Dominik; Pfeffer, Johannes Profiting from Kitties on Ethereum: Leveraging Blockchain RDF with
SANSA Inproceedings Proceedings of the Posters and Demos Track of the 14th International
Conference on Semantic Systems co-located with the 14th International
Conference on Semantic Systems (SEMANTiCS 2018), Vienna, Austria,
September 10-13, 2018, CEUR-WS.org, 2018. Links | BibTeX @inproceedings{DBLP:conf/i-semantics/GrauxSJLSMP18,
  author     = {Graux, Damien and
                Sejdiu, Gezim and
                Jabeen, Hajira and
                Lehmann, Jens and
                Sui, Danning and
                Muhs, Dominik and
                Pfeffer, Johannes},
  title      = {Profiting from Kitties on Ethereum: Leveraging Blockchain RDF with SANSA},
  booktitle  = {Proceedings of the Posters and Demos Track of the 14th International Conference on Semantic Systems co-located with the 14th International Conference on Semantic Systems (SEMANTiCS 2018), Vienna, Austria, September 10-13, 2018},
  series     = {CEUR Workshop Proceedings},
  volume     = {2198},
  publisher  = {CEUR-WS.org},
  year       = {2018},
  date       = {2018-01-01},
  url        = {http://jens-lehmann.org/files/2018/semantics_ethereum_pd.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Sejdiu, Gezim; Ermilov, Ivan; Lehmann, Jens; Mami, Mohamed Nadjib DistLODStats: Distributed Computation of RDF Dataset Statistics Inproceedings The Semantic Web - ISWC 2018 - 17th International Semantic Web Conference,
Monterey, CA, USA, October 8-12, 2018, Proceedings, Part II, pp. 206–222, Springer, 2018. Links | BibTeX @inproceedings{DBLP:conf/semweb/SejdiuELM18,
  author     = {Sejdiu, Gezim and
                Ermilov, Ivan and
                Lehmann, Jens and
                Mami, Mohamed Nadjib},
  title      = {DistLODStats: Distributed Computation of RDF Dataset Statistics},
  booktitle  = {The Semantic Web - ISWC 2018 - 17th International Semantic Web Conference, Monterey, CA, USA, October 8-12, 2018, Proceedings, Part II},
  series     = {Lecture Notes in Computer Science},
  volume     = {11137},
  pages      = {206--222},
  publisher  = {Springer},
  year       = {2018},
  date       = {2018-01-01},
  doi        = {10.1007/978-3-030-00668-6_13},
  url        = {http://jens-lehmann.org/files/2018/iswc_distlodstats.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Sejdiu, Gezim; Ermilov, Ivan; Mami, Mohamed Nadjib; Lehmann, Jens STATisfy Me: What Are My Stats? Inproceedings Proceedings of the ISWC 2018 Posters & Demonstrations, Industry
and Blue Sky Ideas Tracks co-located with 17th International Semantic
Web Conference (ISWC 2018), Monterey, USA, October 8th - to - 12th,
2018, CEUR-WS.org, 2018. Links | BibTeX @inproceedings{DBLP:conf/semweb/SejdiuEML18,
  author     = {Sejdiu, Gezim and
                Ermilov, Ivan and
                Mami, Mohamed Nadjib and
                Lehmann, Jens},
  title      = {STATisfy Me: What Are My Stats?},
  booktitle  = {Proceedings of the ISWC 2018 Posters \& Demonstrations, Industry and Blue Sky Ideas Tracks co-located with 17th International Semantic Web Conference (ISWC 2018), Monterey, USA, October 8th - to - 12th, 2018},
  series     = {CEUR Workshop Proceedings},
  volume     = {2180},
  publisher  = {CEUR-WS.org},
  year       = {2018},
  date       = {2018-01-01},
  url        = {http://ceur-ws.org/Vol-2180/paper-58.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
2017
|
Auer, Sören; Scerri, Simon; Versteden, Aad; Pauwels, Erika; Charalambidis, Angelos; Konstantopoulos, Stasinos; Lehmann, Jens; Jabeen, Hajira; Ermilov, Ivan; Sejdiu, Gezim; Ikonomopoulos, Andreas; Andronopoulos, Spyros; Vlachogiannis, Mandy; Pappas, Charalambos; Davettas, Athanasios; Klampanos, Iraklis A; Grigoropoulos, Efstathios; Karkaletsis, Vangelis; de Boer, Victor; Siebes, Ronald; Mami, Mohamed Nadjib; Albani, Sergio; Lazzarini, Michele; Nunes, Paulo; Angiuli, Emanuele; Pittaras, Nikiforos; Giannakopoulos, George; Argyriou, Giorgos; Stamoulis, George; Papadakis, George; Koubarakis, Manolis; Karampiperis, Pythagoras; Ngomo, Axel-Cyrille Ngonga; Vidal, Maria-Esther The BigDataEurope Platform - Supporting the Variety Dimension of Big Data Inproceedings Web Engineering - 17th International Conference, ICWE 2017, Rome,
Italy, June 5-8, 2017, Proceedings, pp. 41–59, Springer, 2017. Abstract | Links | BibTeX @inproceedings{DBLP:conf/icwe/AuerSVPCKLJESIA17,
  author     = {Auer, Sören and
                Scerri, Simon and
                Versteden, Aad and
                Pauwels, Erika and
                Charalambidis, Angelos and
                Konstantopoulos, Stasinos and
                Lehmann, Jens and
                Jabeen, Hajira and
                Ermilov, Ivan and
                Sejdiu, Gezim and
                Ikonomopoulos, Andreas and
                Andronopoulos, Spyros and
                Vlachogiannis, Mandy and
                Pappas, Charalambos and
                Davettas, Athanasios and
                Klampanos, Iraklis A. and
                Grigoropoulos, Efstathios and
                Karkaletsis, Vangelis and
                de Boer, Victor and
                Siebes, Ronald and
                Mami, Mohamed Nadjib and
                Albani, Sergio and
                Lazzarini, Michele and
                Nunes, Paulo and
                Angiuli, Emanuele and
                Pittaras, Nikiforos and
                Giannakopoulos, George and
                Argyriou, Giorgos and
                Stamoulis, George and
                Papadakis, George and
                Koubarakis, Manolis and
                Karampiperis, Pythagoras and
                Ngonga Ngomo, Axel-Cyrille and
                Vidal, Maria-Esther},
  title      = {The BigDataEurope Platform - Supporting the Variety Dimension of Big Data},
  booktitle  = {Web Engineering - 17th International Conference, ICWE 2017, Rome, Italy, June 5-8, 2017, Proceedings},
  series     = {Lecture Notes in Computer Science},
  volume     = {10360},
  pages      = {41--59},
  publisher  = {Springer},
  year       = {2017},
  date       = {2017-01-01},
  doi        = {10.1007/978-3-319-60131-1_3},
  url        = {http://jens-lehmann.org/files/2017/icwe_bde.pdf},
  abstract   = {The management and analysis of large-scale datasets -- described with the term Big Data -- involves the three classic dimensions volume, velocity and variety. While the former two are well supported by a plethora of software components, the variety dimension is still rather neglected. We present the BDE platform -- an easy-to-deploy, easy-to-use and adaptable (cluster-based and standalone) platform for the execution of big data components and tools like Hadoop, Spark, Flink. The BDE platform was designed based upon the requirements gathered from the seven societal challenges put forward by the European Commission in the Horizon 2020 programme and targeted by the BigDataEurope pilots. As a result, the BDE platform allows to perform a variety of Big Data flow tasks like message passing (Kafka, Flume), storage (Hive, Cassandra) or publishing (GeoTriples). In order to facilitate the processing of heterogeneous data, a particular innovation of the platform is the semantic layer, which allows to directly process RDF data and to map and transform arbitrary data into RDF.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
The management and analysis of large-scale datasets -- described with the term Big Data -- involves the three classic dimensions volume, velocity and variety. While the former two are well supported by a plethora of software components, the variety dimension is still rather neglected. We present the BDE platform -- an easy-to-deploy, easy-to-use and adaptable (cluster-based and standalone) platform for the execution of big data components and tools like Hadoop, Spark, Flink. The BDE platform was designed based upon the requirements gathered from the seven societal challenges put forward by the European Commission in the Horizon 2020 programme and targeted by the BigDataEurope pilots. As a result, the BDE platform allows to perform a variety of Big Data flow tasks like message passing (Kafka, Flume), storage (Hive, Cassandra) or publishing (GeoTriples). In order to facilitate the processing of heterogeneous data, a particular innovation of the platform is the semantic layer, which allows to directly process RDF data and to map and transform arbitrary data into RDF. |
Ermilov, Ivan; Ngomo, Axel-Cyrille Ngonga; Versteden, Aad; Jabeen, Hajira; Sejdiu, Gezim; Argyriou, Giorgos; Selmi, Luigi; Jakobitsch, Jürgen; Lehmann, Jens Managing Lifecycle of Big Data Applications Inproceedings Knowledge Engineering and Semantic Web - 8th International Conference,
KESW 2017, Szczecin, Poland, November 8-10, 2017, Proceedings, pp. 263–276, Springer, 2017. Links | BibTeX @inproceedings{DBLP:conf/kesw/ErmilovNVJSASJL17,
  author     = {Ermilov, Ivan and
                Ngonga Ngomo, Axel-Cyrille and
                Versteden, Aad and
                Jabeen, Hajira and
                Sejdiu, Gezim and
                Argyriou, Giorgos and
                Selmi, Luigi and
                Jakobitsch, Jürgen and
                Lehmann, Jens},
  title      = {Managing Lifecycle of Big Data Applications},
  booktitle  = {Knowledge Engineering and Semantic Web - 8th International Conference, KESW 2017, Szczecin, Poland, November 8-10, 2017, Proceedings},
  series     = {Communications in Computer and Information Science},
  volume     = {786},
  pages      = {263--276},
  publisher  = {Springer},
  year       = {2017},
  date       = {2017-01-01},
  doi        = {10.1007/978-3-319-69548-8_18},
  url        = {https://svn.aksw.org/papers/2017/KESW_BDE_Workflow/public.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Ermilov, Ivan; Lehmann, Jens; Sejdiu, Gezim; Bühmann, Lorenz; Westphal, Patrick; Stadler, Claus; Bin, Simon; Chakraborty, Nilesh; Petzka, Henning; Saleem, Muhammad; Ngomo, Axel-Cyrille Ngonga; Jabeen, Hajira The Tale of Sansa Spark Inproceedings Proceedings of the ISWC 2017 Posters & Demonstrations and Industry
Tracks co-located with 16th International Semantic Web Conference
(ISWC 2017), Vienna, Austria, October 23rd - to - 25th, 2017, CEUR-WS.org, 2017. Links | BibTeX @inproceedings{DBLP:conf/semweb/ErmilovLSBWSBCP17,
  author     = {Ermilov, Ivan and
                Lehmann, Jens and
                Sejdiu, Gezim and
                Bühmann, Lorenz and
                Westphal, Patrick and
                Stadler, Claus and
                Bin, Simon and
                Chakraborty, Nilesh and
                Petzka, Henning and
                Saleem, Muhammad and
                Ngonga Ngomo, Axel-Cyrille and
                Jabeen, Hajira},
  title      = {The Tale of Sansa Spark},
  booktitle  = {Proceedings of the ISWC 2017 Posters \& Demonstrations and Industry Tracks co-located with 16th International Semantic Web Conference (ISWC 2017), Vienna, Austria, October 23rd - to - 25th, 2017},
  series     = {CEUR Workshop Proceedings},
  volume     = {1963},
  publisher  = {CEUR-WS.org},
  year       = {2017},
  date       = {2017-01-01},
  url        = {http://ceur-ws.org/Vol-1963/paper552.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
|
Lehmann, Jens; Sejdiu, Gezim; Bühmann, Lorenz; Westphal, Patrick; Stadler, Claus; Ermilov, Ivan; Bin, Simon; Chakraborty, Nilesh; Saleem, Muhammad; Ngomo, Axel-Cyrille Ngonga; Jabeen, Hajira Distributed Semantic Analytics Using the SANSA Stack Inproceedings The Semantic Web - ISWC 2017 - 16th International Semantic Web Conference,
Vienna, Austria, October 21-25, 2017, Proceedings, Part II, pp. 147–155, Springer, 2017. Abstract | Links | BibTeX @inproceedings{DBLP:conf/semweb/LehmannSBWSEBCS17,
  author     = {Lehmann, Jens and
                Sejdiu, Gezim and
                Bühmann, Lorenz and
                Westphal, Patrick and
                Stadler, Claus and
                Ermilov, Ivan and
                Bin, Simon and
                Chakraborty, Nilesh and
                Saleem, Muhammad and
                Ngonga Ngomo, Axel-Cyrille and
                Jabeen, Hajira},
  title      = {Distributed Semantic Analytics Using the SANSA Stack},
  booktitle  = {The Semantic Web - ISWC 2017 - 16th International Semantic Web Conference, Vienna, Austria, October 21-25, 2017, Proceedings, Part II},
  series     = {Lecture Notes in Computer Science},
  volume     = {10588},
  pages      = {147--155},
  publisher  = {Springer},
  year       = {2017},
  date       = {2017-01-01},
  doi        = {10.1007/978-3-319-68204-4_15},
  url        = {http://svn.aksw.org/papers/2017/ISWC_SANSA_SoftwareFramework/public.pdf},
  abstract   = {Over the past decade, vast amounts of machine-readable structured information have become available through the automation of research processes as well as the increasing popularity of knowledge graphs and semantic technologies. A major research challenge today is to perform scalable analysis of large-scale knowledge graphs to facilitate applications like link prediction, knowledge base completion and question answering. Most analytics approaches, which scale horizontally (i.e., can be executed in a distributed environment) work on simple feature-vector-based input rather than more expressive knowledge structures. On the other hand, analytics methods which exploit expressive structures usually do not scale well to very large knowledge bases. This software framework paper describes the ongoing project Semantic Analytics Stack (SANSA) which supports expressive and scalable semantic analytics by providing functionality for distributed in-memory computing for RDF data. The library provides APIs for RDF storage, querying using SPARQL and forward chaining inference. It includes several machine learning algorithms for RDF knowledge graphs. The article describes the vision, architecture and use cases of SANSA.},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {inproceedings}
}
Over the past decade, vast amounts of machine-readable structured information have become available through the automation of research processes as well as the increasing popularity of knowledge graphs and semantic technologies. A major research challenge today is to perform scalable analysis of large-scale knowledge graphs to facilitate applications like link prediction, knowledge base completion and question answering. Most analytics approaches, which scale horizontally (i.e., can be executed in a distributed environment) work on simple feature-vector-based input rather than more expressive knowledge structures. On the other hand, analytics methods which exploit expressive structures usually do not scale well to very large knowledge bases. This software framework paper describes the ongoing project Semantic Analytics Stack (SANSA) which supports expressive and scalable semantic analytics by providing functionality for distributed in-memory computing for RDF data. The library provides APIs for RDF storage, querying using SPARQL and forward chaining inference. It includes several machine learning algorithms for RDF knowledge graphs. The article describes the vision, architecture and use cases of SANSA. |
2016
|
Thakkar, Harsh; Dubey, Mohnish; Sejdiu, Gezim; Ngomo, Axel-Cyrille Ngonga; Debattista, Jeremy; Lange, Christoph; Lehmann, Jens; Auer, Sören; Vidal, Maria-Esther LITMUS: An Open Extensible Framework for Benchmarking RDF Data
Management Solutions Journal Article CoRR, abs/1608.02800 , 2016. Links | BibTeX @article{DBLP:journals/corr/ThakkarDSNDLLAV16,
  author     = {Thakkar, Harsh and
                Dubey, Mohnish and
                Sejdiu, Gezim and
                Ngonga Ngomo, Axel-Cyrille and
                Debattista, Jeremy and
                Lange, Christoph and
                Lehmann, Jens and
                Auer, Sören and
                Vidal, Maria-Esther},
  title      = {LITMUS: An Open Extensible Framework for Benchmarking RDF Data Management Solutions},
  journal    = {CoRR},
  volume     = {abs/1608.02800},
  eprint     = {1608.02800},
  eprinttype = {arXiv},
  year       = {2016},
  date       = {2016-01-01},
  url        = {http://arxiv.org/abs/1608.02800},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {article}
}
|
2014
|
Sejdiu, Gezim Semantic Ranking of Web Pages : The Wikipedia Case Study Masters Thesis Faculty of Electrical and Computer Engineering, 2014. Links | BibTeX @mastersthesis{sejdiu2014,
  author     = {Sejdiu, Gezim},
  title      = {Semantic Ranking of Web Pages : The Wikipedia Case Study},
  school     = {University of Prishtina, Faculty of Electrical and Computer Engineering},
  address    = {Prishtina, Kosova},
  year       = {2014},
  date       = {2014-01-01},
  url        = {https://www.researchgate.net/profile/Gezim_Sejdiu/publication/264400068_Rangimi_semantik_i_ueb_faqeve_-_Wikipedia_si_nje_rast_studimi_Semantic_Ranking_of_Web_Pages_-_The_Wikipedia_Case_Study/links/569904a808aeeea98594506c/Rangimi-semantik-i-ueb-faqeve-Wikipedia-si-nje-rast-studimi-Semantic-Ranking-of-Web-Pages-The-Wikipedia-Case-Study.pdf?origin=publication_detail&ev=pub_int_prw_xdl&msrp=AA37FwBzmKERYXi1M2vhWudDort1uLpVM1OSeZjP0qQ0IpEmuvefoRBnX2gTOpctGw5NQ-WolOCmQ4CYW6PwSE9UP27VAGvrmWbzGO7X5ssHhngO5v4.lVzcwbIYCwbOaWUUPbOVaMXxWfjqqco8y7lPka6Sx7akCcIJgNaBUsRP9ybuqT0wg-ngpyu_fSPRrs63hkYjLJvJZvNDWR3fzZopSg.2puAeXufSna9VfnNYPTr3-L_fgans7XuC2YL1uo73vNE68nlRwKz0sc_RvUZusuNMkwxtSkJClAIrpmtZNrOeB7UtJ9-xaG5j8pqRQ.jB1XguS-PfblCV77SV_zZJK2kMl5WXGMPP-NgQs8X5x0efgfCk_urpyJJb-cnp7LHUlXEUiq_t5wSdDgb3j9lXd99NTG_tyV6LESEQ},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {mastersthesis}
}
|
Ahmedi, Lule; Halilaj, Lavdim; Sejdiu, Gëzim; Bajraktari, Labinot Ranking Authors on the Web: A Semantic AuthorRank Incollection Social Networks: Analysis and Case Studies, pp. 19–40, Springer, 2014. Links | BibTeX @incollection{DBLP:series/lnsn/AhmediHSB14,
  author     = {Ahmedi, Lule and
                Halilaj, Lavdim and
                Sejdiu, G{\"e}zim and
                Bajraktari, Labinot},
  title      = {Ranking Authors on the Web: A Semantic AuthorRank},
  booktitle  = {Social Networks: Analysis and Case Studies},
  series     = {Lecture Notes in Social Networks},
  pages      = {19--40},
  publisher  = {Springer},
  year       = {2014},
  date       = {2014-01-01},
  doi        = {10.1007/978-3-7091-1797-2_2},
  url        = {http://luleahmedi.uni-pr.edu/docs/pubs/SemAuthorRank2014.pdf},
  keywords   = {},
  pubstate   = {published},
  tppubtype  = {incollection}
}
|